1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - dead store elimination
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
37 /* References searched while implementing this.
39 Compilers Principles, Techniques and Tools
43 Global Optimization by Suppression of Partial Redundancies
45 Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
49 Stanford Ph.D. thesis, Dec. 1983
51 A Fast Algorithm for Code Movement Optimization
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
65 Efficiently Computing Static Single Assignment Form and the Control
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
109 Global code motion / global value numbering
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
113 Value Driven Redundancy Elimination
115 Rice University Ph.D. thesis, Apr. 1996
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
121 High Performance Compilers for Parallel Computing
125 Advanced Compiler Design and Implementation
127 Morgan Kaufmann, 1997
129 People wishing to speed up the code here should read:
130 Elimination Algorithms for Data Flow Analysis
131 B.G. Ryder, M.C. Paull
132 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
134 How to Analyze Large Programs Efficiently and Informatively
135 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
136 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
138 People wishing to do something different can find various possibilities
139 in the above papers and elsewhere.
149 #include "hard-reg-set.h"
152 #include "insn-config.h"
154 #include "basic-block.h"
156 #include "function.h"
160 #define obstack_chunk_alloc gmalloc
161 #define obstack_chunk_free free
163 /* Maximum number of passes to perform. */
166 /* Propagate flow information through back edges and thus enable PRE's
167 moving loop invariant calculations out of loops.
169 Originally this tended to create worse overall code, but several
170 improvements during the development of PRE seem to have made following
171 back edges generally a win.
173 Note much of the loop invariant code motion done here would normally
174 be done by loop.c, which has more heuristics for when to move invariants
175 out of loops. At some point we might need to move some of those
176 heuristics into gcse.c. */
177 #define FOLLOW_BACK_EDGES 1
179 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
180 are a superset of those done by GCSE.
182 We perform the following steps:
184 1) Compute basic block information.
186 2) Compute table of places where registers are set.
188 3) Perform copy/constant propagation.
190 4) Perform global cse.
192 5) Perform another pass of copy/constant propagation.
194 Two passes of copy/constant propagation are done because the first one
195 enables more GCSE and the second one helps to clean up the copies that
196 GCSE creates. This is needed more for PRE than for Classic because Classic
197 GCSE will try to use an existing register containing the common
198 subexpression rather than create a new one. This is harder to do for PRE
199 because of the code motion (which Classic GCSE doesn't do).
201 Expressions we are interested in GCSE-ing are of the form
202 (set (pseudo-reg) (expression)).
203 Function want_to_gcse_p says what these are.
205 PRE handles moving invariant expressions out of loops (by treating them as
206 partially redundant).
208 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
209 assignment) based GVN (global value numbering). L. T. Simpson's paper
210 (Rice University) on value numbering is a useful reference for this.
212 **********************
214 We used to support multiple passes but there are diminishing returns in
215 doing so. The first pass usually makes 90% of the changes that are doable.
216 A second pass can make a few more changes made possible by the first pass.
217 Experiments show any further passes don't make enough changes to justify
220 A study of spec92 using an unlimited number of passes:
221 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
222 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
223 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
225 It was found doing copy propagation between each pass enables further
228 PRE is quite expensive in complicated functions because the DFA can take
229 a while to converge. Hence we only perform one pass. Macro MAX_PASSES can
230 be modified if one wants to experiment.
232 **********************
234 The steps for PRE are:
236 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
238 2) Perform the data flow analysis for PRE.
240 3) Delete the redundant instructions
242 4) Insert the required copies [if any] that make the partially
243 redundant instructions fully redundant.
245 5) For other reaching expressions, insert an instruction to copy the value
246 to a newly created pseudo that will reach the redundant instruction.
248 The deletion is done first so that when we do insertions we
249 know which pseudo reg to use.
251 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
252 argue it is not. The number of iterations for the algorithm to converge
253 is typically 2-4 so I don't view it as that expensive (relatively speaking).
255 PRE GCSE depends heavily on the second CSE pass to clean up the copies
256 we create. To make an expression reach the place where it's redundant,
257 the result of the expression is copied to a new register, and the redundant
258 expression is deleted by replacing it with this new register. Classic GCSE
259 doesn't have this problem as much as it computes the reaching defs of
260 each register in each block and thus can try to use an existing register.
262 **********************
264 A fair bit of simplicity is created by creating small functions for simple
265 tasks, even when the function is only called in one place. This may
266 measurably slow things down [or may not] by creating more function call
267 overhead than is necessary. The source is laid out so that it's trivial
268 to make the affected functions inline so that one can measure what speed
269 up, if any, can be achieved, and maybe later when things settle things can
272 Help stamp out big monolithic functions! */
274 /* GCSE global vars. */
277 static FILE *gcse_file;
279 /* Note whether or not we should run jump optimization after gcse. We
280 want to do this for two cases.
282 * If we changed any jumps via cprop.
284 * If we added any labels via edge splitting. */
286 static int run_jump_opt_after_gcse;
288 /* Element I is a list of I's predecessors/successors. */
289 static int_list_ptr *s_preds;
290 static int_list_ptr *s_succs;
292 /* Element I is the number of predecessors/successors of basic block I. */
293 static int *num_preds;
294 static int *num_succs;
296 /* Bitmaps are normally not included in debugging dumps.
297 However it's useful to be able to print them from GDB.
298 We could create special functions for this, but it's simpler to
299 just allow passing stderr to the dump_foo fns. Since stderr can
300 be a macro, we store a copy here. */
301 static FILE *debug_stderr;
303 /* An obstack for our working variables. */
304 static struct obstack gcse_obstack;
306 /* Non-zero for each mode that supports (set (reg) (reg)).
307 This is trivially true for integer and floating point values.
308 It may or may not be true for condition codes. */
309 static char can_copy_p[(int) NUM_MACHINE_MODES];
311 /* Non-zero if can_copy_p has been initialized. */
312 static int can_copy_init_p;
318 /* Hash table of expressions. */
322 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
324 /* Index in the available expression bitmaps. */
326 /* Next entry with the same hash. */
327 struct expr *next_same_hash;
328 /* List of anticipatable occurrences in basic blocks in the function.
329 An "anticipatable occurrence" is one that is the first occurrence in the
330 basic block, the operands are not modified in the basic block prior
331 to the occurrence and the output is not used between the start of
332 the block and the occurrence. */
333 struct occr *antic_occr;
334 /* List of available occurrences in basic blocks in the function.
335 An "available occurrence" is one that is the last occurrence in the
336 basic block and the operands are not modified by following statements in
337 the basic block [including this insn]. */
338 struct occr *avail_occr;
339 /* Non-null if the computation is PRE redundant.
340 The value is the newly created pseudo-reg to record a copy of the
341 expression in all the places that reach the redundant copy. */
345 /* Occurrence of an expression.
346 There is one per basic block. If a pattern appears more than once the
347 last appearance is used [or first for anticipatable expressions]. */
351 /* Next occurrence of this expression. */
353 /* The insn that computes the expression. */
355 /* Non-zero if this [anticipatable] occurrence has been deleted. */
357 /* Non-zero if this [available] occurrence has been copied to
359 /* ??? This is mutually exclusive with deleted_p, so they could share
364 /* Expression and copy propagation hash tables.
365 Each hash table is an array of buckets.
366 ??? It is known that if it were an array of entries, structure elements
367 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
368 not clear whether in the final analysis a sufficient amount of memory would
369 be saved as the size of the available expression bitmaps would be larger
370 [one could build a mapping table without holes afterwards though].
371 Someday I'll perform the computation and figure it out.
374 /* Total size of the expression hash table, in elements. */
375 static int expr_hash_table_size;
377 This is an array of `expr_hash_table_size' elements. */
378 static struct expr **expr_hash_table;
380 /* Total size of the copy propagation hash table, in elements. */
381 static int set_hash_table_size;
383 This is an array of `set_hash_table_size' elements. */
384 static struct expr **set_hash_table;
386 /* Mapping of uids to cuids.
387 Only real insns get cuids. */
388 static int *uid_cuid;
390 /* Highest UID in UID_CUID. */
393 /* Get the cuid of an insn. */
394 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
396 /* Number of cuids. */
399 /* Mapping of cuids to insns. */
400 static rtx *cuid_insn;
402 /* Get insn from cuid. */
403 #define CUID_INSN(CUID) (cuid_insn[CUID])
405 /* Maximum register number in function prior to doing gcse + 1.
406 Registers created during this pass have regno >= max_gcse_regno.
407 This is named with "gcse" to not collide with global of same name. */
408 static int max_gcse_regno;
410 /* Maximum number of cse-able expressions found. */
412 /* Maximum number of assignments for copy propagation found. */
415 /* Table of registers that are modified.
416 For each register, each element is a list of places where the pseudo-reg
419 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
420 requires knowledge of which blocks kill which regs [and thus could use
421 a bitmap instead of the lists `reg_set_table' uses].
423 `reg_set_table' and could be turned into an array of bitmaps
425 [however perhaps it may be useful to keep the data as is].
426 One advantage of recording things this way is that `reg_set_table' is
427 fairly sparse with respect to pseudo regs but for hard regs could be
428 fairly dense [relatively speaking].
429 And recording sets of pseudo-regs in lists speeds
430 up functions like compute_transp since in the case of pseudo-regs we only
431 need to iterate over the number of times a pseudo-reg is set, not over the
432 number of basic blocks [clearly there is a bit of a slow down in the cases
433 where a pseudo is set more than once in a block, however it is believed
434 that the net effect is to speed things up]. This isn't done for hard-regs
435 because recording call-clobbered hard-regs in `reg_set_table' at each
436 function call can consume a fair bit of memory, and iterating over hard-regs
437 stored this way in compute_transp will be more expensive. */
439 typedef struct reg_set {
440 /* The next setting of this register. */
441 struct reg_set *next;
442 /* The insn where it was set. */
445 static reg_set **reg_set_table;
446 /* Size of `reg_set_table'.
447 The table starts out at max_gcse_regno + slop, and is enlarged as
449 static int reg_set_table_size;
450 /* Amount to grow `reg_set_table' by when it's full. */
451 #define REG_SET_TABLE_SLOP 100
453 /* Bitmap containing one bit for each register in the program.
454 Used when performing GCSE to track which registers have been set since
455 the start of the basic block. */
456 static sbitmap reg_set_bitmap;
458 /* For each block, a bitmap of registers set in the block.
459 This is used by expr_killed_p and compute_transp.
460 It is computed during hash table computation and not by compute_sets
461 as it includes registers added since the last pass (or between cprop and
462 gcse) and it's currently not easy to realloc sbitmap vectors. */
463 static sbitmap *reg_set_in_block;
465 /* For each block, non-zero if memory is set in that block.
466 This is computed during hash table computation and is used by
467 expr_killed_p and compute_transp.
468 ??? Handling of memory is very simple, we don't make any attempt
469 to optimize things (later).
470 ??? This can be computed by compute_sets since the information
472 static char *mem_set_in_block;
474 /* Various variables for statistics gathering. */
476 /* Memory used in a pass.
477 This isn't intended to be absolutely precise. Its intent is only
478 to keep an eye on memory usage. */
479 static int bytes_used;
480 /* GCSE substitutions made. */
481 static int gcse_subst_count;
482 /* Number of copy instructions created. */
483 static int gcse_create_count;
484 /* Number of constants propagated. */
485 static int const_prop_count;
486 /* Number of copies propagated. */
487 static int copy_prop_count;
489 /* These variables are used by classic GCSE.
490 Normally they'd be defined a bit later, but `rd_gen' needs to
491 be declared sooner. */
493 /* A bitmap of all ones for implementing the algorithm for available
494 expressions and reaching definitions. */
495 /* ??? Available expression bitmaps have a different size than reaching
496 definition bitmaps. This should be the larger of the two, however, it
497 is not currently used for reaching definitions. */
498 static sbitmap u_bitmap;
500 /* Each block has a bitmap of each type.
501 The length of each block's bitmap is:
503 max_cuid - for reaching definitions
504 n_exprs - for available expressions
506 Thus we view the bitmaps as 2 dimensional arrays. i.e.
507 rd_kill[block_num][cuid_num]
508 ae_kill[block_num][expr_num]
511 /* For reaching defs */
512 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
514 /* for available exprs */
515 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
518 static void compute_can_copy PROTO ((void));
520 static char *gmalloc PROTO ((unsigned int));
521 static char *grealloc PROTO ((char *, unsigned int));
522 static char *gcse_alloc PROTO ((unsigned long));
523 static void alloc_gcse_mem PROTO ((rtx));
524 static void free_gcse_mem PROTO ((void));
525 static void alloc_reg_set_mem PROTO ((int));
526 static void free_reg_set_mem PROTO ((void));
527 static void record_one_set PROTO ((int, rtx));
528 static void record_set_info PROTO ((rtx, rtx));
529 static void compute_sets PROTO ((rtx));
531 static void hash_scan_insn PROTO ((rtx, int, int));
532 static void hash_scan_set PROTO ((rtx, rtx, int));
533 static void hash_scan_clobber PROTO ((rtx, rtx));
534 static void hash_scan_call PROTO ((rtx, rtx));
535 static int want_to_gcse_p PROTO ((rtx));
536 static int oprs_unchanged_p PROTO ((rtx, rtx, int));
537 static int oprs_anticipatable_p PROTO ((rtx, rtx));
538 static int oprs_available_p PROTO ((rtx, rtx));
539 static void insert_expr_in_table PROTO ((rtx, enum machine_mode,
541 static void insert_set_in_table PROTO ((rtx, rtx));
542 static unsigned int hash_expr PROTO ((rtx, enum machine_mode,
544 static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *));
545 static unsigned int hash_set PROTO ((int, int));
546 static int expr_equiv_p PROTO ((rtx, rtx));
547 static void record_last_reg_set_info PROTO ((rtx, int));
548 static void record_last_mem_set_info PROTO ((rtx));
549 static void record_last_set_info PROTO ((rtx, rtx));
550 static void compute_hash_table PROTO ((int));
551 static void alloc_set_hash_table PROTO ((int));
552 static void free_set_hash_table PROTO ((void));
553 static void compute_set_hash_table PROTO ((void));
554 static void alloc_expr_hash_table PROTO ((int));
555 static void free_expr_hash_table PROTO ((void));
556 static void compute_expr_hash_table PROTO ((void));
557 static void dump_hash_table PROTO ((FILE *, const char *, struct expr **,
559 static struct expr *lookup_expr PROTO ((rtx));
560 static struct expr *lookup_set PROTO ((int, rtx));
561 static struct expr *next_set PROTO ((int, struct expr *));
562 static void reset_opr_set_tables PROTO ((void));
563 static int oprs_not_set_p PROTO ((rtx, rtx));
564 static void mark_call PROTO ((rtx));
565 static void mark_set PROTO ((rtx, rtx));
566 static void mark_clobber PROTO ((rtx, rtx));
567 static void mark_oprs_set PROTO ((rtx));
569 static void alloc_cprop_mem PROTO ((int, int));
570 static void free_cprop_mem PROTO ((void));
571 static void compute_transp PROTO ((rtx, int, sbitmap *, int));
572 static void compute_transpout PROTO ((void));
573 static void compute_local_properties PROTO ((sbitmap *, sbitmap *,
575 static void compute_cprop_avinout PROTO ((void));
576 static void compute_cprop_data PROTO ((void));
577 static void find_used_regs PROTO ((rtx));
578 static int try_replace_reg PROTO ((rtx, rtx, rtx));
579 static struct expr *find_avail_set PROTO ((int, rtx));
580 static int cprop_jump PROTO((rtx, rtx, struct reg_use *, rtx));
582 static int cprop_cc0_jump PROTO((rtx, struct reg_use *, rtx));
584 static int cprop_insn PROTO ((rtx, int));
585 static int cprop PROTO ((int));
586 static int one_cprop_pass PROTO ((int, int));
588 static void alloc_pre_mem PROTO ((int, int));
589 static void free_pre_mem PROTO ((void));
590 static void compute_pre_data PROTO ((void));
591 static int pre_expr_reaches_here_p PROTO ((int, struct expr *,
593 static void insert_insn_end_bb PROTO ((struct expr *, int, int));
594 static void pre_insert PROTO ((struct expr **));
595 static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
596 static void pre_insert_copies PROTO ((void));
597 static int pre_delete PROTO ((void));
598 static int pre_gcse PROTO ((void));
599 static int one_pre_gcse_pass PROTO ((int));
601 static void add_label_notes PROTO ((rtx, rtx));
603 static void alloc_code_hoist_mem PROTO ((int, int));
604 static void free_code_hoist_mem PROTO ((void));
605 static void compute_code_hoist_vbeinout PROTO ((void));
606 static void compute_code_hoist_data PROTO ((void));
607 static int hoist_expr_reaches_here_p PROTO ((int, int, int, char *));
608 static void hoist_code PROTO ((void));
609 static int one_code_hoisting_pass PROTO ((void));
611 static void alloc_rd_mem PROTO ((int, int));
612 static void free_rd_mem PROTO ((void));
613 static void handle_rd_kill_set PROTO ((rtx, int, int));
614 static void compute_kill_rd PROTO ((void));
615 static void compute_rd PROTO ((void));
616 static void alloc_avail_expr_mem PROTO ((int, int));
617 static void free_avail_expr_mem PROTO ((void));
618 static void compute_ae_gen PROTO ((void));
619 static int expr_killed_p PROTO ((rtx, int));
620 static void compute_ae_kill PROTO ((void));
621 static void compute_available PROTO ((void));
622 static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
624 static rtx computing_insn PROTO ((struct expr *, rtx));
625 static int def_reaches_here_p PROTO ((rtx, rtx));
626 static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int));
627 static int handle_avail_expr PROTO ((rtx, struct expr *));
628 static int classic_gcse PROTO ((void));
629 static int one_classic_gcse_pass PROTO ((int));
631 static void invalidate_nonnull_info PROTO ((rtx, rtx));
634 /* Entry point for global common subexpression elimination.
635 F is the first instruction in the function. */
643 /* Bytes used at start of pass. */
644 int initial_bytes_used;
645 /* Maximum number of bytes used by a pass. */
647 /* Point to release obstack data from for each pass. */
648 char *gcse_obstack_bottom;
650 /* We do not construct an accurate cfg in functions which call
651 setjmp, so just punt to be safe. */
652 if (current_function_calls_setjmp)
655 /* Assume that we do not need to run jump optimizations after gcse. */
656 run_jump_opt_after_gcse = 0;
658 /* For calling dump_foo fns from gdb. */
659 debug_stderr = stderr;
662 /* Identify the basic block information for this function, including
663 successors and predecessors. */
664 max_gcse_regno = max_reg_num ();
665 find_basic_blocks (f, max_gcse_regno, file, 1);
667 /* Return if there's nothing to do. */
668 if (n_basic_blocks <= 1)
670 /* Free storage allocated by find_basic_blocks. */
671 free_basic_block_vars (0);
675 /* Trying to perform global optimizations on flow graphs which have
676 a high connectivity will take a long time and is unlikely to be
679 In normal circumstances a cfg should have about twice as many edges
680 as blocks. But we do not want to punish small functions which have
681 a couple switch statements. So we require a relatively large number
682 of basic blocks and the ratio of edges to blocks to be high. */
683 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
685 /* Free storage allocated by find_basic_blocks. */
686 free_basic_block_vars (0);
690 /* See what modes support reg/reg copy operations. */
691 if (! can_copy_init_p)
697 gcc_obstack_init (&gcse_obstack);
699 /* Allocate and compute predecessors/successors. */
701 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
702 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
703 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
704 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
705 bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
706 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
709 dump_bb_data (file, s_preds, s_succs, 0);
711 /* Record where pseudo-registers are set.
712 This data is kept accurate during each pass.
713 ??? We could also record hard-reg information here
714 [since it's unchanging], however it is currently done during
715 hash table computation.
717 It may be tempting to compute MEM set information here too, but MEM
718 sets will be subject to code motion one day and thus we need to compute
719 information about memory sets when we build the hash tables. */
721 alloc_reg_set_mem (max_gcse_regno);
725 initial_bytes_used = bytes_used;
727 gcse_obstack_bottom = gcse_alloc (1);
729 while (changed && pass < MAX_PASSES)
733 fprintf (file, "GCSE pass %d\n\n", pass + 1);
735 /* Initialize bytes_used to the space for the pred/succ lists,
736 and the reg_set_table data. */
737 bytes_used = initial_bytes_used;
739 /* Each pass may create new registers, so recalculate each time. */
740 max_gcse_regno = max_reg_num ();
744 /* Don't allow constant propagation to modify jumps
746 changed = one_cprop_pass (pass + 1, 0);
749 changed |= one_classic_gcse_pass (pass + 1);
751 changed |= one_pre_gcse_pass (pass + 1);
753 if (max_pass_bytes < bytes_used)
754 max_pass_bytes = bytes_used;
756 /* Free up memory, then reallocate for code hoisting. We can
757 not re-use the existing allocated memory because the tables
758 will not have info for the insns or registers created by
759 partial redundancy elimination. */
762 /* It does not make sense to run code hoisting unless we are optimizing
763 for code size -- it rarely makes programs faster, and can make
764 them bigger if we did partial redundancy elimination (when optimizing
765 for space, we use a classic gcse algorithm instead of partial
766 redundancy algorithms). */
769 max_gcse_regno = max_reg_num ();
771 changed |= one_code_hoisting_pass ();
774 if (max_pass_bytes < bytes_used)
775 max_pass_bytes = bytes_used;
780 fprintf (file, "\n");
783 obstack_free (&gcse_obstack, gcse_obstack_bottom);
787 /* Do one last pass of copy propagation, including cprop into
788 conditional jumps. */
790 max_gcse_regno = max_reg_num ();
792 /* This time, go ahead and allow cprop to alter jumps. */
793 one_cprop_pass (pass + 1, 1);
798 fprintf (file, "GCSE of %s: %d basic blocks, ",
799 current_function_name, n_basic_blocks);
800 fprintf (file, "%d pass%s, %d bytes\n\n",
801 pass, pass > 1 ? "es" : "", max_pass_bytes);
804 /* Free our obstack. */
805 obstack_free (&gcse_obstack, NULL_PTR);
806 /* Free reg_set_table. */
808 /* Free storage used to record predecessor/successor data. */
810 /* Free storage allocated by find_basic_blocks. */
811 free_basic_block_vars (0);
812 return run_jump_opt_after_gcse;
815 /* Misc. utilities. */
817 /* Compute which modes support reg/reg copy operations. */
823 #ifndef AVOID_CCMODE_COPIES
826 char *free_point = (char *) oballoc (1);
828 bzero (can_copy_p, NUM_MACHINE_MODES);
831 for (i = 0; i < NUM_MACHINE_MODES; i++)
833 switch (GET_MODE_CLASS (i))
836 #ifdef AVOID_CCMODE_COPIES
839 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
840 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
841 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
852 /* Free the objects we just allocated. */
856 /* Cover function to xmalloc to record bytes allocated. */
863 return xmalloc (size);
866 /* Cover function to xrealloc.
867 We don't record the additional size since we don't know it.
868 It won't affect memory usage stats much anyway. */
875 return xrealloc (ptr, size);
878 /* Cover function to obstack_alloc.
879 We don't need to record the bytes allocated here since
880 obstack_chunk_alloc is set to gmalloc. */
886 return (char *) obstack_alloc (&gcse_obstack, size);
889 /* Allocate memory for the cuid mapping array,
890 and reg/memory set tracking tables.
892 This is called at the start of each pass. */
901 /* Find the largest UID and create a mapping from UIDs to CUIDs.
902 CUIDs are like UIDs except they increase monotonically, have no gaps,
903 and only apply to real insns. */
905 max_uid = get_max_uid ();
906 n = (max_uid + 1) * sizeof (int);
907 uid_cuid = (int *) gmalloc (n);
908 bzero ((char *) uid_cuid, n);
909 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
911 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
912 INSN_CUID (insn) = i++;
914 INSN_CUID (insn) = i;
917 /* Create a table mapping cuids to insns. */
920 n = (max_cuid + 1) * sizeof (rtx);
921 cuid_insn = (rtx *) gmalloc (n);
922 bzero ((char *) cuid_insn, n);
923 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
925 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
927 CUID_INSN (i) = insn;
932 /* Allocate vars to track sets of regs. */
934 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
936 /* Allocate vars to track sets of regs, memory per block. */
938 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
940 mem_set_in_block = (char *) gmalloc (n_basic_blocks);
943 /* Free memory allocated by alloc_gcse_mem. */
951 free (reg_set_bitmap);
953 free (reg_set_in_block);
954 free (mem_set_in_block);
958 /* Compute the local properties of each recorded expression.
959 Local properties are those that are defined by the block, irrespective
962 An expression is transparent in a block if its operands are not modified
965 An expression is computed (locally available) in a block if it is computed
966 at least once and expression would contain the same value if the
967 computation was moved to the end of the block.
969 An expression is locally anticipatable in a block if it is computed at
970 least once and expression would contain the same value if the computation
971 was moved to the beginning of the block.
973 We call this routine for cprop, pre and code hoisting. They all
974 compute basically the same information and thus can easily share
977 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
978 local properties. If NULL, then it is not necessary to compute
979 or record that particular property.
981 SETP controls which hash table to look at. If zero, this routine
982 looks at the expr hash table; if nonzero this routine looks at
983 the set hash table. Additionally, TRANSP is computed as ~TRANSP,
984 since this is really cprop's ABSALTERED. */
987 compute_local_properties (transp, comp, antloc, setp)
993 int i, hash_table_size;
994 struct expr **hash_table;
996 /* Initialize any bitmaps that were passed in. */
/* NOTE(review): the guards selecting between the zero (setp/ABSALTERED)
   and all-ones (plain transparency) initializations of TRANSP are on
   lines elided from this extraction -- confirm against the full file.  */
1000 sbitmap_vector_zero (transp, n_basic_blocks);
1002 sbitmap_vector_ones (transp, n_basic_blocks);
1005 sbitmap_vector_zero (comp, n_basic_blocks);
1007 sbitmap_vector_zero (antloc, n_basic_blocks);
1009 /* We use the same code for cprop, pre and hoisting. For cprop
1010 we care about the set hash table, for pre and hoisting we
1011 care about the expr hash table. */
1012 hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1013 hash_table = setp ? set_hash_table : expr_hash_table;
/* Walk every bucket and every chained entry; each expr's bitmap_index
   names the bit to set in the per-block property sbitmaps.  */
1015 for (i = 0; i < hash_table_size; i++)
1019 for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1022 int indx = expr->bitmap_index;
1024 /* The expression is transparent in this block if it is not killed.
1025 We start by assuming all are transparent [none are killed], and
1026 then reset the bits for those that are. */
1029 compute_transp (expr->expr, indx, transp, setp);
1031 /* The occurrences recorded in antic_occr are exactly those that
1032 we want to set to non-zero in ANTLOC. */
1036 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1038 int bb = BLOCK_NUM (occr->insn);
1039 SET_BIT (antloc[bb], indx);
1041 /* While we're scanning the table, this is a good place to
1043 occr->deleted_p = 0;
1047 /* The occurrences recorded in avail_occr are exactly those that
1048 we want to set to non-zero in COMP. */
1052 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1054 int bb = BLOCK_NUM (occr->insn);
1055 SET_BIT (comp[bb], indx);
1057 /* While we're scanning the table, this is a good place to
1063 /* While we're scanning the table, this is a good place to
1065 expr->reaching_reg = 0;
1071 /* Register set information.
1073 `reg_set_table' records where each register is set or otherwise
1076 static struct obstack reg_set_obstack;
/* Allocate reg_set_table with slop for registers created later in the
   pass, and initialize the obstack that individual reg_set nodes live on.  */
1079 alloc_reg_set_mem (n_regs)
1084 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1085 n = reg_set_table_size * sizeof (struct reg_set *);
1086 reg_set_table = (struct reg_set **) gmalloc (n);
1087 bzero ((char *) reg_set_table, n);
/* NOTE(review): "®" below is an encoding artifact -- the original text is
   "&reg_set_obstack" (the HTML entity "&reg" was decoded during extraction).
   Same artifact appears in the obstack_free and obstack_alloc calls later.  */
1089 gcc_obstack_init (®_set_obstack);
1095 free (reg_set_table);
1096 obstack_free (®_set_obstack, NULL_PTR);
1099 /* Record REGNO in the reg_set table. */
1102 record_one_set (regno, insn)
1106 /* allocate a new reg_set element and link it onto the list */
1107 struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1109 /* If the table isn't big enough, enlarge it. */
1110 if (regno >= reg_set_table_size)
1112 int new_size = regno + REG_SET_TABLE_SLOP;
1113 reg_set_table = (struct reg_set **)
1114 grealloc ((char *) reg_set_table,
1115 new_size * sizeof (struct reg_set *));
/* Only the newly added tail of the grown table is zeroed; the old
   entries are preserved by grealloc.  */
1116 bzero ((char *) (reg_set_table + reg_set_table_size),
1117 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1118 reg_set_table_size = new_size;
1121 new_reg_info = (struct reg_set *) obstack_alloc (®_set_obstack,
1122 sizeof (struct reg_set));
1123 bytes_used += sizeof (struct reg_set);
1124 new_reg_info->insn = insn;
1125 new_reg_info->next = NULL;
1126 if (reg_set_table[regno] == NULL)
1127 reg_set_table[regno] = new_reg_info;
/* Otherwise append at the tail, walking the whole chain (O(n) per insert).  */
1130 reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1131 /* ??? One could keep a "last" pointer to speed this up. */
1132 while (reg_info_ptr1 != NULL)
1134 reg_info_ptr2 = reg_info_ptr1;
1135 reg_info_ptr1 = reg_info_ptr1->next;
1137 reg_info_ptr2->next = new_reg_info;
1141 /* For communication between next two functions (via note_stores). */
1142 static rtx record_set_insn;
1144 /* Called from compute_sets via note_stores to handle one
1145 SET or CLOBBER in an insn. */
1148 record_set_info (dest, setter)
1149 rtx dest, setter ATTRIBUTE_UNUSED;
/* Strip a SUBREG wrapper so the underlying register is examined.  */
1151 if (GET_CODE (dest) == SUBREG)
1152 dest = SUBREG_REG (dest);
/* Only pseudo-register destinations are recorded in reg_set_table.  */
1154 if (GET_CODE (dest) == REG)
1156 if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1157 record_one_set (REGNO (dest), record_set_insn);
1161 /* Scan the function and record each set of each pseudo-register.
1163 This is called once, at the start of the gcse pass.
1164 See the comments for `reg_set_table' for further docs. */
/* NOTE(review): the compute_sets signature and loop header are on lines
   elided here; record_set_insn carries the current insn into the
   note_stores callback above.  */
1174 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1176 record_set_insn = insn;
1177 note_stores (PATTERN (insn), record_set_info);
1179 insn = NEXT_INSN (insn);
1183 /* Hash table support. */
1185 #define NEVER_SET -1
1187 /* For each register, the cuid of the first/last insn in the block to set it,
1188 or -1 if not set. */
1189 static int *reg_first_set;
1190 static int *reg_last_set;
1192 /* While computing "first/last set" info, this is the CUID of first/last insn
1193 to set memory or -1 if not set. `mem_last_set' is also used when
1194 performing GCSE to record whether memory has been set since the beginning
1196 Note that handling of memory is very simple, we don't make any attempt
1197 to optimize things (later). */
1198 static int mem_first_set;
1199 static int mem_last_set;
1201 /* Perform a quick check whether X, the source of a set, is something
1202 we want to consider for GCSE. */
/* NOTE(review): want_to_gcse_p's body (the switch over `code') is elided
   from this extraction; only its first statement survives.  */
1208 enum rtx_code code = GET_CODE (x);
1226 /* Return non-zero if the operands of expression X are unchanged from the
1227 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1228 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1231 oprs_unchanged_p (x, insn, avail_p)
1239 /* repeat is used to turn tail-recursion into iteration. */
1245 code = GET_CODE (x);
/* REG case: for availability (avail_p) the register must not be set at or
   after INSN (reg_last_set < INSN's cuid); for anticipatability it must not
   be set before INSN (reg_first_set >= INSN's cuid).  */
1250 return (reg_last_set[REGNO (x)] == NEVER_SET
1251 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1253 return (reg_first_set[REGNO (x)] == NEVER_SET
1254 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
/* MEM case: mirrors the REG logic but using the single coarse
   mem_first_set/mem_last_set cuids (no alias analysis).  */
1259 if (mem_last_set != NEVER_SET
1260 && mem_last_set >= INSN_CUID (insn))
1265 if (mem_first_set != NEVER_SET
1266 && mem_first_set < INSN_CUID (insn))
/* Generic case: recurse over the operands per the rtx format string,
   with the usual last-operand tail-call-to-loop conversion.  */
1293 i = GET_RTX_LENGTH (code) - 1;
1294 fmt = GET_RTX_FORMAT (code);
1299 rtx tem = XEXP (x, i);
1301 /* If we are about to do the last recursive call
1302 needed at this level, change it into iteration.
1303 This function is called enough to be worth it. */
1309 if (! oprs_unchanged_p (tem, insn, avail_p))
1312 else if (fmt[i] == 'E')
1315 for (j = 0; j < XVECLEN (x, i); j++)
1317 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1326 /* Return non-zero if the operands of expression X are unchanged from
1327 the start of INSN's basic block up to but not including INSN. */
1330 oprs_anticipatable_p (x, insn)
1333 return oprs_unchanged_p (x, insn, 0);
1336 /* Return non-zero if the operands of expression X are unchanged from
1337 INSN to the end of INSN's basic block. */
1340 oprs_available_p (x, insn)
1343 return oprs_unchanged_p (x, insn, 1);
1346 /* Hash expression X.
1347 MODE is only used if X is a CONST_INT.
1348 A boolean indicating if a volatile operand is found or if the expression
1349 contains something we don't want to insert in the table is stored in
1352 ??? One might want to merge this with canon_hash. Later. */
/* Returns a bucket index: hash_expr_1's raw hash reduced mod the table size.  */
1355 hash_expr (x, mode, do_not_record_p, hash_table_size)
1357 enum machine_mode mode;
1358 int *do_not_record_p;
1359 int hash_table_size;
1363 *do_not_record_p = 0;
1365 hash = hash_expr_1 (x, mode, do_not_record_p);
1366 return hash % hash_table_size;
1369 /* Subroutine of hash_expr to do the actual work. */
1372 hash_expr_1 (x, mode, do_not_record_p)
1374 enum machine_mode mode;
1375 int *do_not_record_p;
1382 /* repeat is used to turn tail-recursion into iteration. */
1388 code = GET_CODE (x);
/* Case-by-case hashing; each leaf folds the rtx code and its payload
   into the accumulator.  */
1393 register int regno = REGNO (x);
1394 hash += ((unsigned) REG << 7) + regno;
1400 unsigned HOST_WIDE_INT tem = INTVAL (x);
1401 hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1406 /* This is like the general case, except that it only counts
1407 the integers representing the constant. */
1408 hash += (unsigned) code + (unsigned) GET_MODE (x);
1409 if (GET_MODE (x) != VOIDmode)
1410 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1412 unsigned tem = XWINT (x, i);
1416 hash += ((unsigned) CONST_DOUBLE_LOW (x)
1417 + (unsigned) CONST_DOUBLE_HIGH (x));
1420 /* Assume there is only one rtx object for any given label. */
1422 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1423 differences and differences between each stage's debugging dumps. */
1424 hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1429 /* Don't hash on the symbol's address to avoid bootstrap differences.
1430 Different hash values may cause expressions to be recorded in
1431 different orders and thus different registers to be used in the
1432 final assembler. This also avoids differences in the dump files
1433 between various stages. */
1435 unsigned char *p = (unsigned char *) XSTR (x, 0);
1437 h += (h << 7) + *p++; /* ??? revisit */
1438 hash += ((unsigned) SYMBOL_REF << 7) + h;
/* Volatile memory references and volatile asms/unspecs poison the whole
   expression: set *do_not_record_p so the caller discards it.  */
1443 if (MEM_VOLATILE_P (x))
1445 *do_not_record_p = 1;
1448 hash += (unsigned) MEM;
1459 case UNSPEC_VOLATILE:
1460 *do_not_record_p = 1;
1464 if (MEM_VOLATILE_P (x))
1466 *do_not_record_p = 1;
/* Generic case: fold in code and mode, then recurse over operands per the
   format string ('e' = subexpression, 'E' = vector, 's' = string, 'i' = int),
   bailing out as soon as *do_not_record_p is raised.  */
1474 i = GET_RTX_LENGTH (code) - 1;
1475 hash += (unsigned) code + (unsigned) GET_MODE (x);
1476 fmt = GET_RTX_FORMAT (code);
1481 rtx tem = XEXP (x, i);
1483 /* If we are about to do the last recursive call
1484 needed at this level, change it into iteration.
1485 This function is called enough to be worth it. */
1491 hash += hash_expr_1 (tem, 0, do_not_record_p);
1492 if (*do_not_record_p)
1495 else if (fmt[i] == 'E')
1496 for (j = 0; j < XVECLEN (x, i); j++)
1498 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1499 if (*do_not_record_p)
1502 else if (fmt[i] == 's')
1504 register unsigned char *p = (unsigned char *) XSTR (x, i);
1509 else if (fmt[i] == 'i')
1511 register unsigned tem = XINT (x, i);
1521 /* Hash a set of register REGNO.
1523 Sets are hashed on the register that is set.
1524 This simplifies the PRE copy propagation code.
1526 ??? May need to make things more elaborate. Later, as necessary. */
1529 hash_set (regno, hash_table_size)
1531 int hash_table_size;
1536 return hash % hash_table_size;
1539 /* Return non-zero if exp1 is equivalent to exp2.
1540 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1547 register enum rtx_code code;
1548 register const char *fmt;
/* Fast paths: identical-or-null pointers, differing codes, differing modes.  */
1552 if (x == 0 || y == 0)
1555 code = GET_CODE (x);
1556 if (code != GET_CODE (y))
1559 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1560 if (GET_MODE (x) != GET_MODE (y))
/* Leaf comparisons.  LABEL_REF and SYMBOL_REF compare by pointer identity,
   which relies on those rtx objects being shared/interned.  */
1570 return INTVAL (x) == INTVAL (y);
1573 return XEXP (x, 0) == XEXP (y, 0);
1576 return XSTR (x, 0) == XSTR (y, 0);
1579 return REGNO (x) == REGNO (y);
1581 /* For commutative operations, check both orders. */
1589 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1590 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1591 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1592 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1598 /* Compare the elements. If any pair of corresponding elements
1599 fail to match, return 0 for the whole thing. */
1601 fmt = GET_RTX_FORMAT (code);
1602 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1607 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1612 if (XVECLEN (x, i) != XVECLEN (y, i))
1614 for (j = 0; j < XVECLEN (x, i); j++)
1615 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1620 if (strcmp (XSTR (x, i), XSTR (y, i)))
1625 if (XINT (x, i) != XINT (y, i))
1630 if (XWINT (x, i) != XWINT (y, i))
1645 /* Insert expression X in INSN in the hash table.
1646 If it is already present, record it as the last occurrence in INSN's
1649 MODE is the mode of the value X is being stored into.
1650 It is only used if X is a CONST_INT.
1652 ANTIC_P is non-zero if X is an anticipatable expression.
1653 AVAIL_P is non-zero if X is an available expression. */
1656 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1658 enum machine_mode mode;
1660 int antic_p, avail_p;
1662 int found, do_not_record_p;
1664 struct expr *cur_expr, *last_expr = NULL;
1665 struct occr *antic_occr, *avail_occr;
1666 struct occr *last_occr = NULL;
1668 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1670 /* Do not insert expression in table if it contains volatile operands,
1671 or if hash_expr determines the expression is something we don't want
1672 to or can't handle. */
1673 if (do_not_record_p)
/* Scan the bucket chain for an equivalent expression already recorded.  */
1676 cur_expr = expr_hash_table[hash];
1679 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1681 /* If the expression isn't found, save a pointer to the end of
1683 last_expr = cur_expr;
1684 cur_expr = cur_expr->next_same_hash;
/* Not found: allocate a fresh expr node, chain it into the bucket, and
   give it the next bitmap index (used by the dataflow sbitmaps).  */
1689 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1690 bytes_used += sizeof (struct expr);
1691 if (expr_hash_table[hash] == NULL)
1693 /* This is the first pattern that hashed to this index. */
1694 expr_hash_table[hash] = cur_expr;
1698 /* Add EXPR to end of this hash chain. */
1699 last_expr->next_same_hash = cur_expr;
1701 /* Set the fields of the expr element. */
1703 cur_expr->bitmap_index = n_exprs++;
1704 cur_expr->next_same_hash = NULL;
1705 cur_expr->antic_occr = NULL;
1706 cur_expr->avail_occr = NULL;
1709 /* Now record the occurrence(s). */
/* Anticipatable occurrence list: keep at most one occurrence per basic
   block, preferring the FIRST occurrence in the block.  */
1713 antic_occr = cur_expr->antic_occr;
1715 /* Search for another occurrence in the same basic block. */
1716 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1718 /* If an occurrence isn't found, save a pointer to the end of
1720 last_occr = antic_occr;
1721 antic_occr = antic_occr->next;
1726 /* Found another instance of the expression in the same basic block.
1727 Prefer the currently recorded one. We want the first one in the
1728 block and the block is scanned from start to end. */
1729 ; /* nothing to do */
1733 /* First occurrence of this expression in this basic block. */
1734 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1735 bytes_used += sizeof (struct occr);
1736 /* First occurrence of this expression in any block? */
1737 if (cur_expr->antic_occr == NULL)
1738 cur_expr->antic_occr = antic_occr;
1740 last_occr->next = antic_occr;
1741 antic_occr->insn = insn;
1742 antic_occr->next = NULL;
/* Available occurrence list: symmetric, but prefers the LAST occurrence
   in the block, so an existing same-block entry is overwritten in place.  */
1748 avail_occr = cur_expr->avail_occr;
1750 /* Search for another occurrence in the same basic block. */
1751 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1753 /* If an occurrence isn't found, save a pointer to the end of
1755 last_occr = avail_occr;
1756 avail_occr = avail_occr->next;
1761 /* Found another instance of the expression in the same basic block.
1762 Prefer this occurrence to the currently recorded one. We want
1763 the last one in the block and the block is scanned from start
1765 avail_occr->insn = insn;
1769 /* First occurrence of this expression in this basic block. */
1770 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1771 bytes_used += sizeof (struct occr);
1772 /* First occurrence of this expression in any block? */
1773 if (cur_expr->avail_occr == NULL)
1774 cur_expr->avail_occr = avail_occr;
1776 last_occr->next = avail_occr;
1777 avail_occr->insn = insn;
1778 avail_occr->next = NULL;
1783 /* Insert pattern X in INSN in the hash table.
1784 X is a SET of a reg to either another reg or a constant.
1785 If it is already present, record it as the last occurrence in INSN's
1789 insert_set_in_table (x, insn)
1795 struct expr *cur_expr, *last_expr = NULL;
1796 struct occr *cur_occr, *last_occr = NULL;
/* Precondition: X must be (set (reg) ...); anything else is a caller bug.  */
1798 if (GET_CODE (x) != SET
1799 || GET_CODE (SET_DEST (x)) != REG)
/* Sets are bucketed by destination register number, not by expression.  */
1802 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1804 cur_expr = set_hash_table[hash];
1807 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1809 /* If the expression isn't found, save a pointer to the end of
1811 last_expr = cur_expr;
1812 cur_expr = cur_expr->next_same_hash;
1817 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1818 bytes_used += sizeof (struct expr);
1819 if (set_hash_table[hash] == NULL)
1821 /* This is the first pattern that hashed to this index. */
1822 set_hash_table[hash] = cur_expr;
1826 /* Add EXPR to end of this hash chain. */
1827 last_expr->next_same_hash = cur_expr;
1829 /* Set the fields of the expr element.
1830 We must copy X because it can be modified when copy propagation is
1831 performed on its operands. */
1832 /* ??? Should this go in a different obstack? */
1833 cur_expr->expr = copy_rtx (x);
1834 cur_expr->bitmap_index = n_sets++;
1835 cur_expr->next_same_hash = NULL;
1836 cur_expr->antic_occr = NULL;
1837 cur_expr->avail_occr = NULL;
1840 /* Now record the occurrence. */
/* Only availability is tracked for sets; keep one occurrence per block,
   preferring the last one seen (block is scanned start to end).  */
1842 cur_occr = cur_expr->avail_occr;
1844 /* Search for another occurrence in the same basic block. */
1845 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1847 /* If an occurrence isn't found, save a pointer to the end of
1849 last_occr = cur_occr;
1850 cur_occr = cur_occr->next;
1855 /* Found another instance of the expression in the same basic block.
1856 Prefer this occurrence to the currently recorded one. We want
1857 the last one in the block and the block is scanned from start
1859 cur_occr->insn = insn;
1863 /* First occurrence of this expression in this basic block. */
1864 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1865 bytes_used += sizeof (struct occr);
1866 /* First occurrence of this expression in any block? */
1867 if (cur_expr->avail_occr == NULL)
1868 cur_expr->avail_occr = cur_occr;
1870 last_occr->next = cur_occr;
1871 cur_occr->insn = insn;
1872 cur_occr->next = NULL;
1876 /* Scan pattern PAT of INSN and add an entry to the hash table.
1877 If SET_P is non-zero, this is for the assignment hash table,
1878 otherwise it is for the expression hash table. */
1881 hash_scan_set (pat, insn, set_p)
1885 rtx src = SET_SRC (pat);
1886 rtx dest = SET_DEST (pat);
1888 if (GET_CODE (src) == CALL)
1889 hash_scan_call (src, insn);
1891 if (GET_CODE (dest) == REG)
1893 int regno = REGNO (dest);
1896 /* Only record sets of pseudo-regs in the hash table. */
/* Expression-table path (set_p == 0, guard elided in this extraction).  */
1898 && regno >= FIRST_PSEUDO_REGISTER
1899 /* Don't GCSE something if we can't do a reg/reg copy. */
1900 && can_copy_p [GET_MODE (dest)]
1901 /* Is SET_SRC something we want to gcse? */
1902 && want_to_gcse_p (src))
1904 /* An expression is not anticipatable if its operands are
1905 modified before this insn. */
1906 int antic_p = ! optimize_size && oprs_anticipatable_p (src, insn);
1907 /* An expression is not available if its operands are
1908 subsequently modified, including this insn. */
1909 int avail_p = oprs_available_p (src, insn);
1910 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1912 /* Record sets for constant/copy propagation. */
/* Set-table path (set_p != 0): src must be a pseudo reg or a constant.  */
1914 && regno >= FIRST_PSEUDO_REGISTER
1915 && ((GET_CODE (src) == REG
1916 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1917 && can_copy_p [GET_MODE (dest)])
1918 || GET_CODE (src) == CONST_INT
1919 || GET_CODE (src) == SYMBOL_REF
1920 || GET_CODE (src) == CONST_DOUBLE)
1921 /* A copy is not available if its src or dest is subsequently
1922 modified. Here we want to search from INSN+1 on, but
1923 oprs_available_p searches from INSN on. */
1924 && (insn == BLOCK_END (BLOCK_NUM (insn))
1925 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1926 && oprs_available_p (pat, tmp))))
1927 insert_set_in_table (pat, insn);
/* The next two hooks are placeholders kept so hash_scan_insn has a single
   dispatch shape; they intentionally do nothing today.  */
1932 hash_scan_clobber (x, insn)
1933 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1935 /* Currently nothing to do. */
1939 hash_scan_call (x, insn)
1940 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1942 /* Currently nothing to do. */
1945 /* Process INSN and add hash table entries as appropriate.
1947 Only available expressions that set a single pseudo-reg are recorded.
1949 Single sets in a PARALLEL could be handled, but it's an extra complication
1950 that isn't dealt with right now. The trick is handling the CLOBBERs that
1951 are also in the PARALLEL. Later.
1953 If SET_P is non-zero, this is for the assignment hash table,
1954 otherwise it is for the expression hash table.
1955 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1956 not record any expressions. */
1959 hash_scan_insn (insn, set_p, in_libcall_block)
1962 int in_libcall_block;
1964 rtx pat = PATTERN (insn);
1966 /* Pick out the sets of INSN and for other forms of instructions record
1967 what's been modified. */
1969 if (GET_CODE (pat) == SET && ! in_libcall_block)
1971 /* Ignore obvious no-ops. */
1972 if (SET_SRC (pat) != SET_DEST (pat))
1973 hash_scan_set (pat, insn, set_p);
1975 else if (GET_CODE (pat) == PARALLEL)
1979 for (i = 0; i < XVECLEN (pat, 0); i++)
1981 rtx x = XVECEXP (pat, 0, i);
1983 if (GET_CODE (x) == SET)
1985 if (GET_CODE (SET_SRC (x)) == CALL)
1986 hash_scan_call (SET_SRC (x), insn);
1988 else if (GET_CODE (x) == CLOBBER)
1989 hash_scan_clobber (x, insn);
1990 else if (GET_CODE (x) == CALL)
1991 hash_scan_call (x, insn);
1994 else if (GET_CODE (pat) == CLOBBER)
1995 hash_scan_clobber (pat, insn);
1996 else if (GET_CODE (pat) == CALL)
1997 hash_scan_call (pat, insn);
/* Debug dump of a hash table (expr or set) to FILE, printed in
   bitmap_index order rather than bucket order.  */
2001 dump_hash_table (file, name, table, table_size, total_size)
2004 struct expr **table;
2005 int table_size, total_size;
2008 /* Flattened out table, so it's printed in proper order. */
/* NOTE(review): alloca'd scratch sized by total_size -- a very large table
   could overflow the stack; heap allocation would be safer (debug-only
   path, so left as-is here).  */
2009 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
2010 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
2012 bzero ((char *) flat_table, total_size * sizeof (struct expr *));
2013 for (i = 0; i < table_size; i++)
2017 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2019 flat_table[expr->bitmap_index] = expr;
2020 hash_val[expr->bitmap_index] = i;
2024 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2025 name, table_size, total_size);
2027 for (i = 0; i < total_size; i++)
2029 struct expr *expr = flat_table[i];
2031 fprintf (file, "Index %d (hash value %d)\n ",
2032 expr->bitmap_index, hash_val[i]);
2033 print_rtl (file, expr->expr);
2034 fprintf (file, "\n");
2037 fprintf (file, "\n");
2040 /* Record register first/last/block set information for REGNO in INSN.
2041 reg_first_set records the first place in the block where the register
2042 is set and is used to compute "anticipatability".
2043 reg_last_set records the last place in the block where the register
2044 is set and is used to compute "availability".
2045 reg_set_in_block records whether the register is set in the block
2046 and is used to compute "transparency". */
2049 record_last_reg_set_info (insn, regno)
/* First set wins for reg_first_set; every set updates reg_last_set.  */
2053 if (reg_first_set[regno] == NEVER_SET)
2054 reg_first_set[regno] = INSN_CUID (insn);
2055 reg_last_set[regno] = INSN_CUID (insn);
2056 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2059 /* Record memory first/last/block set information for INSN. */
2062 record_last_mem_set_info (insn)
2065 if (mem_first_set == NEVER_SET)
2066 mem_first_set = INSN_CUID (insn);
2067 mem_last_set = INSN_CUID (insn);
2068 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2071 /* Used for communicating between next two routines. */
2072 static rtx last_set_insn;
2074 /* Called from compute_hash_table via note_stores to handle one
2075 SET or CLOBBER in an insn. */
2078 record_last_set_info (dest, setter)
2079 rtx dest, setter ATTRIBUTE_UNUSED;
2081 if (GET_CODE (dest) == SUBREG)
2082 dest = SUBREG_REG (dest);
2084 if (GET_CODE (dest) == REG)
2085 record_last_reg_set_info (last_set_insn, REGNO (dest));
2086 else if (GET_CODE (dest) == MEM
2087 /* Ignore pushes, they clobber nothing. */
2088 && ! push_operand (dest, GET_MODE (dest)))
2089 record_last_mem_set_info (last_set_insn);
2092 /* Top level function to create an expression or assignment hash table.
2094 Expression entries are placed in the hash table if
2095 - they are of the form (set (pseudo-reg) src),
2096 - src is something we want to perform GCSE on,
2097 - none of the operands are subsequently modified in the block
2099 Assignment entries are placed in the hash table if
2100 - they are of the form (set (pseudo-reg) src),
2101 - src is something we want to perform const/copy propagation on,
2102 - none of the operands or target are subsequently modified in the block
2103 Currently src must be a pseudo-reg or a const_int.
2105 F is the first insn.
2106 SET_P is non-zero for computing the assignment hash table. */
2109 compute_hash_table (set_p)
2114 /* While we compute the hash table we also compute a bit array of which
2115 registers are set in which blocks.
2116 We also compute which blocks set memory, in the absence of aliasing
2117 support [which is TODO].
2118 ??? This isn't needed during const/copy propagation, but it's cheap to
2120 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2121 bzero ((char *) mem_set_in_block, n_basic_blocks);
2123 /* Some working arrays used to track first and last set in each block. */
2124 /* ??? One could use alloca here, but at some size a threshold is crossed
2125 beyond which one should use malloc. Are we at that threshold here? */
2126 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2127 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
/* Two passes per basic block: pass 1 records first/last set cuids for
   registers and memory; pass 2 uses that info to build the hash table.  */
2129 for (bb = 0; bb < n_basic_blocks; bb++)
2133 int in_libcall_block;
2136 /* First pass over the instructions records information used to
2137 determine when registers and memory are first and last set.
2138 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2139 could be moved to compute_sets since they currently don't change. */
2141 for (i = 0; i < max_gcse_regno; i++)
2142 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2143 mem_first_set = NEVER_SET;
2144 mem_last_set = NEVER_SET;
2146 for (insn = BLOCK_HEAD (bb);
2147 insn && insn != NEXT_INSN (BLOCK_END (bb));
2148 insn = NEXT_INSN (insn))
/* A setjmp note conservatively counts as a set of every hard register.  */
2150 #ifdef NON_SAVING_SETJMP
2151 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2152 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2154 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2155 record_last_reg_set_info (insn, regno);
2160 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
/* Calls clobber all call-used (minus the fixed frame/stack/arg/PIC
   registers excluded below) and all global registers, and -- unless the
   call is const -- memory as well.  */
2163 if (GET_CODE (insn) == CALL_INSN)
2165 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2166 if ((call_used_regs[regno]
2167 && regno != STACK_POINTER_REGNUM
2168 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2169 && regno != HARD_FRAME_POINTER_REGNUM
2171 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2172 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2174 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2175 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2178 && regno != FRAME_POINTER_REGNUM)
2179 || global_regs[regno])
2180 record_last_reg_set_info (insn, regno);
2181 if (! CONST_CALL_P (insn))
2182 record_last_mem_set_info (insn);
2185 last_set_insn = insn;
2186 note_stores (PATTERN (insn), record_last_set_info);
2189 /* The next pass builds the hash table. */
/* in_libcall_block brackets REG_LIBCALL..REG_RETVAL sequences so their
   insns are not recorded as expressions.  */
2191 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2192 insn && insn != NEXT_INSN (BLOCK_END (bb));
2193 insn = NEXT_INSN (insn))
2195 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2197 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2198 in_libcall_block = 1;
2199 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2200 in_libcall_block = 0;
2201 hash_scan_insn (insn, set_p, in_libcall_block);
2206 free (reg_first_set);
2207 free (reg_last_set);
2208 /* Catch bugs early. */
2209 reg_first_set = reg_last_set = 0;
2212 /* Allocate space for the set hash table.
2213 N_INSNS is the number of instructions in the function.
2214 It is used to determine the number of buckets to use. */
2217 alloc_set_hash_table (n_insns)
/* Bucket count scales with function size, floored at 11 and forced odd.  */
2222 set_hash_table_size = n_insns / 4;
2223 if (set_hash_table_size < 11)
2224 set_hash_table_size = 11;
2225 /* Attempt to maintain efficient use of hash table.
2226 Making it an odd number is simplest for now.
2227 ??? Later take some measurements. */
2228 set_hash_table_size |= 1;
2229 n = set_hash_table_size * sizeof (struct expr *);
2230 set_hash_table = (struct expr **) gmalloc (n);
2233 /* Free things allocated by alloc_set_hash_table. */
2236 free_set_hash_table ()
2238 free (set_hash_table);
2241 /* Compute the hash table for doing copy/const propagation. */
2244 compute_set_hash_table ()
2246 /* Initialize count of number of entries in hash table. */
2248 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2250 compute_hash_table (1);
2253 /* Allocate space for the expression hash table.
2254 N_INSNS is the number of instructions in the function.
2255 It is used to determine the number of buckets to use. */
2258 alloc_expr_hash_table (n_insns)
/* Same sizing policy as the set table, but denser (n_insns / 2).  */
2263 expr_hash_table_size = n_insns / 2;
2264 /* Make sure the amount is usable. */
2265 if (expr_hash_table_size < 11)
2266 expr_hash_table_size = 11;
2267 /* Attempt to maintain efficient use of hash table.
2268 Making it an odd number is simplest for now.
2269 ??? Later take some measurements. */
2270 expr_hash_table_size |= 1;
2271 n = expr_hash_table_size * sizeof (struct expr *);
2272 expr_hash_table = (struct expr **) gmalloc (n);
2275 /* Free things allocated by alloc_expr_hash_table. */
2278 free_expr_hash_table ()
2280 free (expr_hash_table);
2283 /* Compute the hash table for doing GCSE. */
2286 compute_expr_hash_table ()
2288 /* Initialize count of number of entries in hash table. */
2290 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2292 compute_hash_table (0);
2295 /* Expression tracking support. */
2297 /* Lookup pattern PAT in the expression table.
2298 The result is a pointer to the table entry, or NULL if not found. */
2300 static struct expr *
2304 int do_not_record_p;
2305 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2306 expr_hash_table_size);
/* Unhashable patterns (e.g. volatile operands) were never inserted.  */
2309 if (do_not_record_p)
2312 expr = expr_hash_table[hash];
2314 while (expr && ! expr_equiv_p (expr->expr, pat))
2315 expr = expr->next_same_hash;
2320 /* Lookup REGNO in the set table.
2321 If PAT is non-NULL look for the entry that matches it, otherwise return
2322 the first entry for REGNO.
2323 The result is a pointer to the table entry, or NULL if not found. */
2325 static struct expr *
2326 lookup_set (regno, pat)
2330 unsigned int hash = hash_set (regno, set_hash_table_size);
2333 expr = set_hash_table[hash];
/* With PAT: exact match; without: first entry whose SET_DEST is REGNO
   (needed since different regnos can share a bucket).  */
2337 while (expr && ! expr_equiv_p (expr->expr, pat))
2338 expr = expr->next_same_hash;
2342 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2343 expr = expr->next_same_hash;
2349 /* Return the next entry for REGNO in list EXPR. */
2351 static struct expr *
2352 next_set (regno, expr)
2357 expr = expr->next_same_hash;
2358 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2362 /* Reset tables used to keep track of what's still available [since the
2363 start of the block]. */
2366 reset_opr_set_tables ()
2368 /* Maintain a bitmap of which regs have been set since beginning of
2370 sbitmap_zero (reg_set_bitmap);
2371 /* Also keep a record of the last instruction to modify memory.
2372 For now this is very trivial, we only record whether any memory
2373 location has been modified. */
2377 /* Return non-zero if the operands of X are not set before INSN in
2378 INSN's basic block. */
2381 oprs_not_set_p (x, insn)
2388 /* repeat is used to turn tail-recursion into iteration. */
2394 code = GET_CODE (x);
2409 if (mem_last_set != 0)
2415 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2421 fmt = GET_RTX_FORMAT (code);
2422 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2427 /* If we are about to do the last recursive call
2428 needed at this level, change it into iteration.
2429 This function is called enough to be worth it. */
2435 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2439 else if (fmt[i] == 'E')
2442 for (j = 0; j < XVECLEN (x, i); j++)
2444 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2454 /* Mark things set by a CALL. */
2460 mem_last_set = INSN_CUID (insn);
2463 /* Mark things set by a SET. */
2466 mark_set (pat, insn)
2469 rtx dest = SET_DEST (pat);
/* Strip wrappers so we see the register or memory actually written. */
2471 while (GET_CODE (dest) == SUBREG
2472 || GET_CODE (dest) == ZERO_EXTRACT
2473 || GET_CODE (dest) == SIGN_EXTRACT
2474 || GET_CODE (dest) == STRICT_LOW_PART)
2475 dest = XEXP (dest, 0);
2477 if (GET_CODE (dest) == REG)
2478 SET_BIT (reg_set_bitmap, REGNO (dest));
2479 else if (GET_CODE (dest) == MEM)
2480 mem_last_set = INSN_CUID (insn);
/* A SET whose source is a CALL also modifies memory; the statement that
   handles it (presumably a call to mark_call) is elided.  */
2482 if (GET_CODE (SET_SRC (pat)) == CALL)
2486 /* Record things set by a CLOBBER. */
2489 mark_clobber (pat, insn)
2492 rtx clob = XEXP (pat, 0);
2494 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2495 clob = XEXP (clob, 0);
2497 if (GET_CODE (clob) == REG)
2498 SET_BIT (reg_set_bitmap, REGNO (clob));
/* NOTE(review): an `else' presumably precedes this in the elided text:
   a non-register clobber is treated as a memory modification.  */
2500 mem_last_set = INSN_CUID (insn);
2503 /* Record things set by INSN.
2504 This data is used by oprs_not_set_p. */
2507 mark_oprs_set (insn)
2510 rtx pat = PATTERN (insn);
2512 if (GET_CODE (pat) == SET)
2513 mark_set (pat, insn);
2514 else if (GET_CODE (pat) == PARALLEL)
/* Dispatch each element of the PARALLEL on its own. */
2518 for (i = 0; i < XVECLEN (pat, 0); i++)
2520 rtx x = XVECEXP (pat, 0, i);
2522 if (GET_CODE (x) == SET)
/* NOTE(review): the mark_set call for this arm is elided.  */
2524 else if (GET_CODE (x) == CLOBBER)
2525 mark_clobber (x, insn);
2526 else if (GET_CODE (x) == CALL)
2530 else if (GET_CODE (pat) == CLOBBER)
2531 mark_clobber (pat, insn);
2532 else if (GET_CODE (pat) == CALL)
2537 /* Classic GCSE reaching definition support. */
2539 /* Allocate reaching def variables. */
2542 alloc_rd_mem (n_blocks, n_insns)
2543 int n_blocks, n_insns;
/* Each vector holds one sbitmap per basic block with one bit per insn
   (indexed by cuid); all four start zeroed.  */
2545 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2546 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2548 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2549 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2551 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2552 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2554 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2555 sbitmap_vector_zero (rd_out, n_basic_blocks);
2558 /* Free reaching def variables. */
/* NOTE(review): the function header and the frees of rd_kill, rd_gen and
   rd_out are elided; only the free of reaching_defs is visible.  */
2565 free (reaching_defs);
2569 /* Add INSN to the kills of BB.
2570 REGNO, set in BB, is killed by INSN. */
2573 handle_rd_kill_set (insn, regno, bb)
2577 struct reg_set *this_reg = reg_set_table[regno];
/* Walk every recorded setting of REGNO; each one in a different block
   from INSN kills the definition reaching BB.  (Loop construct around
   these statements is elided in this listing.)  */
2581 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2582 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2583 this_reg = this_reg->next;
2587 /* Compute the set of kill's for reaching definitions. */
2595 For each set bit in `gen' of the block (i.e each insn which
2596 generates a definition in the block)
2597 Call the reg set by the insn corresponding to that bit regx
2598 Look at the linked list starting at reg_set_table[regx]
2599 For each setting of regx in the linked list, which is not in
2601 Set the bit in `kill' corresponding to that insn
2604 for (bb = 0; bb < n_basic_blocks; bb++)
2606 for (cuid = 0; cuid < max_cuid; cuid++)
2608 if (TEST_BIT (rd_gen[bb], cuid))
2610 rtx insn = CUID_INSN (cuid);
2611 rtx pat = PATTERN (insn);
/* A call clobbers every call-used hard register (with the usual
   stack/frame/arg/PIC pointer exceptions) and every global register.  */
2613 if (GET_CODE (insn) == CALL_INSN)
2617 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2619 if ((call_used_regs[regno]
2620 && regno != STACK_POINTER_REGNUM
2621 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2622 && regno != HARD_FRAME_POINTER_REGNUM
2624 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2625 && ! (regno == ARG_POINTER_REGNUM
2626 && fixed_regs[regno])
2628 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2629 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2631 && regno != FRAME_POINTER_REGNUM)
2632 || global_regs[regno])
2633 handle_rd_kill_set (insn, regno, bb);
/* Register-setting SETs and CLOBBERs inside a PARALLEL each add kills. */
2637 if (GET_CODE (pat) == PARALLEL)
2641 /* We work backwards because ... */
2642 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2644 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2645 if ((code == SET || code == CLOBBER)
2646 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2647 handle_rd_kill_set (insn,
2648 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2652 else if (GET_CODE (pat) == SET)
2654 if (GET_CODE (SET_DEST (pat)) == REG)
2656 /* Each setting of this register outside of this block
2657 must be marked in the set of kills in this block. */
2658 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2661 /* FIXME: CLOBBER? */
2667 /* Compute the reaching definitions as in
2668 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2669 Chapter 10. It is the same algorithm as used for computing available
2670 expressions but applied to the gens and kills of reaching definitions. */
2675 int bb, changed, passes;
2677 for (bb = 0; bb < n_basic_blocks; bb++)
2678 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
/* Iterate to a fixed point:
     reaching_defs[bb] = union of rd_out over predecessors of bb;
     rd_out[bb]        = rd_gen[bb] | (reaching_defs[bb] & ~rd_kill[bb]).
   (The enclosing `while (changed)' loop is elided in this listing.)  */
2685 for (bb = 0; bb < n_basic_blocks; bb++)
2687 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2688 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2689 reaching_defs[bb], rd_kill[bb]);
2695 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2698 /* Classic GCSE available expression support. */
2700 /* Allocate memory for available expression computation. */
2703 alloc_avail_expr_mem (n_blocks, n_exprs)
2704 int n_blocks, n_exprs;
/* One sbitmap per basic block, one bit per hashed expression.  */
2706 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2707 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2709 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2710 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2712 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2713 sbitmap_vector_zero (ae_in, n_basic_blocks);
2715 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2716 sbitmap_vector_zero (ae_out, n_basic_blocks);
/* u_bitmap is the universal set: used as the "all expressions" operand
   when initializing the dataflow solution.  */
2718 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2719 sbitmap_ones (u_bitmap);
/* Free the bitmaps allocated by alloc_avail_expr_mem.
   NOTE(review): the body is elided from this listing.  */
2723 free_avail_expr_mem ()
2732 /* Compute the set of available expressions generated in each basic block. */
2739 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2740 This is all we have to do because an expression is not recorded if it
2741 is not available, and the only expressions we want to work with are the
2742 ones that are recorded. */
2744 for (i = 0; i < expr_hash_table_size; i++)
2746 struct expr *expr = expr_hash_table[i];
2747 while (expr != NULL)
2749 struct occr *occr = expr->avail_occr;
2750 while (occr != NULL)
2752 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
/* NOTE(review): `occr = occr->next;' is elided from this listing.  */
2755 expr = expr->next_same_hash;
2760 /* Return non-zero if expression X is killed in BB. */
2763 expr_killed_p (x, bb)
2771 /* repeat is used to turn tail-recursion into iteration. */
2777 code = GET_CODE (x);
/* A REG is killed if the block sets it; a MEM is conservatively killed
   whenever any memory store happens in the block.  (Switch framing is
   elided from this listing.)  */
2781 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2784 if (mem_set_in_block[bb])
/* Recurse over the operands of X. */
2804 i = GET_RTX_LENGTH (code) - 1;
2805 fmt = GET_RTX_FORMAT (code);
2810 rtx tem = XEXP (x, i);
2812 /* If we are about to do the last recursive call
2813 needed at this level, change it into iteration.
2814 This function is called enough to be worth it. */
2820 if (expr_killed_p (tem, bb))
2823 else if (fmt[i] == 'E')
2826 for (j = 0; j < XVECLEN (x, i); j++)
2828 if (expr_killed_p (XVECEXP (x, i, j), bb))
2837 /* Compute the set of available expressions killed in each basic block. */
2844 for (bb = 0; bb < n_basic_blocks; bb++)
2846 for (i = 0; i < expr_hash_table_size; i++)
2848 struct expr *expr = expr_hash_table[i];
2850 for ( ; expr != NULL; expr = expr->next_same_hash)
2852 /* Skip EXPR if generated in this block. */
2853 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2856 if (expr_killed_p (expr->expr, bb))
2857 SET_BIT (ae_kill[bb], expr->bitmap_index);
2863 /* Compute available expressions.
2865 Implement the algorithm to find available expressions
2866 as given in the Aho Sethi Ullman book, pages 627-631. */
2869 compute_available ()
2871 int bb, changed, passes;
/* Entry block: nothing available on entry; out = gen.  All other
   blocks are optimistically initialized to "everything but kills".  */
2873 sbitmap_zero (ae_in[0]);
2875 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
2877 for (bb = 1; bb < n_basic_blocks; bb++)
2878 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
/* Iterate to a fixed point; the enclosing `while (changed)' loop is
   elided from this listing.  */
2885 for (bb = 1; bb < n_basic_blocks; bb++)
2887 sbitmap_intersection_of_preds (ae_in[bb], ae_out, bb);
2888 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
2889 ae_in[bb], ae_kill[bb]);
2895 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
2898 /* Actually perform the Classic GCSE optimizations. */
2900 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2902 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2903 as a positive reach. We want to do this when there are two computations
2904 of the expression in the block.
2906 VISITED is a pointer to a working buffer for tracking which BB's have
2907 been visited. It is NULL for the top-level call.
2909 We treat reaching expressions that go through blocks containing the same
2910 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2911 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2912 2 as not reaching. The intent is to improve the probability of finding
2913 only one reaching expression and to reduce register lifetimes by picking
2914 the closest such expression. */
2917 expr_reaches_here_p (occr, expr, bb, check_self_loop, visited)
2921 int check_self_loop;
/* Lazily allocate the visited buffer on the top-level call only. */
2926 if (visited == NULL)
2928 visited = (char *) alloca (n_basic_blocks);
2929 bzero (visited, n_basic_blocks);
/* Depth-first walk backwards through the predecessors of BB. */
2932 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2934 int pred_bb = pred->src->index;
2936 if (visited[pred_bb])
2938 /* This predecessor has already been visited.
2942 else if (pred_bb == bb)
2944 /* BB loops on itself. */
2946 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2947 && BLOCK_NUM (occr->insn) == pred_bb)
2949 visited[pred_bb] = 1;
2951 /* Ignore this predecessor if it kills the expression. */
2952 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2953 visited[pred_bb] = 1;
2954 /* Does this predecessor generate this expression? */
2955 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2957 /* Is this the occurrence we're looking for?
2958 Note that there's only one generating occurrence per block
2959 so we just need to check the block number. */
2960 if (BLOCK_NUM (occr->insn) == pred_bb)
2962 visited[pred_bb] = 1;
2964 /* Neither gen nor kill. */
2967 visited[pred_bb] = 1;
2968 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited))
2973 /* All paths have been checked. */
2977 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2978 If there is more than one such instruction, return NULL.
2980 Called only by handle_avail_expr. */
2983 computing_insn (expr, insn)
2987 int bb = BLOCK_NUM (insn);
/* Single occurrence: it is the answer, unless it is in INSN's own block
   (a flow-graph loop), in which case there is nothing to do.  */
2989 if (expr->avail_occr->next == NULL)
2991 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2993 /* The available expression is actually itself
2994 (i.e. a loop in the flow graph) so do nothing. */
2997 /* (FIXME) Case that we found a pattern that was created by
2998 a substitution that took place. */
2999 return expr->avail_occr->insn;
3003 /* Pattern is computed more than once.
3004 Search backwards from this insn to see how many of these
3005 computations actually reach this insn. */
3007 rtx insn_computes_expr = NULL;
3010 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3012 if (BLOCK_NUM (occr->insn) == bb)
3014 /* The expression is generated in this block.
3015 The only time we care about this is when the expression
3016 is generated later in the block [and thus there's a loop].
3017 We let the normal cse pass handle the other cases. */
3018 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3020 if (expr_reaches_here_p (occr, expr, bb, 1, NULL))
/* NOTE(review): the elided lines presumably bail out (return NULL) when
   a second reaching computation has already been found.  */
3025 insn_computes_expr = occr->insn;
3029 else /* Computation of the pattern outside this block. */
3031 if (expr_reaches_here_p (occr, expr, bb, 0, NULL))
3036 insn_computes_expr = occr->insn;
3041 if (insn_computes_expr == NULL)
3043 return insn_computes_expr;
3047 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3048 Only called by can_disregard_other_sets. */
3051 def_reaches_here_p (insn, def_insn)
/* Fast path: the global reaching-defs solution already answers it. */
3056 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
/* Same-block case: DEF_INSN reaches if it precedes INSN and the defined
   register is not set again in between.  */
3059 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3061 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3063 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3065 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3066 reg = XEXP (PATTERN (def_insn), 0);
3067 else if (GET_CODE (PATTERN (def_insn)) == SET)
3068 reg = SET_DEST (PATTERN (def_insn));
3071 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3080 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3081 The value returned is the number of definitions that reach INSN.
3082 Returning a value of zero means that [maybe] more than one definition
3083 reaches INSN and the caller can't perform whatever optimization it is
3084 trying. i.e. it is always safe to return zero. */
3087 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3088 struct reg_set **addr_this_reg;
3092 int number_of_reaching_defs = 0;
3093 struct reg_set *this_reg = *addr_this_reg;
/* Walk every recorded setting of the register; count the ones that
   actually reach INSN (the enclosing loop construct is elided).  */
3097 if (def_reaches_here_p (insn, this_reg->insn))
3099 number_of_reaching_defs++;
3100 /* Ignore parallels for now. */
3101 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
/* NOTE(review): the elided condition presumably tests !for_combine.  */
3104 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3105 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3106 SET_SRC (PATTERN (insn)))))
3108 /* A setting of the reg to a different value reaches INSN. */
3111 if (number_of_reaching_defs > 1)
3113 /* If in this setting the value the register is being
3114 set to is equal to the previous value the register
3115 was set to and this setting reaches the insn we are
3116 trying to do the substitution on then we are ok. */
3118 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3120 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3121 SET_SRC (PATTERN (insn))))
/* Hand the last acceptable setting back to the caller. */
3124 *addr_this_reg = this_reg;
3127 /* prev_this_reg = this_reg; */
3128 this_reg = this_reg->next;
3131 return number_of_reaching_defs;
3134 /* Expression computed by insn is available and the substitution is legal,
3135 so try to perform the substitution.
3137 The result is non-zero if any changes were made. */
3140 handle_avail_expr (insn, expr)
3144 rtx pat, insn_computes_expr;
3146 struct reg_set *this_reg;
3147 int found_setting, use_src;
3150 /* We only handle the case where one computation of the expression
3151 reaches this instruction. */
3152 insn_computes_expr = computing_insn (expr, insn);
3153 if (insn_computes_expr == NULL)
3159 /* At this point we know only one computation of EXPR outside of this
3160 block reaches this insn. Now try to find a register that the
3161 expression is computed into. */
3163 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3165 /* This is the case when the available expression that reaches
3166 here has already been handled as an available expression. */
3167 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3168 /* If the register was created by GCSE we can't use `reg_set_table',
3169 however we know it's set only once. */
3170 if (regnum_for_replacing >= max_gcse_regno
3171 /* If the register the expression is computed into is set only once,
3172 or only one set reaches this insn, we can use it. */
3173 || (((this_reg = reg_set_table[regnum_for_replacing]),
3174 this_reg->next == NULL)
3175 || can_disregard_other_sets (&this_reg, insn, 0)))
/* NOTE(review): the elided lines presumably set found_setting and
   use_src before the `else' branch below.  */
3184 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3185 /* This shouldn't happen. */
3186 if (regnum_for_replacing >= max_gcse_regno)
3188 this_reg = reg_set_table[regnum_for_replacing];
3189 /* If the register the expression is computed into is set only once,
3190 or only one set reaches this insn, use it. */
3191 if (this_reg->next == NULL
3192 || can_disregard_other_sets (&this_reg, insn, 0))
/* Case 1: a usable register was found -- substitute it for INSN's
   source.  */
3198 pat = PATTERN (insn);
3200 to = SET_SRC (PATTERN (insn_computes_expr));
3202 to = SET_DEST (PATTERN (insn_computes_expr));
3203 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3205 /* We should be able to ignore the return code from validate_change but
3206 to play it safe we check. */
3210 if (gcse_file != NULL)
3212 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3213 INSN_UID (insn), REGNO (to),
3214 use_src ? "from" : "set in",
3215 INSN_UID (insn_computes_expr));
3220 /* The register that the expr is computed into is set more than once. */
3221 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3223 /* Insert an insn after insnx that copies the reg set in insnx
3224 into a new pseudo register call this new register REGN.
3225 From insnb until end of basic block or until REGB is set
3226 replace all uses of REGB with REGN. */
3229 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3231 /* Generate the new insn. */
3232 /* ??? If the change fails, we return 0, even though we created
3233 an insn. I think this is ok. */
3235 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3236 SET_DEST (PATTERN (insn_computes_expr))),
3237 insn_computes_expr);
3238 /* Keep block number table up to date. */
3239 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3240 /* Keep register set table up to date. */
3241 record_one_set (REGNO (to), new_insn);
3243 gcse_create_count++;
3244 if (gcse_file != NULL)
3246 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3247 INSN_UID (NEXT_INSN (insn_computes_expr)),
3248 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3249 INSN_UID (insn_computes_expr));
3250 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
3253 pat = PATTERN (insn);
3255 /* Do register replacement for INSN. */
3256 changed = validate_change (insn, &SET_SRC (pat),
3257 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3260 /* We should be able to ignore the return code from validate_change but
3261 to play it safe we check. */
3265 if (gcse_file != NULL)
3267 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3269 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3270 INSN_UID (insn_computes_expr));
3279 /* Perform classic GCSE.
3280 This is called by one_classic_gcse_pass after all the dataflow analysis
3283 The result is non-zero if a change was made. */
/* Scan every insn in every block except the entry block, replacing
   redundant computations whose expression is available on block entry.  */
3291 /* Note we start at block 1. */
3294 for (bb = 1; bb < n_basic_blocks; bb++)
3296 /* Reset tables used to keep track of what's still valid [since the
3297 start of the block]. */
3298 reset_opr_set_tables ();
3300 for (insn = BLOCK_HEAD (bb);
3301 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3302 insn = NEXT_INSN (insn))
3304 /* Is insn of form (set (pseudo-reg) ...)? */
3306 if (GET_CODE (insn) == INSN
3307 && GET_CODE (PATTERN (insn)) == SET
3308 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3309 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3311 rtx pat = PATTERN (insn);
3312 rtx src = SET_SRC (pat);
3315 if (want_to_gcse_p (src)
3316 /* Is the expression recorded? */
3317 && ((expr = lookup_expr (src)) != NULL)
3318 /* Is the expression available [at the start of the
3320 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3321 /* Are the operands unchanged since the start of the
3323 && oprs_not_set_p (src, insn))
3324 changed |= handle_avail_expr (insn, expr);
3327 /* Keep track of everything modified by this insn. */
3328 /* ??? Need to be careful w.r.t. mods done to INSN. */
3329 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3330 mark_oprs_set (insn);
3337 /* Top level routine to perform one classic GCSE pass.
3339 Return non-zero if a change was made. */
3342 one_classic_gcse_pass (pass)
3347 gcse_subst_count = 0;
3348 gcse_create_count = 0;
/* Build the expression hash table, the reaching-definition and
   available-expression dataflow solutions, then run classic_gcse and
   release everything in reverse order of allocation.  */
3350 alloc_expr_hash_table (max_cuid);
3351 alloc_rd_mem (n_basic_blocks, max_cuid);
3352 compute_expr_hash_table ();
3354 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3355 expr_hash_table_size, n_exprs);
3360 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3363 compute_available ();
3364 changed = classic_gcse ();
3365 free_avail_expr_mem ();
3368 free_expr_hash_table ();
3372 fprintf (gcse_file, "\n");
3373 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3374 current_function_name, pass,
3375 bytes_used, gcse_subst_count, gcse_create_count);
3381 /* Compute copy/constant propagation working variables. */
3383 /* Local properties of assignments. */
/* pavloc/absaltered: per-block local dataflow properties (presumably
   "potentially available locally" / "absolutely altered" -- confirm
   against compute_local_properties).  */
3385 static sbitmap *cprop_pavloc;
3386 static sbitmap *cprop_absaltered;
3388 /* Global properties of assignments (computed from the local properties). */
3390 static sbitmap *cprop_avin;
3391 static sbitmap *cprop_avout;
3393 /* Allocate vars used for copy/const propagation.
3394 N_BLOCKS is the number of basic blocks.
3395 N_SETS is the number of sets. */
3398 alloc_cprop_mem (n_blocks, n_sets)
3399 int n_blocks, n_sets;
/* One sbitmap per basic block, one bit per assignment.  */
3401 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3402 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3404 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3405 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3408 /* Free vars used by copy/const propagation. */
3413 free (cprop_pavloc);
3414 free (cprop_absaltered);
/* NOTE(review): the frees of cprop_avin and cprop_avout are elided from
   this listing.  */
3419 /* For each block, compute whether X is transparent.
3420 X is either an expression or an assignment [though we don't care which,
3421 for this context an assignment is treated as an expression].
3422 For each block where an element of X is modified, set (SET_P == 1) or reset
3423 (SET_P == 0) the INDX bit in BMAP. */
3426 compute_transp (x, indx, bmap, set_p)
3436 /* repeat is used to turn tail-recursion into iteration. */
3442 code = GET_CODE (x);
/* REG case: hard registers consult the per-block reg_set_in_block
   bitmaps; pseudos walk the reg_set_table list of setting insns.  */
3448 int regno = REGNO (x);
3452 if (regno < FIRST_PSEUDO_REGISTER)
3454 for (bb = 0; bb < n_basic_blocks; bb++)
3455 if (TEST_BIT (reg_set_in_block[bb], regno))
3456 SET_BIT (bmap[bb], indx);
3460 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3462 bb = BLOCK_NUM (r->insn);
3463 SET_BIT (bmap[bb], indx);
3469 if (regno < FIRST_PSEUDO_REGISTER)
3471 for (bb = 0; bb < n_basic_blocks; bb++)
3472 if (TEST_BIT (reg_set_in_block[bb], regno))
3473 RESET_BIT (bmap[bb], indx);
3477 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3479 bb = BLOCK_NUM (r->insn);
3480 RESET_BIT (bmap[bb], indx);
/* MEM case: conservatively treat any memory store in a block as
   modifying X.  */
3490 for (bb = 0; bb < n_basic_blocks; bb++)
3491 if (mem_set_in_block[bb])
3492 SET_BIT (bmap[bb], indx);
3496 for (bb = 0; bb < n_basic_blocks; bb++)
3497 if (mem_set_in_block[bb])
3498 RESET_BIT (bmap[bb], indx);
/* Default: recurse over the operands of X. */
3518 i = GET_RTX_LENGTH (code) - 1;
3519 fmt = GET_RTX_FORMAT (code);
3524 rtx tem = XEXP (x, i);
3526 /* If we are about to do the last recursive call
3527 needed at this level, change it into iteration.
3528 This function is called enough to be worth it. */
3534 compute_transp (tem, indx, bmap, set_p);
3536 else if (fmt[i] == 'E')
3539 for (j = 0; j < XVECLEN (x, i); j++)
3540 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3545 /* Compute the available expressions at the start and end of each
3546 basic block for cprop. This particular dataflow equation is
3547 used often enough that we might want to generalize it and make
3548 as a subroutine for other global optimizations that need available
3549 in/out information. */
3551 compute_cprop_avinout ()
3553 int bb, changed, passes;
/* Nothing available on entry to block 0; all other out-sets start
   optimistically full.  */
3555 sbitmap_zero (cprop_avin[0]);
3556 sbitmap_vector_ones (cprop_avout, n_basic_blocks);
/* Iterate to a fixed point (the enclosing `while (changed)' loop is
   elided from this listing).  */
3563 for (bb = 0; bb < n_basic_blocks; bb++)
3566 sbitmap_intersection_of_preds (cprop_avin[bb], cprop_avout, bb);
3567 changed |= sbitmap_union_of_diff (cprop_avout[bb],
3570 cprop_absaltered[bb]);
3576 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3579 /* Top level routine to do the dataflow analysis needed by copy/const
/* Compute the per-block local properties, then solve the global
   availability equations.  */
3583 compute_cprop_data ()
3585 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3586 compute_cprop_avinout ();
3589 /* Copy/constant propagation. */
3591 /* Maximum number of register uses in an insn that we handle. */
/* NOTE(review): the #define of MAX_USES itself is elided from this
   listing.  */
3594 /* Table of uses found in an insn.
3595 Allocated statically to avoid alloc/free complexity and overhead. */
3596 static struct reg_use reg_use_table[MAX_USES];
3598 /* Index into `reg_use_table' while building it. */
3599 static int reg_use_count;
3601 /* Set up a list of register numbers used in INSN.
3602 The found uses are stored in `reg_use_table'.
3603 `reg_use_count' is initialized to zero before entry, and
3604 contains the number of uses in the table upon exit.
3606 ??? If a register appears multiple times we will record it multiple
3607 times. This doesn't hurt anything but it will slow things down. */
3617 /* repeat is used to turn tail-recursion into iteration. */
3623 code = GET_CODE (x);
/* REG case: record the use, silently stopping when the fixed-size
   table is full.  */
3627 if (reg_use_count == MAX_USES)
3629 reg_use_table[reg_use_count].reg_rtx = x;
3647 case ASM_INPUT: /*FIXME*/
/* For a SET, only a MEM destination contains uses (its address). */
3651 if (GET_CODE (SET_DEST (x)) == MEM)
3652 find_used_regs (SET_DEST (x));
3660 /* Recursively scan the operands of this expression. */
3662 fmt = GET_RTX_FORMAT (code);
3663 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3667 /* If we are about to do the last recursive call
3668 needed at this level, change it into iteration.
3669 This function is called enough to be worth it. */
3675 find_used_regs (XEXP (x, i));
3677 else if (fmt[i] == 'E')
3680 for (j = 0; j < XVECLEN (x, i); j++)
3681 find_used_regs (XVECEXP (x, i, j));
3686 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3687 Returns non-zero is successful. */
3690 try_replace_reg (from, to, insn)
3693 /* If this fails we could try to simplify the result of the
3694 replacement and attempt to recognize the simplified insn.
3696 But we need a general simplify_rtx that doesn't have pass
3697 specific state variables. I'm not aware of one at the moment. */
3698 return validate_replace_src (from, to, insn);
3701 /* Find a set of REGNO that is available on entry to INSN's block.
3702 Returns NULL if not found. */
3704 static struct expr *
3705 find_avail_set (regno, insn)
3709 /* SET1 contains the last set found that can be returned to the caller for
3710 use in a substitution. */
3711 struct expr *set1 = 0;
3713 /* Loops are not possible here. To get a loop we would need two sets
3714 available at the start of the block containing INSN. ie we would
3715 need two sets like this available at the start of the block:
3717 (set (reg X) (reg Y))
3718 (set (reg Y) (reg X))
3720 This can not happen since the set of (reg Y) would have killed the
3721 set of (reg X) making it unavailable at the start of this block. */
/* Chain-following loop: each iteration looks up an available set of the
   current REGNO and, for copies, continues with the source register.  */
3725 struct expr *set = lookup_set (regno, NULL_RTX);
3727 /* Find a set that is available at the start of the block
3728 which contains INSN. */
3731 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3733 set = next_set (regno, set);
3736 /* If no available set was found we've reached the end of the
3737 (possibly empty) copy chain. */
3741 if (GET_CODE (set->expr) != SET)
3744 src = SET_SRC (set->expr);
3746 /* We know the set is available.
3747 Now check that SRC is ANTLOC (i.e. none of the source operands
3748 have changed since the start of the block).
3750 If the source operand changed, we may still use it for the next
3751 iteration of this loop, but we may not use it for substitutions. */
3752 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
/* NOTE(review): `set1 = set;' is presumably the elided statement here.  */
3755 /* If the source of the set is anything except a register, then
3756 we have reached the end of the copy chain. */
3757 if (GET_CODE (src) != REG)
3760 /* Follow the copy chain, ie start another iteration of the loop
3761 and see if we have an available copy into SRC. */
3762 regno = REGNO (src);
3765 /* SET1 holds the last set that was available and anticipatable at
3770 /* Subroutine of cprop_insn that tries to propagate constants into
3771 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3772 that we can use for substitutions.
3773 REG_USED is the use we will try to replace, SRC is the constant we
3774 will try to substitute for it.
3775 Returns nonzero if a change was made. */
3777 cprop_jump (insn, copy, reg_used, src)
3779 struct reg_use *reg_used;
3782 rtx set = PATTERN (copy);
3785 /* Replace the register with the appropriate constant. */
3786 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
/* The source of a conditional jump is an IF_THEN_ELSE, so try the
   ternary simplifier on it.  */
3788 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3789 GET_MODE (SET_SRC (set)),
3790 GET_MODE (XEXP (SET_SRC (set), 0)),
3791 XEXP (SET_SRC (set), 0),
3792 XEXP (SET_SRC (set), 1),
3793 XEXP (SET_SRC (set), 2));
3795 /* If no simplification can be made, then try the next
3800 SET_SRC (set) = temp;
3802 /* That may have changed the structure of TEMP, so
3803 force it to be rerecognized if it has not turned
3804 into a nop or unconditional jump. */
3806 INSN_CODE (copy) = -1;
3807 if ((SET_DEST (set) == pc_rtx
3808 && (SET_SRC (set) == pc_rtx
3809 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3810 || recog (PATTERN (copy), copy, NULL) >= 0)
3812 /* This has either become an unconditional jump
3813 or a nop-jump. We'd like to delete nop jumps
3814 here, but doing so confuses gcse. So we just
3815 make the replacement and let later passes
3817 PATTERN (insn) = set;
3818 INSN_CODE (insn) = -1;
3820 /* One less use of the label this insn used to jump to
3821 if we turned this into a NOP jump. */
3822 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3823 --LABEL_NUSES (JUMP_LABEL (insn));
3825 /* If this has turned into an unconditional jump,
3826 then put a barrier after it so that the unreachable
3827 code will be deleted. */
3828 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3829 emit_barrier_after (insn);
3831 run_jump_opt_after_gcse = 1;
3834 if (gcse_file != NULL)
3836 int regno = REGNO (reg_used->reg_rtx);
3837 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3838 regno, INSN_UID (insn));
3839 print_rtl (gcse_file, src);
3840 fprintf (gcse_file, "\n");
3848 /* Subroutine of cprop_insn that tries to propagate constants into
3849 JUMP_INSNS for machines that have CC0. INSN is a single set that
3850 stores into CC0; the insn following it is a conditional jump.
3851 REG_USED is the use we will try to replace, SRC is the constant we
3852 will try to substitute for it.
3853 Returns nonzero if a change was made. */
3855 cprop_cc0_jump (insn, reg_used, src)
3857 struct reg_use *reg_used;
3860 rtx jump = NEXT_INSN (insn);
3861 rtx copy = copy_rtx (jump);
3862 rtx set = PATTERN (copy);
3864 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3865 substitute into it. */
3866 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3867 if (! cprop_jump (jump, copy, reg_used, src))
3870 /* If we succeeded, delete the cc0 setter. */
/* Deletion is done by turning the insn into a note, the standard idiom
   at this point in compilation.  */
3871 PUT_CODE (insn, NOTE);
3872 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3873 NOTE_SOURCE_FILE (insn) = 0;
3878 /* Perform constant and copy propagation on INSN.
3879 The result is non-zero if a change was made. */
3882 cprop_insn (insn, alter_jumps)
3886 struct reg_use *reg_used;
3889 /* Only propagate into SETs. Note that a conditional jump is a
3890 SET with pc_rtx as the destination. */
3891 if ((GET_CODE (insn) != INSN
3892 && GET_CODE (insn) != JUMP_INSN)
3893 || GET_CODE (PATTERN (insn)) != SET)
3897 find_used_regs (PATTERN (insn));
/* FIX: the listing contained the mojibake character `(R)' (U+00AE),
   a corrupted HTML entity for `&'; restore the address-of operator so
   reg_used points at the first entry of reg_use_table.  */
3899 reg_used = &reg_use_table[0];
3900 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3904 int regno = REGNO (reg_used->reg_rtx);
3906 /* Ignore registers created by GCSE.
3907 We do this because ... */
3908 if (regno >= max_gcse_regno)
3911 /* If the register has already been set in this block, there's
3912 nothing we can do. */
3913 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3916 /* Find an assignment that sets reg_used and is available
3917 at the start of the block. */
3918 set = find_avail_set (regno, insn);
3923 /* ??? We might be able to handle PARALLELs. Later. */
3924 if (GET_CODE (pat) != SET)
3926 src = SET_SRC (pat);
3928 /* Constant propagation. */
3929 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3930 || GET_CODE (src) == SYMBOL_REF)
3932 /* Handle normal insns first. */
3933 if (GET_CODE (insn) == INSN
3934 && try_replace_reg (reg_used->reg_rtx, src, insn))
3938 if (gcse_file != NULL)
3940 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3941 regno, INSN_UID (insn));
3942 print_rtl (gcse_file, src);
3943 fprintf (gcse_file, "\n");
3946 /* The original insn setting reg_used may or may not now be
3947 deletable. We leave the deletion to flow. */
3950 /* Try to propagate a CONST_INT into a conditional jump.
3951 We're pretty specific about what we will handle in this
3952 code, we can extend this as necessary over time.
3954 Right now the insn in question must look like
3955 (set (pc) (if_then_else ...)) */
3956 else if (alter_jumps
3957 && GET_CODE (insn) == JUMP_INSN
3958 && condjump_p (insn)
3959 && ! simplejump_p (insn))
3960 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3962 /* Similar code for machines that use a pair of CC0 setter and
3963 conditional jump insn. */
3964 else if (alter_jumps
3965 && GET_CODE (PATTERN (insn)) == SET
3966 && SET_DEST (PATTERN (insn)) == cc0_rtx
3967 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3968 && condjump_p (NEXT_INSN (insn))
3969 && ! simplejump_p (NEXT_INSN (insn)))
3970 changed |= cprop_cc0_jump (insn, reg_used, src);
/* Copy propagation: SRC must be a pseudo distinct from the used reg. */
3973 else if (GET_CODE (src) == REG
3974 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3975 && REGNO (src) != regno)
3977 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3981 if (gcse_file != NULL)
3983 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3984 regno, INSN_UID (insn), REGNO (src));
3987 /* The original insn setting reg_used may or may not now be
3988 deletable. We leave the deletion to flow. */
3989 /* FIXME: If it turns out that the insn isn't deletable,
3990 then we may have unnecessarily extended register lifetimes
3991 and made things worse. */
3999 /* Forward propagate copies.
4000 This includes copies and constants.
4001 Return non-zero if a change was made. */
/* NOTE(review): loop starts at block 1, presumably because block 0 has no
   predecessors whose available sets could be propagated — confirm against
   the full source.  */
4010 /* Note we start at block 1. */
4013 for (bb = 1; bb < n_basic_blocks; bb++)
4015 /* Reset tables used to keep track of what's still valid [since the
4016 start of the block]. */
4017 reset_opr_set_tables ();
/* Walk every insn of the block, inclusive of BLOCK_END.  */
4019 for (insn = BLOCK_HEAD (bb);
4020 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4021 insn = NEXT_INSN (insn))
4023 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4025 changed |= cprop_insn (insn, alter_jumps);
4027 /* Keep track of everything modified by this insn. */
4028 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4029 call mark_oprs_set if we turned the insn into a NOTE. */
4030 if (GET_CODE (insn) != NOTE)
4031 mark_oprs_set (insn);
4036 if (gcse_file != NULL)
4037 fprintf (gcse_file, "\n");
4042 /* Perform one copy/constant propagation pass.
4043 F is the first insn in the function.
4044 PASS is the pass count. */
4047 one_cprop_pass (pass, alter_jumps)
/* Statistics counters reported in the dump at the end of the pass.  */
4053 const_prop_count = 0;
4054 copy_prop_count = 0;
/* Build the SET hash table, compute the cprop dataflow, run cprop, then
   release the per-pass storage.  */
4056 alloc_set_hash_table (max_cuid);
4057 compute_set_hash_table ();
4059 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4063 alloc_cprop_mem (n_basic_blocks, n_sets);
4064 compute_cprop_data ();
4065 changed = cprop (alter_jumps);
4068 free_set_hash_table ();
4072 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4073 current_function_name, pass,
4074 bytes_used, const_prop_count, copy_prop_count);
4075 fprintf (gcse_file, "\n");
4081 /* Compute PRE+LCM working variables. */
4083 /* Local properties of expressions. */
4084 /* Nonzero for expressions that are transparent in the block. */
4085 static sbitmap *transp;
4087 /* Nonzero for expressions that are transparent at the end of the block.
4088 This is only zero for expressions killed by an abnormal critical edge
4089 created by a call. */
4090 static sbitmap *transpout;
4092 /* Nonzero for expressions that are computed (available) in the block. */
4093 static sbitmap *comp;
4095 /* Nonzero for expressions that are locally anticipatable in the block. */
4096 static sbitmap *antloc;
4098 /* Nonzero for expressions where this block is an optimal computation
4100 static sbitmap *pre_optimal;
4102 /* Nonzero for expressions which are redundant in a particular block. */
4103 static sbitmap *pre_redundant;
/* Scratch per-block bitmap vector used by pre_insert/pre_delete.  */
4105 static sbitmap *temp_bitmap;
4107 /* Redundant insns. */
/* Indexed by INSN_CUID; set for insns whose computation pre_delete
   replaced with a copy from the reaching register.  */
4108 static sbitmap pre_redundant_insns;
4110 /* Allocate vars used for PRE analysis. */
/* Each vector has one bitmap per basic block, with one bit per
   expression in the hash table.  */
4113 alloc_pre_mem (n_blocks, n_exprs)
4114 int n_blocks, n_exprs;
4116 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4117 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4118 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4120 temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4121 pre_optimal = sbitmap_vector_alloc (n_blocks, n_exprs);
4122 pre_redundant = sbitmap_vector_alloc (n_blocks, n_exprs);
4123 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4126 /* Free vars used for PRE analysis. */
/* NOTE(review): only one statement of this function is visible in this
   chunk; presumably the missing lines free the remaining vectors
   allocated by alloc_pre_mem — verify against the full source.  */
4137 free (pre_redundant);
4141 /* Top level routine to do the dataflow analysis needed by PRE. */
/* Local properties first, then transparency at block exits, then the
   LCM solver which fills in pre_redundant and pre_optimal.  */
4146 compute_local_properties (transp, comp, antloc, 0);
4147 compute_transpout ();
4148 pre_lcm (n_basic_blocks, n_exprs, s_preds, s_succs, transp,
4149 antloc, pre_redundant, pre_optimal);
4155 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4158 VISITED is a pointer to a working buffer for tracking which BB's have
4159 been visited. It is NULL for the top-level call.
4161 CHECK_PRE_COMP controls whether or not we check for a computation of
4164 We treat reaching expressions that go through blocks containing the same
4165 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4166 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4167 2 as not reaching. The intent is to improve the probability of finding
4168 only one reaching expression and to reduce register lifetimes by picking
4169 the closest such expression. */
4172 pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp, visited)
/* Allocate the visited buffer lazily on the first (top-level) call;
   recursive calls share it via the parameter.  */
4181 if (visited == NULL)
4183 visited = (char *) alloca (n_basic_blocks);
4184 bzero (visited, n_basic_blocks);
/* Walk every predecessor edge of BB.  */
4187 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4189 int pred_bb = pred->src->index;
4191 if (pred->src == ENTRY_BLOCK_PTR
4192 /* Has predecessor has already been visited? */
4193 || visited[pred_bb])
4195 /* Nothing to do. */
4197 /* Does this predecessor generate this expression? */
4198 else if ((!check_pre_comp && occr_bb == pred_bb)
4199 || TEST_BIT (comp[pred_bb], expr->bitmap_index))
4201 /* Is this the occurrence we're looking for?
4202 Note that there's only one generating occurrence per block
4203 so we just need to check the block number. */
4204 if (occr_bb == pred_bb)
4206 visited[pred_bb] = 1;
4208 /* Ignore this predecessor if it kills the expression. */
4209 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4210 visited[pred_bb] = 1;
4211 /* Neither gen nor kill. */
/* Recurse through this predecessor.  */
4214 visited[pred_bb] = 1;
4215 if (pre_expr_reaches_here_p (occr_bb, expr, pred_bb,
4216 check_pre_comp, visited))
4221 /* All paths have been checked. */
4225 /* Add EXPR to the end of basic block BB.
4227 This is used by both the PRE and code hoisting.
4229 For PRE, we want to verify that the expr is either transparent
4230 or locally anticipatable in the target block. This check makes
4231 no sense for code hoisting. */
4234 insert_insn_end_bb (expr, bb, pre)
4239 rtx insn = BLOCK_END (bb);
4241 rtx reg = expr->reaching_reg;
4242 int regno = REGNO (reg);
4243 rtx pat, copied_expr;
/* Emit "reg = expr" into a sequence; PAT may expand to several insns.  */
4247 copied_expr = copy_rtx (expr->expr);
4248 emit_move_insn (reg, copied_expr);
4249 first_new_insn = get_insns ();
4250 pat = gen_sequence ();
4253 /* If the last insn is a jump, insert EXPR in front [taking care to
4254 handle cc0, etc. properly]. */
4256 if (GET_CODE (insn) == JUMP_INSN)
4262 /* If this is a jump table, then we can't insert stuff here. Since
4263 we know the previous real insn must be the tablejump, we insert
4264 the new instruction just before the tablejump. */
4265 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4266 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4267 insn = prev_real_insn (insn);
4270 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4271 if cc0 isn't set. */
4272 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4274 insn = XEXP (note, 0);
/* No REG_CC_SETTER note: look at the previous real insn instead.  */
4277 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4278 if (maybe_cc0_setter
4279 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4280 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4281 insn = maybe_cc0_setter;
4284 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4285 new_insn = emit_insn_before (pat, insn);
4286 if (BLOCK_HEAD (bb) == insn)
4287 BLOCK_HEAD (bb) = new_insn;
4289 /* Likewise if the last insn is a call, as will happen in the presence
4290 of exception handling. */
4291 else if (GET_CODE (insn) == CALL_INSN)
4293 HARD_REG_SET parm_regs;
4297 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4298 we search backward and place the instructions before the first
4299 parameter is loaded. Do this for everyone for consistency and a
4300 presumption that we'll get better code elsewhere as well. */
4302 /* It should always be the case that we can put these instructions
4303 anywhere in the basic block when performing PRE optimizations.
4306 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4307 && !TEST_BIT (transp[bb], expr->bitmap_index))
4310 /* Since different machines initialize their parameter registers
4311 in different orders, assume nothing. Collect the set of all
4312 parameter registers. */
4313 CLEAR_HARD_REG_SET (parm_regs);
/* CALL_INSN_FUNCTION_USAGE is a list of (USE reg)/(CLOBBER reg)
   expressions; harvest the hard registers used as parameters.  */
4315 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4316 if (GET_CODE (XEXP (p, 0)) == USE
4317 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4319 int regno = REGNO (XEXP (XEXP (p, 0), 0));
4320 if (regno >= FIRST_PSEUDO_REGISTER)
4322 SET_HARD_REG_BIT (parm_regs, regno);
4326 /* Search backward for the first set of a register in this set. */
4327 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4329 insn = PREV_INSN (insn);
4330 p = single_set (insn);
4331 if (p && GET_CODE (SET_DEST (p)) == REG
4332 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4333 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4335 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4340 /* If we found all the parameter loads, then we want to insert
4341 before the first parameter load.
4343 If we did not find all the parameter loads, then we might have
4344 stopped on the head of the block, which could be a CODE_LABEL.
4345 If we inserted before the CODE_LABEL, then we would be putting
4346 the insn in the wrong basic block. In that case, put the insn
4347 after the CODE_LABEL.
4349 ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here? */
4350 if (GET_CODE (insn) != CODE_LABEL)
4352 new_insn = emit_insn_before (pat, insn);
4353 if (BLOCK_HEAD (bb) == insn)
4354 BLOCK_HEAD (bb) = new_insn;
4358 new_insn = emit_insn_after (pat, insn);
/* Otherwise the block ends in an ordinary insn: append after it and
   move the block-end marker.  */
4363 new_insn = emit_insn_after (pat, insn);
4364 BLOCK_END (bb) = new_insn;
4367 /* Keep block number table up to date.
4368 Note, PAT could be a multiple insn sequence, we have to make
4369 sure that each insn in the sequence is handled. */
4370 if (GET_CODE (pat) == SEQUENCE)
4374 for (i = 0; i < XVECLEN (pat, 0); i++)
4376 rtx insn = XVECEXP (pat, 0, i);
4377 set_block_num (insn, bb);
4378 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4379 add_label_notes (PATTERN (insn), new_insn);
4380 record_set_insn = insn;
4381 note_stores (PATTERN (insn), record_set_info);
/* Single-insn case: one set of bookkeeping calls suffices.  */
4386 add_label_notes (SET_SRC (pat), new_insn);
4387 set_block_num (new_insn, bb);
4388 /* Keep register set table up to date. */
4389 record_one_set (regno, new_insn);
4392 gcse_create_count++;
4396 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4397 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4401 /* Insert partially redundant expressions at the ends of appropriate basic
4402 blocks making them fully redundant. */
4405 pre_insert (index_map)
4406 struct expr **index_map;
4408 int bb, i, set_size;
4411 /* Compute INSERT = PRE_OPTIMAL & ~PRE_REDUNDANT.
4412 Where INSERT is nonzero, we add the expression at the end of the basic
4413 block if it reaches any of the deleted expressions. */
/* INSERTED tracks which (block, expression) pairs already received an
   insertion so each is emitted at most once.  */
4415 set_size = pre_optimal[0]->size;
4416 inserted = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
4417 sbitmap_vector_zero (inserted, n_basic_blocks);
4419 for (bb = 0; bb < n_basic_blocks; bb++)
4423 /* This computes the number of potential insertions we need. */
4424 sbitmap_not (temp_bitmap[bb], pre_redundant[bb]);
4425 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_optimal[bb]);
4427 /* TEMP_BITMAP[bb] now contains a bitmap of the expressions that we need
4428 to insert at the end of this basic block. */
/* Walk the bitmap one SBITMAP_ELT_TYPE word at a time; J is the
   expression index of the low bit of INSERT.  */
4429 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4431 SBITMAP_ELT_TYPE insert = temp_bitmap[bb]->elms[i];
4434 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4436 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4438 struct expr *expr = index_map[j];
4441 /* Now look at each deleted occurrence of this expression. */
4442 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4444 if (! occr->deleted_p)
4447 /* Insert this expression at the end of BB if it would
4448 reach the deleted occurrence. */
4449 if (!TEST_BIT (inserted[bb], j)
4450 && pre_expr_reaches_here_p (bb, expr,
4451 BLOCK_NUM (occr->insn), 0,
4454 SET_BIT (inserted[bb], j);
4455 insert_insn_end_bb (index_map[j], bb, 1);
4463 sbitmap_vector_free (inserted);
4466 /* Copy the result of INSN to REG.
4467 INDX is the expression number. */
/* Emits "reaching_reg = SET_DEST (insn)" immediately after INSN and
   updates the block-number and register-set bookkeeping tables.  */
4470 pre_insert_copy_insn (expr, insn)
4474 rtx reg = expr->reaching_reg;
4475 int regno = REGNO (reg);
4476 int indx = expr->bitmap_index;
4477 rtx set = single_set (insn);
4482 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4484 /* Keep block number table up to date. */
4485 set_block_num (new_insn, BLOCK_NUM (insn));
4486 /* Keep register set table up to date. */
4487 record_one_set (regno, new_insn);
4489 gcse_create_count++;
4493 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
4494 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
4498 /* Copy available expressions that reach the redundant expression
4499 to `reaching_reg'. */
4502 pre_insert_copies ()
/* TEMP_BITMAP[bb] = expressions both optimal and redundant in BB.  */
4506 for (bb = 0; bb < n_basic_blocks; bb++)
4508 sbitmap_a_and_b (temp_bitmap[bb], pre_optimal[bb], pre_redundant[bb]);
4511 /* For each available expression in the table, copy the result to
4512 `reaching_reg' if the expression reaches a deleted one.
4514 ??? The current algorithm is rather brute force.
4515 Need to do some profiling. */
4517 for (i = 0; i < expr_hash_table_size; i++)
4521 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4525 /* If the basic block isn't reachable, PPOUT will be TRUE.
4526 However, we don't want to insert a copy here because the
4527 expression may not really be redundant. So only insert
4528 an insn if the expression was deleted.
4529 This test also avoids further processing if the expression
4530 wasn't deleted anywhere. */
/* reaching_reg is only allocated by pre_delete when at least one
   occurrence was deleted, so NULL means nothing to do.  */
4531 if (expr->reaching_reg == NULL)
4534 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4538 if (! occr->deleted_p)
4541 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4543 rtx insn = avail->insn;
4544 int bb = BLOCK_NUM (insn);
4546 if (!TEST_BIT (temp_bitmap[bb], expr->bitmap_index))
4549 /* No need to handle this one if handled already. */
4550 if (avail->copied_p)
4552 /* Don't handle this one if it's a redundant one. */
4553 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4555 /* Or if the expression doesn't reach the deleted one. */
4556 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4557 BLOCK_NUM (occr->insn),
4561 /* Copy the result of avail to reaching_reg. */
4562 pre_insert_copy_insn (expr, insn);
4563 avail->copied_p = 1;
4570 /* Delete redundant computations.
4571 Deletion is done by changing the insn to copy the `reaching_reg' of
4572 the expression into the result of the SET. It is left to later passes
4573 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4575 Returns non-zero if a change is made. */
4582 /* Compute the expressions which are redundant and need to be replaced by
4583 copies from the reaching reg to the target reg. */
/* TEMP_BITMAP[bb] = redundant-but-not-optimal expressions in BB.  */
4584 for (bb = 0; bb < n_basic_blocks; bb++)
4586 sbitmap_not (temp_bitmap[bb], pre_optimal[bb]);
4587 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_redundant[bb]);
4591 for (i = 0; i < expr_hash_table_size; i++)
4595 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4598 int indx = expr->bitmap_index;
4600 /* We only need to search antic_occr since we require
4603 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4605 rtx insn = occr->insn;
4607 int bb = BLOCK_NUM (insn);
4609 if (TEST_BIT (temp_bitmap[bb], indx))
4611 set = single_set (insn);
4615 /* Create a pseudo-reg to store the result of reaching
4616 expressions into. Get the mode for the new pseudo
4617 from the mode of the original destination pseudo. */
/* One shared pseudo per expression; allocated lazily on the
   first deleted occurrence.  */
4618 if (expr->reaching_reg == NULL)
4620 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4622 /* In theory this should never fail since we're creating
4625 However, on the x86 some of the movXX patterns actually
4626 contain clobbers of scratch regs. This may cause the
4627 insn created by validate_change to not match any pattern
4628 and thus cause validate_change to fail. */
4629 if (validate_change (insn, &SET_SRC (set),
4630 expr->reaching_reg, 0))
4632 occr->deleted_p = 1;
4633 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4641 "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4642 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4652 /* Perform GCSE optimizations using PRE.
4653 This is called by one_pre_gcse_pass after all the dataflow analysis
4656 This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
4657 and lazy code motion from Knoop, Ruthing and Steffen as described in
4658 Advanced Compiler Design and Implementation.
4660 ??? A new pseudo reg is created to hold the reaching expression.
4661 The nice thing about the classical approach is that it would try to
4662 use an existing reg. If the register can't be adequately optimized
4663 [i.e. we introduce reload problems], one could add a pass here to
4664 propagate the new register through the block.
4666 ??? We don't handle single sets in PARALLELs because we're [currently]
4667 not able to copy the rest of the parallel when we insert copies to create
4668 full redundancies from partial redundancies. However, there's no reason
4669 why we can't handle PARALLELs in the cases where there are no partial
4677 struct expr **index_map;
4679 /* Compute a mapping from expression number (`bitmap_index') to
4680 hash table entry. */
4682 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
4683 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
4684 for (i = 0; i < expr_hash_table_size; i++)
4688 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4689 index_map[expr->bitmap_index] = expr;
4692 /* Reset bitmap used to track which insns are redundant. */
4693 pre_redundant_insns = sbitmap_alloc (max_cuid);
4694 sbitmap_zero (pre_redundant_insns);
4696 /* Delete the redundant insns first so that
4697 - we know what register to use for the new insns and for the other
4698 ones with reaching expressions
4699 - we know which insns are redundant when we go to create copies */
4700 changed = pre_delete ();
4702 /* Insert insns in places that make partially redundant expressions
4704 pre_insert (index_map);
4706 /* In other places with reaching expressions, copy the expression to the
4707 specially allocated pseudo-reg that reaches the redundant expression. */
4708 pre_insert_copies ();
4710 free (pre_redundant_insns);
4715 /* Top level routine to perform one PRE GCSE pass.
4717 Return non-zero if a change was made. */
4720 one_pre_gcse_pass (pass)
/* Statistics counters reported in the dump at the end of the pass.  */
4725 gcse_subst_count = 0;
4726 gcse_create_count = 0;
/* Build the expression hash table, run the PRE dataflow and transform,
   then release per-pass storage.  */
4728 alloc_expr_hash_table (max_cuid);
4729 compute_expr_hash_table ();
4731 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4732 expr_hash_table_size, n_exprs);
4735 alloc_pre_mem (n_basic_blocks, n_exprs);
4736 compute_pre_data ();
4737 changed |= pre_gcse ();
4740 free_expr_hash_table ();
4744 fprintf (gcse_file, "\n");
4745 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4746 current_function_name, pass,
4747 bytes_used, gcse_subst_count, gcse_create_count);
4753 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4754 We have to add REG_LABEL notes, because the following loop optimization
4755 pass requires them. */
4757 /* ??? This is very similar to the loop.c add_label_notes function. We
4758 could probably share code here. */
4760 /* ??? If there was a jump optimization pass after gcse and before loop,
4761 then we would not need to do this here, because jump would add the
4762 necessary REG_LABEL notes. */
4765 add_label_notes (x, insn)
4769 enum rtx_code code = GET_CODE (x);
4773 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4775 /* This code used to ignore labels that referred to dispatch tables to
4776 avoid flow generating (slightly) worse code.
4778 We no longer ignore such label references (see LABEL_REF handling in
4779 mark_jump_label for additional information). */
4780 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
/* Recurse over X's operands, descending into expression ('e') and
   vector ('E') operands.  */
4785 fmt = GET_RTX_FORMAT (code);
4786 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4789 add_label_notes (XEXP (x, i), insn);
4790 else if (fmt[i] == 'E')
4791 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4792 add_label_notes (XVECEXP (x, i, j), insn);
4796 /* Compute transparent outgoing information for each block.
4798 An expression is transparent to an edge unless it is killed by
4799 the edge itself. This can only happen with abnormal control flow,
4800 when the edge is traversed through a call. This happens with
4801 non-local labels and exceptions.
4803 This would not be necessary if we split the edge. While this is
4804 normally impossible for abnormal critical edges, with some effort
4805 it should be possible with exception handling, since we still have
4806 control over which handler should be invoked. But due to increased
4807 EH table sizes, this may not be worthwhile. */
4810 compute_transpout ()
/* Start from "everything transparent" and clear bits below.  */
4814 sbitmap_vector_ones (transpout, n_basic_blocks);
4816 for (bb = 0; bb < n_basic_blocks; ++bb)
4820 /* Note that flow inserted a nop at the end of basic blocks that
4821 end in call instructions for reasons other than abnormal
4823 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
/* Only MEM expressions can be killed by a call; constant-pool
   references are known immutable and stay transparent.  */
4826 for (i = 0; i < expr_hash_table_size; i++)
4829 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4830 if (GET_CODE (expr->expr) == MEM)
4832 rtx addr = XEXP (expr->expr, 0);
4834 if (GET_CODE (addr) == SYMBOL_REF
4835 && CONSTANT_POOL_ADDRESS_P (addr))
4838 /* ??? Optimally, we would use interprocedural alias
4839 analysis to determine if this mem is actually killed
4841 RESET_BIT (transpout[bb], expr->bitmap_index);
4847 /* Removal of useless null pointer checks */
4849 /* These need to be file static for communication between
4850 invalidate_nonnull_info and delete_null_pointer_checks. */
/* Block currently being scanned by delete_null_pointer_checks; read by
   the note_stores callback invalidate_nonnull_info.  */
4851 static int current_block;
/* Per-block local properties: registers known nonnull at block end, and
   registers whose nonnull information is killed within the block.  */
4852 static sbitmap *nonnull_local;
4853 static sbitmap *nonnull_killed;
4855 /* Called via note_stores. X is set by SETTER. If X is a register we must
4856 invalidate nonnull_local and set nonnull_killed.
4858 We ignore hard registers. */
4860 invalidate_nonnull_info (x, setter)
4862 rtx setter ATTRIBUTE_UNUSED;
/* Strip SUBREGs to reach the underlying register.  */
4867 while (GET_CODE (x) == SUBREG)
4870 /* Ignore anything that is not a register or is a hard register. */
4871 if (GET_CODE (x) != REG
4872 || REGNO (x) < FIRST_PSEUDO_REGISTER)
/* Any set of the pseudo kills what we knew about it in this block.  */
4877 RESET_BIT (nonnull_local[current_block], regno);
4878 SET_BIT (nonnull_killed[current_block], regno);
4882 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
4885 This is conceptually similar to global constant/copy propagation and
4886 classic global CSE (it even uses the same dataflow equations as cprop).
4888 If a register is used as memory address with the form (mem (reg)), then we
4889 know that REG can not be zero at that point in the program. Any instruction
4890 which sets REG "kills" this property.
4892 So, if every path leading to a conditional branch has an available memory
4893 reference of that form, then we know the register can not have the value
4894 zero at the conditional branch.
4896 So we merely need to compute the local properties and propagate that data
4897 around the cfg, then optimize where possible.
4899 We run this pass two times. Once before CSE, then again after CSE. This
4900 has proven to be the most profitable approach. It is rare for new
4901 optimization opportunities of this nature to appear after the first CSE
4904 This could probably be integrated with global cprop with a little work. */
4907 delete_null_pointer_checks (f)
4910 int_list_ptr *s_preds, *s_succs;
4911 int *num_preds, *num_succs;
4913 sbitmap *nonnull_avin, *nonnull_avout;
4915 /* First break the program into basic blocks. */
4916 find_basic_blocks (f, max_reg_num (), NULL, 1);
4918 /* If we have only a single block, then there's nothing to do. */
4919 if (n_basic_blocks <= 1)
4921 /* Free storage allocated by find_basic_blocks. */
4922 free_basic_block_vars (0);
4926 /* Trying to perform global optimizations on flow graphs which have
4927 a high connectivity will take a long time and is unlikely to be
4928 particularly useful.
4930 In normal circumstances a cfg should have about twice as many edges
4931 as blocks. But we do not want to punish small functions which have
4932 a couple switch statements. So we require a relatively large number
4933 of basic blocks and the ratio of edges to blocks to be high. */
4934 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
4936 /* Free storage allocated by find_basic_blocks. */
4937 free_basic_block_vars (0);
4941 /* We need predecessor/successor lists as well as pred/succ counts for
4942 each basic block. */
4943 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
4944 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
4945 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
4946 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
4947 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
4949 /* Allocate bitmaps to hold local and global properties. */
/* One bit per pseudo register, per block.  */
4950 nonnull_local = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4951 nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4952 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4953 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4955 /* Compute local properties, nonnull and killed. A register will have
4956 the nonnull property if at the end of the current block its value is
4957 known to be nonnull. The killed property indicates that somewhere in
4958 the block any information we had about the register is killed.
4960 Note that a register can have both properties in a single block. That
4961 indicates that it's killed, then later in the block a new value is
4963 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4964 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4965 for (current_block = 0; current_block < n_basic_blocks; current_block++)
4967 rtx insn, stop_insn;
4969 /* Scan each insn in the basic block looking for memory references and
4971 stop_insn = NEXT_INSN (BLOCK_END (current_block));
4972 for (insn = BLOCK_HEAD (current_block);
4974 insn = NEXT_INSN (insn))
4978 /* Ignore anything that is not a normal insn. */
4979 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4982 /* Basically ignore anything that is not a simple SET. We do have
4983 to make sure to invalidate nonnull_local and set nonnull_killed
4984 for such insns though. */
4985 set = single_set (insn);
4988 note_stores (PATTERN (insn), invalidate_nonnull_info);
4992 /* See if we've got a usable memory load. We handle it first
4993 in case it uses its address register as a dest (which kills
4994 the nonnull property). */
4995 if (GET_CODE (SET_SRC (set)) == MEM
4996 && GET_CODE (XEXP (SET_SRC (set), 0)) == REG
4997 && REGNO (XEXP (SET_SRC (set), 0)) >= FIRST_PSEUDO_REGISTER)
4998 SET_BIT (nonnull_local[current_block],
4999 REGNO (XEXP (SET_SRC (set), 0)));
5001 /* Now invalidate stuff clobbered by this insn. */
5002 note_stores (PATTERN (insn), invalidate_nonnull_info);
5004 /* And handle stores, we do these last since any sets in INSN can
5005 not kill the nonnull property if it is derived from a MEM
5006 appearing in a SET_DEST. */
5007 if (GET_CODE (SET_DEST (set)) == MEM
5008 && GET_CODE (XEXP (SET_DEST (set), 0)) == REG)
5009 SET_BIT (nonnull_local[current_block],
5010 REGNO (XEXP (SET_DEST (set), 0)));
5014 /* Now compute global properties based on the local properties. This
5015 is a classic global availability algorithm. */
/* Entry block starts with nothing available; iterate to a fixed point.  */
5016 sbitmap_zero (nonnull_avin[0]);
5017 sbitmap_vector_ones (nonnull_avout, n_basic_blocks);
5023 for (bb = 0; bb < n_basic_blocks; bb++)
5026 sbitmap_intersect_of_predecessors (nonnull_avin[bb],
5027 nonnull_avout, bb, s_preds);
5029 changed |= sbitmap_union_of_diff (nonnull_avout[bb],
5032 nonnull_killed[bb]);
5036 /* Now look at each bb and see if it ends with a compare of a value
5038 for (bb = 0; bb < n_basic_blocks; bb++)
5040 rtx last_insn = BLOCK_END (bb);
5041 rtx condition, earliest, reg;
5042 int compare_and_branch;
5044 /* We only want conditional branches. */
5045 if (GET_CODE (last_insn) != JUMP_INSN
5046 || !condjump_p (last_insn)
5047 || simplejump_p (last_insn))
5050 /* LAST_INSN is a conditional jump. Get its condition. */
5051 condition = get_condition (last_insn, &earliest);
5053 /* If we were unable to get the condition, or it is not an equality
5054 comparison against zero then there's nothing we can do. */
5056 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5057 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5058 || XEXP (condition, 1) != CONST0_RTX (GET_MODE (XEXP (condition, 0))))
5061 /* We must be checking a register against zero. */
5062 reg = XEXP (condition, 0);
5063 if (GET_CODE (reg) != REG)
5066 /* Is the register known to have a nonzero value? */
5067 if (!TEST_BIT (nonnull_avout[bb], REGNO (reg)))
5070 /* Try to compute whether the compare/branch at the loop end is one or
5071 two instructions. */
5072 if (earliest == last_insn)
5073 compare_and_branch = 1;
5074 else if (earliest == prev_nonnote_insn (last_insn))
5075 compare_and_branch = 2;
5079 /* We know the register in this comparison is nonnull at exit from
5080 this block. We can optimize this comparison. */
/* reg != 0 is always true: replace the conditional jump with an
   unconditional jump to its target.  */
5081 if (GET_CODE (condition) == NE)
5085 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5087 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5088 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5089 emit_barrier_after (new_jump);
5091 delete_insn (last_insn);
5092 if (compare_and_branch == 2)
5093 delete_insn (earliest);
5096 /* Free storage allocated by find_basic_blocks. */
5097 free_basic_block_vars (0);
5100 free (nonnull_local);
5101 free (nonnull_killed);
5102 free (nonnull_avin);
5103 free (nonnull_avout);
5106 /* Code Hoisting variables and subroutines. */
5108 /* Very busy expressions. */
/* An expression is very busy at a point if it is computed on every path
   leaving that point (see compute_code_hoist_vbeinout).  */
5109 static sbitmap *hoist_vbein;
5110 static sbitmap *hoist_vbeout;
5112 /* Hoistable expressions. */
5113 static sbitmap *hoist_exprs;
5115 /* Dominator bitmaps. */
5116 static sbitmap *dominators;
5117 static sbitmap *post_dominators;
5119 /* ??? We could compute post dominators and run this algorithm in
5120 reverse to perform tail merging, doing so would probably be
5121 more effective than the tail merging code in jump.c.
5123 It's unclear if tail merging could be run in parallel with
5124 code hoisting. It would be nice. */
5126 /* Allocate vars used for code hoisting analysis.

   N_BLOCKS is the number of basic blocks and N_EXPRS the number of
   expressions in the hash table.  Per-block expression bitmaps get one bit
   per expression; the dominator sets are block-by-block relations.  */
5129 alloc_code_hoist_mem (n_blocks, n_exprs)
5130 int n_blocks, n_exprs;
     /* Local properties of each block: anticipatable, transparent and
        computed expressions (filled in by compute_local_properties).  */
5132 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5133 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5134 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
     /* Global very-busy-expression solution, the hoistable set, and
        transparency at block exit.  */
5136 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5137 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5138 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5139 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
     /* Dominator information is one bit per block, per block.  */
5141 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5142 post_dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5145 /* Free vars used for code hoisting analysis. */
5148 free_code_hoist_mem ()
     /* Release the sbitmap vectors allocated by alloc_code_hoist_mem.  */
5155 free (hoist_vbeout);
5160 free (post_dominators);
5163 /* Compute the very busy expressions at entry/exit from each block.
5165 An expression is very busy if all paths from a given point
5166 compute the expression.

   Backward dataflow equations, per block BB:
     vbein[BB]  = antloc[BB] | (vbeout[BB] & transp[BB])
     vbeout[BB] = intersection of vbein over BB's successors
   CHANGED and PASSES drive an enclosing fixed-point iteration (its loop
   header is not visible in this excerpt).  */
5169 compute_code_hoist_vbeinout ()
5171 int bb, changed, passes;
5173 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5174 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5181 /* We scan the blocks in the reverse order to speed up
   convergence, since very-busy information flows backward through
   the CFG.  */
5183 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5185 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5186 hoist_vbeout[bb], transp[bb]);
     /* NOTE(review): the last block is skipped here — presumably because
        its only successor is the exit block, leaving vbeout empty;
        confirm against the full source.  */
5187 if (bb != n_basic_blocks - 1)
5188 sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein,
5195 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5198 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5201 compute_code_hoist_data ()
     /* Local per-block properties: transparency, computedness and
        anticipatability of each expression.  */
5203 compute_local_properties (transp, comp, antloc, 0);
5204 compute_transpout ();
     /* Global very-busy solution, then the dominator/post-dominator sets
        hoist_code uses to decide where hoisting is legal.  */
5205 compute_code_hoist_vbeinout ();
5206 compute_flow_dominators (dominators, post_dominators);
5208 fprintf (gcse_file, "\n");
5211 /* Determine if the expression identified by EXPR_INDEX would
5212 reach BB unimpaired if it was placed at the end of EXPR_BB.
5214 It's unclear exactly what Muchnick meant by "unimpaired".  It seems
5215 to me that the expression must either be computed or transparent in
5216 *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5217 would allow the expression to be hoisted out of loops, even if
5218 the expression wasn't a loop invariant.
5220 Contrast this to reachability for PRE where an expression is
5221 considered reachable if *any* path reaches instead of *all*

   VISITED is a scratch array of per-block flags.  The outermost caller
   passes NULL; it is then stack-allocated and zeroed here and shared by
   the recursive calls.  */
5225 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5233 if (visited == NULL)
5235 visited = (char *) alloca (n_basic_blocks);
5236 bzero (visited, n_basic_blocks);
5239 visited[expr_bb] = 1;
     /* Walk BB's predecessors; every path back toward EXPR_BB must either
        compute the expression or be transparent for it.  */
5240 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5242 int pred_bb = pred->src->index;
5244 if (pred->src == ENTRY_BLOCK_PTR)
5246 else if (visited[pred_bb])
5248 /* Does this predecessor generate this expression? */
5249 else if (TEST_BIT (comp[pred_bb], expr_index))
5251 else if (! TEST_BIT (transp[pred_bb], expr_index))
5256 visited[pred_bb] = 1;
5257 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
     /* Nonzero only when the predecessor walk ran to completion, i.e. no
        path failed the checks above.  */
5263 return (pred == NULL);
5266 /* Actually perform code hoisting.

   Two passes over each block BB: first, mark in hoist_exprs[BB] the very
   busy expressions that are hoistable into BB from more than one block BB
   dominates; second, for each marked expression, rewrite the dominated
   occurrences to read from a shared pseudo (expr->reaching_reg) and insert
   a single computation of the expression at the end of BB.  */
5270 int bb, dominated, i;
5271 struct expr **index_map;
5273 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5275 /* Compute a mapping from expression number (`bitmap_index') to
5276 hash table entry. */
5278 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
5279 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
5280 for (i = 0; i < expr_hash_table_size; i++)
5284 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5285 index_map[expr->bitmap_index] = expr;
5288 /* Walk over each basic block looking for potentially hoistable
5289 expressions, nothing gets hoisted from the entry block. */
5290 for (bb = 0; bb < n_basic_blocks; bb++)
5293 int insn_inserted_p;
5295 /* Examine each expression that is very busy at the exit of this
5296 block.  These are the potentially hoistable expressions. */
5297 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5300 if (TEST_BIT (hoist_vbeout[bb], i)
5301 && TEST_BIT (transpout[bb], i))
5303 /* We've found a potentially hoistable expression, now
5304 we look at every block BB dominates to see if it
5305 computes the expression. */
5306 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5308 /* Ignore self dominance. */
5310 || ! TEST_BIT (dominators[dominated], bb))
5313 /* We've found a dominated block, now see if it computes
5314 the busy expression and whether or not moving that
5315 expression to the "beginning" of that block is safe. */
5316 if (!TEST_BIT (antloc[dominated], i))
5319 /* Note if the expression would reach the dominated block
5320 unimpaired if it was placed at the end of BB.
5322 Keep track of how many times this expression is hoistable
5323 from a dominated block into BB. */
5324 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5328 /* If we found more than one hoistable occurrence of this
5329 expression, then note it in the bitmap of expressions to
5330 hoist.  It makes no sense to hoist things which are computed
5331 in only one BB, and doing so tends to pessimize register
5332 allocation.  One could increase this value to try harder
5333 to avoid any possible code expansion due to register
5334 allocation issues; however experiments have shown that
5335 the vast majority of hoistable expressions are only movable
5336 from two successors, so raising this threshold is likely
5337 to nullify any benefit we get from code hoisting. */
5340 SET_BIT (hoist_exprs[bb], i);
5346 /* If we found nothing to hoist, then quit now. */
5350 /* Loop over all the hoistable expressions. */
5351 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5353 /* We want to insert the expression into BB only once, so
5354 note when we've inserted it. */
5355 insn_inserted_p = 0;
5357 /* These tests should be the same as the tests above. */
5358 if (TEST_BIT (hoist_vbeout[bb], i))
5360 /* We've found a potentially hoistable expression, now
5361 we look at every block BB dominates to see if it
5362 computes the expression. */
5363 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5365 /* Ignore self dominance. */
5367 || ! TEST_BIT (dominators[dominated], bb))
5370 /* We've found a dominated block, now see if it computes
5371 the busy expression and whether or not moving that
5372 expression to the "beginning" of that block is safe. */
5373 if (!TEST_BIT (antloc[dominated], i))
5376 /* The expression is computed in the dominated block and
5377 it would be safe to compute it at the start of the
5378 dominated block.  Now we have to determine if the
5379 expression would reach the dominated block if it was
5380 placed at the end of BB. */
5381 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5383 struct expr *expr = index_map[i];
5384 struct occr *occr = expr->antic_occr;
5389 /* Find the right occurrence of this expression. */
     /* NOTE(review): OCCR is dereferenced before it is tested for NULL,
        so the "&& occr" guard is useless in this order; if no
        anticipatable occurrence belongs to DOMINATED this walks past the
        end of the list.  The null test should come first:
        while (occr && BLOCK_NUM (occr->insn) != dominated).  */
5390 while (BLOCK_NUM (occr->insn) != dominated && occr)
5393 /* Should never happen. */
5399 set = single_set (insn);
5403 /* Create a pseudo-reg to store the result of reaching
5404 expressions into.  Get the mode for the new pseudo
5405 from the mode of the original destination pseudo. */
5406 if (expr->reaching_reg == NULL)
5408 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5410 /* In theory this should never fail since we're creating
5413 However, on the x86 some of the movXX patterns actually
5414 contain clobbers of scratch regs.  This may cause the
5415 insn created by validate_change to not match any
5416 pattern and thus cause validate_change to fail. */
5417 if (validate_change (insn, &SET_SRC (set),
5418 expr->reaching_reg, 0))
5420 occr->deleted_p = 1;
5421 if (!insn_inserted_p)
5423 insert_insn_end_bb (index_map[i], bb, 0);
5424 insn_inserted_p = 1;
5434 /* Top level routine to perform one code hoisting (aka unification) pass
5436 Return non-zero if a change was made. */
5439 one_code_hoisting_pass ()
5443 alloc_expr_hash_table (max_cuid);
5444 compute_expr_hash_table ();
5446 dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5447 expr_hash_table_size, n_exprs);
5450 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5451 compute_code_hoist_data ();
5453 free_code_hoist_mem ();
5455 free_expr_hash_table ();