gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Free Software Foundation,
   3    Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "flags.h"
  27 #include "tm_p.h"
  28 #include "basic-block.h"
  29 #include "output.h"
  30 #include "function.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "tree-pass.h"
  34 #include "cfgloop.h"
  35
  36 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  37    one or more in-edges to B to instead reach the destination of an
  38    out-edge from B while preserving any side effects in B.
  39
  40    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  41    side effects of executing B.
  42
  43      1. Make a copy of B (including its outgoing edges and statements).  Call
  44         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  45
  46      2. Remove the control statement at the end of B' and all outgoing edges
  47         except B'->C.
  48
  49      3. Add a new argument to each PHI in C with the same value as the existing
  50         argument associated with edge B->C.  Associate the new PHI arguments
  51         with the edge B'->C.
  52
  53      4. For each PHI in B, find or create a PHI in B' with an identical
  54         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  55         value as the PHI in B associated with the edge A->B.  Associate
  56         the new argument in the PHI in B' with the edge A->B.
  57
  58      5. Change the edge A->B to A->B'.
  59
  60         5a. This automatically deletes any PHI arguments associated with the
  61             edge A->B in B.
  62
  63         5b. This automatically associates each new argument added in step 4
  64             with the edge A->B'.
  65
  66      6. Repeat for other incoming edges into B.
  67
  68      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  69
  70    Note that block duplication can be minimized by first collecting the
  71    set of unique destination blocks that the incoming edges should
  72    be threaded to.
  73
  74    Block duplication can be further minimized by using B instead of
  75    creating B' for one destination if all edges into B are going to be
  76    threaded to a successor of B.  We had code to do this at one time, but
  77    I'm not convinced it is correct with the changes to avoid mucking up
  78    the loop structure (which may cancel threading requests, thus a block
  79    which we thought was going to become unreachable may still be reachable).
  80    This code was also going to get ugly with the introduction of the ability
  81    for a single jump thread request to bypass multiple blocks.
  82
  83    We further reduce the number of edges and statements we create by
  84    not copying all the outgoing edges and the control statement in
  85    step #1.  We instead create a template block without the outgoing
  86    edges and duplicate the template.  */
  87
  88
  89 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  90    all the incoming edges which thread to the same destination edge at
  91    the same time.  That avoids lots of table lookups to get information
  92    for the destination edge.
  93
  94    To realize that implementation we create a list of incoming edges
  95    which thread to the same outgoing edge.  Thus to implement steps
  96    #5 and #6 we traverse our hash table of outgoing edge information.
  97    For each entry we walk the list of incoming edges which thread to
  98    the current outgoing edge.  */
  99
 100 struct el
 101 {
 102   edge e;
 103   struct el *next;
 104 };
 105
 106 /* Main data structure recording information regarding B's duplicate
 107    blocks.  */
 108
 109 /* We need to efficiently record the unique thread destinations of this
 110    block and specific information associated with those destinations.  We
 111    may have many incoming edges threaded to the same outgoing edge.  This
 112    can be naturally implemented with a hash table.  */
 113
 114 struct redirection_data
 115 {
 116   /* A duplicate of B with the trailing control statement removed and which
 117      targets a single successor of B.  */
 118   basic_block dup_block;
 119
 120   /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
 121      its single successor.  */
 122   edge outgoing_edge;
 123
 124   /* A list of incoming edges which we want to thread to
 125      OUTGOING_EDGE->dest.  */
 126   struct el *incoming_edges;
 127 };
 128
 129 /* Main data structure to hold information for duplicates of BB.  */
 130 static htab_t redirection_data;
 131
 132 /* Data structure of information to pass to hash table traversal routines.  */
 133 struct local_info
 134 {
 135   /* The current block we are working on.  */
 136   basic_block bb;
 137
 138   /* A template copy of BB with no outgoing edges or control statement that
 139      we use for creating copies.  */
 140   basic_block template_block;
 141
 142   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 143   bool jumps_threaded;
 144 };
 145
 146 /* Passes which use the jump threading code register jump threading
 147    opportunities as they are discovered.  We keep the registered
 148    jump threading opportunities in this vector as edge pairs
 149    (original_edge, target_edge).  */
 150 static VEC(edge,heap) *threaded_edges;
 151
 152
 153 /* Jump threading statistics.  */
 154
 155 struct thread_stats_d
 156 {
 157   unsigned long num_threaded_edges;
 158 };
 159
 160 struct thread_stats_d thread_stats;
 161
 162
 163 /* Remove the last statement in block BB if it is a control statement
 164    Also remove all outgoing edges except the edge which reaches DEST_BB.
 165    If DEST_BB is NULL, then remove all outgoing edges.  */
 166
 167 static void
 168 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 169 {
 170   gimple_stmt_iterator gsi;
 171   edge e;
 172   edge_iterator ei;
 173
 174   gsi = gsi_last_bb (bb);
 175
 176   /* If the duplicate ends with a control statement, then remove it.
 177
 178      Note that if we are duplicating the template block rather than the
 179      original basic block, then the duplicate might not have any real
 180      statements in it.  */
 181   if (!gsi_end_p (gsi)
 182       && gsi_stmt (gsi)
 183       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 184           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 185           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 186     gsi_remove (&gsi, true);
 187
 188   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 189     {
 190       if (e->dest != dest_bb)
 191         remove_edge (e);
 192       else
 193         ei_next (&ei);
 194     }
 195 }
 196
 197 /* Create a duplicate of BB which only reaches the destination of the edge
 198    stored in RD.  Record the duplicate block in RD.  */
 199
 200 static void
 201 create_block_for_threading (basic_block bb, struct redirection_data *rd)
 202 {
 203   /* We can use the generic block duplication code and simply remove
 204      the stuff we do not need.  */
 205   rd->dup_block = duplicate_block (bb, NULL, NULL);
 206
 207   /* Zero out the profile, since the block is unreachable for now.  */
 208   rd->dup_block->frequency = 0;
 209   rd->dup_block->count = 0;
 210
 211   /* The call to duplicate_block will copy everything, including the
 212      useless COND_EXPR or SWITCH_EXPR at the end of BB.  We just remove
 213      the useless COND_EXPR or SWITCH_EXPR here rather than having a
 214      specialized block copier.  We also remove all outgoing edges
 215      from the duplicate block.  The appropriate edge will be created
 216      later.  */
 217   remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
 218 }
 219
 220 /* Hashing and equality routines for our hash table.  */
 221 static hashval_t
 222 redirection_data_hash (const void *p)
 223 {
 224   edge e = ((const struct redirection_data *)p)->outgoing_edge;
 225   return e->dest->index;
 226 }
 227
 228 static int
 229 redirection_data_eq (const void *p1, const void *p2)
 230 {
 231   edge e1 = ((const struct redirection_data *)p1)->outgoing_edge;
 232   edge e2 = ((const struct redirection_data *)p2)->outgoing_edge;
 233
 234   return e1 == e2;
 235 }
 236
 237 /* Given an outgoing edge E lookup and return its entry in our hash table.
 238
 239    If INSERT is true, then we insert the entry into the hash table if
 240    it is not already present.  INCOMING_EDGE is added to the list of incoming
 241    edges associated with E in the hash table.  */
 242
 243 static struct redirection_data *
 244 lookup_redirection_data (edge e, edge incoming_edge, enum insert_option insert)
 245 {
 246   void **slot;
 247   struct redirection_data *elt;
 248
 249  /* Build a hash table element so we can see if E is already
 250      in the table.  */
 251   elt = XNEW (struct redirection_data);
 252   elt->outgoing_edge = e;
 253   elt->dup_block = NULL;
 254   elt->incoming_edges = NULL;
 255
 256   slot = htab_find_slot (redirection_data, elt, insert);
 257
 258   /* This will only happen if INSERT is false and the entry is not
 259      in the hash table.  */
 260   if (slot == NULL)
 261     {
 262       free (elt);
 263       return NULL;
 264     }
 265
 266   /* This will only happen if E was not in the hash table and
 267      INSERT is true.  */
 268   if (*slot == NULL)
 269     {
 270       *slot = (void *)elt;
 271       elt->incoming_edges = XNEW (struct el);
 272       elt->incoming_edges->e = incoming_edge;
 273       elt->incoming_edges->next = NULL;
 274       return elt;
 275     }
 276   /* E was in the hash table.  */
 277   else
 278     {
 279       /* Free ELT as we do not need it anymore, we will extract the
 280          relevant entry from the hash table itself.  */
 281       free (elt);
 282
 283       /* Get the entry stored in the hash table.  */
 284       elt = (struct redirection_data *) *slot;
 285
 286       /* If insertion was requested, then we need to add INCOMING_EDGE
 287          to the list of incoming edges associated with E.  */
 288       if (insert)
 289         {
 290           struct el *el = XNEW (struct el);
 291           el->next = elt->incoming_edges;
 292           el->e = incoming_edge;
 293           elt->incoming_edges = el;
 294         }
 295
 296       return elt;
 297     }
 298 }
 299
 300 /* Given a duplicate block and its single destination (both stored
 301    in RD).  Create an edge between the duplicate and its single
 302    destination.
 303
 304    Add an additional argument to any PHI nodes at the single
 305    destination.  */
 306
 307 static void
 308 create_edge_and_update_destination_phis (struct redirection_data *rd,
 309                                          basic_block bb)
 310 {
 311   edge e = make_edge (bb, rd->outgoing_edge->dest, EDGE_FALLTHRU);
 312   gimple_stmt_iterator gsi;
 313
 314   rescan_loop_exit (e, true, false);
 315   e->probability = REG_BR_PROB_BASE;
 316   e->count = bb->count;
 317   e->aux = rd->outgoing_edge->aux;
 318
 319   /* If there are any PHI nodes at the destination of the outgoing edge
 320      from the duplicate block, then we will need to add a new argument
 321      to them.  The argument should have the same value as the argument
 322      associated with the outgoing edge stored in RD.  */
 323   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
 324     {
 325       gimple phi = gsi_stmt (gsi);
 326       source_location locus;
 327       int indx = rd->outgoing_edge->dest_idx;
 328
 329       locus = gimple_phi_arg_location (phi, indx);
 330       add_phi_arg (phi, gimple_phi_arg_def (phi, indx), e, locus);
 331     }
 332 }
 333
 334 /* Hash table traversal callback routine to create duplicate blocks.  */
 335
 336 static int
 337 create_duplicates (void **slot, void *data)
 338 {
 339   struct redirection_data *rd = (struct redirection_data *) *slot;
 340   struct local_info *local_info = (struct local_info *)data;
 341
 342   /* Create a template block if we have not done so already.  Otherwise
 343      use the template to create a new block.  */
 344   if (local_info->template_block == NULL)
 345     {
 346       create_block_for_threading (local_info->bb, rd);
 347       local_info->template_block = rd->dup_block;
 348
 349       /* We do not create any outgoing edges for the template.  We will
 350          take care of that in a later traversal.  That way we do not
 351          create edges that are going to just be deleted.  */
 352     }
 353   else
 354     {
 355       create_block_for_threading (local_info->template_block, rd);
 356
 357       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
 358          block.  */
 359       create_edge_and_update_destination_phis (rd, rd->dup_block);
 360     }
 361
 362   /* Keep walking the hash table.  */
 363   return 1;
 364 }
 365
 366 /* We did not create any outgoing edges for the template block during
 367    block creation.  This hash table traversal callback creates the
 368    outgoing edge for the template block.  */
 369
 370 static int
 371 fixup_template_block (void **slot, void *data)
 372 {
 373   struct redirection_data *rd = (struct redirection_data *) *slot;
 374   struct local_info *local_info = (struct local_info *)data;
 375
 376   /* If this is the template block, then create its outgoing edges
 377      and halt the hash table traversal.  */
 378   if (rd->dup_block && rd->dup_block == local_info->template_block)
 379     {
 380       create_edge_and_update_destination_phis (rd, rd->dup_block);
 381       return 0;
 382     }
 383
 384   return 1;
 385 }
 386
 387 /* Hash table traversal callback to redirect each incoming edge
 388    associated with this hash table element to its new destination.  */
 389
 390 static int
 391 redirect_edges (void **slot, void *data)
 392 {
 393   struct redirection_data *rd = (struct redirection_data *) *slot;
 394   struct local_info *local_info = (struct local_info *)data;
 395   struct el *next, *el;
 396
 397   /* Walk over all the incoming edges associated associated with this
 398      hash table entry.  */
 399   for (el = rd->incoming_edges; el; el = next)
 400     {
 401       edge e = el->e;
 402
 403       /* Go ahead and free this element from the list.  Doing this now
 404          avoids the need for another list walk when we destroy the hash
 405          table.  */
 406       next = el->next;
 407       free (el);
 408
 409       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 410          to clear it will cause all kinds of unpleasant problems later.  */
 411       e->aux = NULL;
 412
 413       thread_stats.num_threaded_edges++;
 414
 415       if (rd->dup_block)
 416         {
 417           edge e2;
 418
 419           if (dump_file && (dump_flags & TDF_DETAILS))
 420             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 421                      e->src->index, e->dest->index, rd->dup_block->index);
 422
 423           rd->dup_block->count += e->count;
 424           rd->dup_block->frequency += EDGE_FREQUENCY (e);
 425           EDGE_SUCC (rd->dup_block, 0)->count += e->count;
 426           /* Redirect the incoming edge to the appropriate duplicate
 427              block.  */
 428           e2 = redirect_edge_and_branch (e, rd->dup_block);
 429           gcc_assert (e == e2);
 430           flush_pending_stmts (e2);
 431         }
 432     }
 433
 434   /* Indicate that we actually threaded one or more jumps.  */
 435   if (rd->incoming_edges)
 436     local_info->jumps_threaded = true;
 437
 438   return 1;
 439 }
 440
 441 /* Return true if this block has no executable statements other than
 442    a simple ctrl flow instruction.  When the number of outgoing edges
 443    is one, this is equivalent to a "forwarder" block.  */
 444
 445 static bool
 446 redirection_block_p (basic_block bb)
 447 {
 448   gimple_stmt_iterator gsi;
 449
 450   /* Advance to the first executable statement.  */
 451   gsi = gsi_start_bb (bb);
 452   while (!gsi_end_p (gsi)
 453          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
 454              || is_gimple_debug (gsi_stmt (gsi))
 455              || gimple_nop_p (gsi_stmt (gsi))))
 456     gsi_next (&gsi);
 457
 458   /* Check if this is an empty block.  */
 459   if (gsi_end_p (gsi))
 460     return true;
 461
 462   /* Test that we've reached the terminating control statement.  */
 463   return gsi_stmt (gsi)
 464          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 465              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 466              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
 467 }
 468
 469 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 470    is reached via one or more specific incoming edges, we know which
 471    outgoing edge from BB will be traversed.
 472
 473    We want to redirect those incoming edges to the target of the
 474    appropriate outgoing edge.  Doing so avoids a conditional branch
 475    and may expose new optimization opportunities.  Note that we have
 476    to update dominator tree and SSA graph after such changes.
 477
 478    The key to keeping the SSA graph update manageable is to duplicate
 479    the side effects occurring in BB so that those side effects still
 480    occur on the paths which bypass BB after redirecting edges.
 481
 482    We accomplish this by creating duplicates of BB and arranging for
 483    the duplicates to unconditionally pass control to one specific
 484    successor of BB.  We then revector the incoming edges into BB to
 485    the appropriate duplicate of BB.
 486
 487    If NOLOOP_ONLY is true, we only perform the threading as long as it
 488    does not affect the structure of the loops in a nontrivial way.  */
 489
 490 static bool
 491 thread_block (basic_block bb, bool noloop_only)
 492 {
 493   /* E is an incoming edge into BB that we may or may not want to
 494      redirect to a duplicate of BB.  */
 495   edge e, e2;
 496   edge_iterator ei;
 497   struct local_info local_info;
 498   struct loop *loop = bb->loop_father;
 499
 500   /* To avoid scanning a linear array for the element we need we instead
 501      use a hash table.  For normal code there should be no noticeable
 502      difference.  However, if we have a block with a large number of
 503      incoming and outgoing edges such linear searches can get expensive.  */
 504   redirection_data = htab_create (EDGE_COUNT (bb->succs),
 505                                   redirection_data_hash,
 506                                   redirection_data_eq,
 507                                   free);
 508
 509   /* If we thread the latch of the loop to its exit, the loop ceases to
 510      exist.  Make sure we do not restrict ourselves in order to preserve
 511      this loop.  */
 512   if (loop->header == bb)
 513     {
 514       e = loop_latch_edge (loop);
 515       e2 = (edge) e->aux;
 516
 517       if (e2 && loop_exit_edge_p (loop, e2))
 518         {
 519           loop->header = NULL;
 520           loop->latch = NULL;
 521         }
 522     }
 523
 524   /* Record each unique threaded destination into a hash table for
 525      efficient lookups.  */
 526   FOR_EACH_EDGE (e, ei, bb->preds)
 527     {
 528       e2 = (edge) e->aux;
 529
 530       if (!e2
 531           /* If NOLOOP_ONLY is true, we only allow threading through the
 532              header of a loop to exit edges.  */
 533           || (noloop_only
 534               && bb == bb->loop_father->header
 535               && !loop_exit_edge_p (bb->loop_father, e2)))
 536         continue;
 537
 538       if (e->dest == e2->src)
 539         update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
 540                                          e->count, (edge) e->aux);
 541
 542       /* Insert the outgoing edge into the hash table if it is not
 543          already in the hash table.  */
 544       lookup_redirection_data (e2, e, INSERT);
 545     }
 546
 547   /* We do not update dominance info.  */
 548   free_dominance_info (CDI_DOMINATORS);
 549
 550   /* Now create duplicates of BB.
 551
 552      Note that for a block with a high outgoing degree we can waste
 553      a lot of time and memory creating and destroying useless edges.
 554
 555      So we first duplicate BB and remove the control structure at the
 556      tail of the duplicate as well as all outgoing edges from the
 557      duplicate.  We then use that duplicate block as a template for
 558      the rest of the duplicates.  */
 559   local_info.template_block = NULL;
 560   local_info.bb = bb;
 561   local_info.jumps_threaded = false;
 562   htab_traverse (redirection_data, create_duplicates, &local_info);
 563
 564   /* The template does not have an outgoing edge.  Create that outgoing
 565      edge and update PHI nodes as the edge's target as necessary.
 566
 567      We do this after creating all the duplicates to avoid creating
 568      unnecessary edges.  */
 569   htab_traverse (redirection_data, fixup_template_block, &local_info);
 570
 571   /* The hash table traversals above created the duplicate blocks (and the
 572      statements within the duplicate blocks).  This loop creates PHI nodes for
 573      the duplicated blocks and redirects the incoming edges into BB to reach
 574      the duplicates of BB.  */
 575   htab_traverse (redirection_data, redirect_edges, &local_info);
 576
 577   /* Done with this block.  Clear REDIRECTION_DATA.  */
 578   htab_delete (redirection_data);
 579   redirection_data = NULL;
 580
 581   /* Indicate to our caller whether or not any jumps were threaded.  */
 582   return local_info.jumps_threaded;
 583 }
 584
 585 /* Threads edge E through E->dest to the edge E->aux.  Returns the copy
 586    of E->dest created during threading, or E->dest if it was not necessary
 587    to copy it (E is its single predecessor).  */
 588
 589 static basic_block
 590 thread_single_edge (edge e)
 591 {
 592   basic_block bb = e->dest;
 593   edge eto = (edge) e->aux;
 594   struct redirection_data rd;
 595
 596   e->aux = NULL;
 597
 598   thread_stats.num_threaded_edges++;
 599
 600   if (single_pred_p (bb))
 601     {
 602       /* If BB has just a single predecessor, we should only remove the
 603          control statements at its end, and successors except for ETO.  */
 604       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
 605
 606       /* And fixup the flags on the single remaining edge.  */
 607       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 608       eto->flags |= EDGE_FALLTHRU;
 609
 610       return bb;
 611     }
 612
 613   /* Otherwise, we need to create a copy.  */
 614   if (e->dest == eto->src)
 615     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
 616
 617   rd.outgoing_edge = eto;
 618
 619   create_block_for_threading (bb, &rd);
 620   create_edge_and_update_destination_phis (&rd, rd.dup_block);
 621
 622   if (dump_file && (dump_flags & TDF_DETAILS))
 623     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 624              e->src->index, e->dest->index, rd.dup_block->index);
 625
 626   rd.dup_block->count = e->count;
 627   rd.dup_block->frequency = EDGE_FREQUENCY (e);
 628   single_succ_edge (rd.dup_block)->count = e->count;
 629   redirect_edge_and_branch (e, rd.dup_block);
 630   flush_pending_stmts (e);
 631
 632   return rd.dup_block;
 633 }
 634
 635 /* Callback for dfs_enumerate_from.  Returns true if BB is different
 636    from STOP and DBDS_CE_STOP.  */
 637
 638 static basic_block dbds_ce_stop;
 639 static bool
 640 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
 641 {
 642   return (bb != (const_basic_block) stop
 643           && bb != dbds_ce_stop);
 644 }
 645
 646 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
 647    returns the state.  */
 648
 649 enum bb_dom_status
 650 {
 651   /* BB does not dominate latch of the LOOP.  */
 652   DOMST_NONDOMINATING,
 653   /* The LOOP is broken (there is no path from the header to its latch.  */
 654   DOMST_LOOP_BROKEN,
 655   /* BB dominates the latch of the LOOP.  */
 656   DOMST_DOMINATING
 657 };
 658
 659 static enum bb_dom_status
 660 determine_bb_domination_status (struct loop *loop, basic_block bb)
 661 {
 662   basic_block *bblocks;
 663   unsigned nblocks, i;
 664   bool bb_reachable = false;
 665   edge_iterator ei;
 666   edge e;
 667
 668 #ifdef ENABLE_CHECKING
 669   /* This function assumes BB is a successor of LOOP->header.
 670      If that is not the case return DOMST_NONDOMINATING which
 671      is always safe.  */
 672     {
 673       bool ok = false;
 674
 675       FOR_EACH_EDGE (e, ei, bb->preds)
 676         {
 677           if (e->src == loop->header)
 678             {
 679               ok = true;
 680               break;
 681             }
 682         }
 683
 684       if (!ok)
 685         return DOMST_NONDOMINATING;
 686     }
 687 #endif
 688
 689   if (bb == loop->latch)
 690     return DOMST_DOMINATING;
 691
 692   /* Check that BB dominates LOOP->latch, and that it is back-reachable
 693      from it.  */
 694
 695   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
 696   dbds_ce_stop = loop->header;
 697   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
 698                                 bblocks, loop->num_nodes, bb);
 699   for (i = 0; i < nblocks; i++)
 700     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
 701       {
 702         if (e->src == loop->header)
 703           {
 704             free (bblocks);
 705             return DOMST_NONDOMINATING;
 706           }
 707         if (e->src == bb)
 708           bb_reachable = true;
 709       }
 710
 711   free (bblocks);
 712   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
 713 }
 714
 715 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
 716    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
 717    to the inside of the loop.  */
 718
 719 static bool
 720 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
 721 {
 722   basic_block header = loop->header;
 723   edge e, tgt_edge, latch = loop_latch_edge (loop);
 724   edge_iterator ei;
 725   basic_block tgt_bb, atgt_bb;
 726   enum bb_dom_status domst;
 727
 728   /* We have already threaded through headers to exits, so all the threading
 729      requests now are to the inside of the loop.  We need to avoid creating
 730      irreducible regions (i.e., loops with more than one entry block), and
 731      also loop with several latch edges, or new subloops of the loop (although
 732      there are cases where it might be appropriate, it is difficult to decide,
 733      and doing it wrongly may confuse other optimizers).
 734
 735      We could handle more general cases here.  However, the intention is to
 736      preserve some information about the loop, which is impossible if its
 737      structure changes significantly, in a way that is not well understood.
 738      Thus we only handle few important special cases, in which also updating
 739      of the loop-carried information should be feasible:
 740
 741      1) Propagation of latch edge to a block that dominates the latch block
 742         of a loop.  This aims to handle the following idiom:
 743
 744         first = 1;
 745         while (1)
 746           {
 747             if (first)
 748               initialize;
 749             first = 0;
 750             body;
 751           }
 752
 753         After threading the latch edge, this becomes
 754
 755         first = 1;
 756         if (first)
 757           initialize;
 758         while (1)
 759           {
 760             first = 0;
 761             body;
 762           }
 763
 764         The original header of the loop is moved out of it, and we may thread
 765         the remaining edges through it without further constraints.
 766
 767      2) All entry edges are propagated to a single basic block that dominates
 768         the latch block of the loop.  This aims to handle the following idiom
 769         (normally created for "for" loops):
 770
 771         i = 0;
 772         while (1)
 773           {
 774             if (i >= 100)
 775               break;
 776             body;
 777             i++;
 778           }
 779
 780         This becomes
 781
 782         i = 0;
 783         while (1)
 784           {
 785             body;
 786             i++;
 787             if (i >= 100)
 788               break;
 789           }
 790      */
 791
 792   /* Threading through the header won't improve the code if the header has just
 793      one successor.  */
 794   if (single_succ_p (header))
 795     goto fail;
 796
 797   if (latch->aux)
 798     {
 799       tgt_edge = (edge) latch->aux;
 800       tgt_bb = tgt_edge->dest;
 801     }
 802   else if (!may_peel_loop_headers
 803            && !redirection_block_p (loop->header))
 804     goto fail;
 805   else
 806     {
 807       tgt_bb = NULL;
 808       tgt_edge = NULL;
 809       FOR_EACH_EDGE (e, ei, header->preds)
 810         {
 811           if (!e->aux)
 812             {
 813               if (e == latch)
 814                 continue;
 815
 816               /* If latch is not threaded, and there is a header
 817                  edge that is not threaded, we would create loop
 818                  with multiple entries.  */
 819               goto fail;
 820             }
 821
 822           tgt_edge = (edge) e->aux;
 823           atgt_bb = tgt_edge->dest;
 824           if (!tgt_bb)
 825             tgt_bb = atgt_bb;
 826           /* Two targets of threading would make us create loop
 827              with multiple entries.  */
 828           else if (tgt_bb != atgt_bb)
 829             goto fail;
 830         }
 831
 832       if (!tgt_bb)
 833         {
 834           /* There are no threading requests.  */
 835           return false;
 836         }
 837
 838       /* Redirecting to empty loop latch is useless.  */
 839       if (tgt_bb == loop->latch
 840           && empty_block_p (loop->latch))
 841         goto fail;
 842     }
 843
 844   /* The target block must dominate the loop latch, otherwise we would be
 845      creating a subloop.  */
 846   domst = determine_bb_domination_status (loop, tgt_bb);
 847   if (domst == DOMST_NONDOMINATING)
 848     goto fail;
 849   if (domst == DOMST_LOOP_BROKEN)
 850     {
 851       /* If the loop ceased to exist, mark it as such, and thread through its
 852          original header.  */
 853       loop->header = NULL;
 854       loop->latch = NULL;
 855       return thread_block (header, false);
 856     }
 857
 858   if (tgt_bb->loop_father->header == tgt_bb)
 859     {
 860       /* If the target of the threading is a header of a subloop, we need
 861          to create a preheader for it, so that the headers of the two loops
 862          do not merge.  */
 863       if (EDGE_COUNT (tgt_bb->preds) > 2)
 864         {
 865           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
 866           gcc_assert (tgt_bb != NULL);
 867         }
 868       else
 869         tgt_bb = split_edge (tgt_edge);
 870     }
 871
 872   if (latch->aux)
 873     {
 874       /* First handle the case latch edge is redirected.  */
 875       loop->latch = thread_single_edge (latch);
 876       gcc_assert (single_succ (loop->latch) == tgt_bb);
 877       loop->header = tgt_bb;
 878
 879       /* Thread the remaining edges through the former header.  */
 880       thread_block (header, false);
 881     }
 882   else
 883     {
 884       basic_block new_preheader;
 885
 886       /* Now consider the case entry edges are redirected to the new entry
 887          block.  Remember one entry edge, so that we can find the new
 888         preheader (its destination after threading).  */
 889       FOR_EACH_EDGE (e, ei, header->preds)
 890         {
 891           if (e->aux)
 892             break;
 893         }
 894
 895       /* The duplicate of the header is the new preheader of the loop.  Ensure
 896          that it is placed correctly in the loop hierarchy.  */
 897       set_loop_copy (loop, loop_outer (loop));
 898
 899       thread_block (header, false);
 900       set_loop_copy (loop, NULL);
 901       new_preheader = e->dest;
 902
 903       /* Create the new latch block.  This is always necessary, as the latch
 904          must have only a single successor, but the original header had at
 905          least two successors.  */
 906       loop->latch = NULL;
 907       mfb_kj_edge = single_succ_edge (new_preheader);
 908       loop->header = mfb_kj_edge->dest;
 909       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
 910       loop->header = latch->dest;
 911       loop->latch = latch->src;
 912     }
 913
 914   return true;
 915
 916 fail:
 917   /* We failed to thread anything.  Cancel the requests.  */
 918   FOR_EACH_EDGE (e, ei, header->preds)
 919     {
 920       e->aux = NULL;
 921     }
 922   return false;
 923 }
 924
 925 /* Walk through the registered jump threads and convert them into a
 926    form convenient for this pass.
 927
 928    Any block which has incoming edges threaded to outgoing edges
 929    will have its entry in THREADED_BLOCK set.
 930
 931    Any threaded edge will have its new outgoing edge stored in the
 932    original edge's AUX field.
 933
 934    This form avoids the need to walk all the edges in the CFG to
 935    discover blocks which need processing and avoids unnecessary
 936    hash table lookups to map from threaded edge to new target.  */
 937
 938 static void
 939 mark_threaded_blocks (bitmap threaded_blocks)
 940 {
 941   unsigned int i;
 942   bitmap_iterator bi;
 943   bitmap tmp = BITMAP_ALLOC (NULL);
 944   basic_block bb;
 945   edge e;
 946   edge_iterator ei;
 947
 948   for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
 949     {
 950       edge e = VEC_index (edge, threaded_edges, i);
 951       edge e2 = VEC_index (edge, threaded_edges, i + 1);
 952
 953       e->aux = e2;
 954       bitmap_set_bit (tmp, e->dest->index);
 955     }
 956
 957   /* If optimizing for size, only thread through block if we don't have
 958      to duplicate it or it's an otherwise empty redirection block.  */
 959   if (optimize_function_for_size_p (cfun))
 960     {
 961       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
 962         {
 963           bb = BASIC_BLOCK (i);
 964           if (EDGE_COUNT (bb->preds) > 1
 965               && !redirection_block_p (bb))
 966             {
 967               FOR_EACH_EDGE (e, ei, bb->preds)
 968                       e->aux = NULL;
 969             }
 970           else
 971             bitmap_set_bit (threaded_blocks, i);
 972         }
 973     }
 974   else
 975     bitmap_copy (threaded_blocks, tmp);
 976
 977   BITMAP_FREE(tmp);
 978 }
 979
 980
 981 /* Walk through all blocks and thread incoming edges to the appropriate
 982    outgoing edge for each edge pair recorded in THREADED_EDGES.
 983
 984    It is the caller's responsibility to fix the dominance information
 985    and rewrite duplicated SSA_NAMEs back into SSA form.
 986
 987    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
 988    loop headers if it does not simplify the loop.
 989
 990    Returns true if one or more edges were threaded, false otherwise.  */
 991
 992 bool
 993 thread_through_all_blocks (bool may_peel_loop_headers)
 994 {
 995   bool retval = false;
 996   unsigned int i;
 997   bitmap_iterator bi;
 998   bitmap threaded_blocks;
 999   struct loop *loop;
1000   loop_iterator li;
1001
1002   /* We must know about loops in order to preserve them.  */
1003   gcc_assert (current_loops != NULL);
1004
1005   if (threaded_edges == NULL)
1006     return false;
1007
1008   threaded_blocks = BITMAP_ALLOC (NULL);
1009   memset (&thread_stats, 0, sizeof (thread_stats));
1010
1011   mark_threaded_blocks (threaded_blocks);
1012
1013   initialize_original_copy_tables ();
1014
1015   /* First perform the threading requests that do not affect
1016      loop structure.  */
1017   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1018     {
1019       basic_block bb = BASIC_BLOCK (i);
1020
1021       if (EDGE_COUNT (bb->preds) > 0)
1022         retval |= thread_block (bb, true);
1023     }
1024
1025   /* Then perform the threading through loop headers.  We start with the
1026      innermost loop, so that the changes in cfg we perform won't affect
1027      further threading.  */
1028   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1029     {
1030       if (!loop->header
1031           || !bitmap_bit_p (threaded_blocks, loop->header->index))
1032         continue;
1033
1034       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1035     }
1036
1037   statistics_counter_event (cfun, "Jumps threaded",
1038                             thread_stats.num_threaded_edges);
1039
1040   free_original_copy_tables ();
1041
1042   BITMAP_FREE (threaded_blocks);
1043   threaded_blocks = NULL;
1044   VEC_free (edge, heap, threaded_edges);
1045   threaded_edges = NULL;
1046
1047   if (retval)
1048     loops_state_set (LOOPS_NEED_FIXUP);
1049
1050   return retval;
1051 }
1052
1053 /* Register a jump threading opportunity.  We queue up all the jump
1054    threading opportunities discovered by a pass and update the CFG
1055    and SSA form all at once.
1056
1057    E is the edge we can thread, E2 is the new target edge, i.e., we
1058    are effectively recording that E->dest can be changed to E2->dest
1059    after fixing the SSA graph.  */
1060
1061 void
1062 register_jump_thread (edge e, edge e2)
1063 {
1064   if (threaded_edges == NULL)
1065     threaded_edges = VEC_alloc (edge, heap, 10);
1066
1067   if (dump_file && (dump_flags & TDF_DETAILS)
1068       && e->dest != e2->src)
1069     fprintf (dump_file,
1070              "  Registering jump thread around one or more intermediate blocks\n");
1071
1072   VEC_safe_push (edge, heap, threaded_edges, e);
1073   VEC_safe_push (edge, heap, threaded_edges, e2);
1074 }