gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Free Software Foundation,
   3    Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "flags.h"
  27 #include "tm_p.h"
  28 #include "basic-block.h"
  29 #include "output.h"
  30 #include "function.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "tree-pass.h"
  34 #include "cfgloop.h"
  35
  36 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  37    one or more in-edges to B to instead reach the destination of an
  38    out-edge from B while preserving any side effects in B.
  39
  40    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  41    side effects of executing B.
  42
  43      1. Make a copy of B (including its outgoing edges and statements).  Call
  44         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  45
  46      2. Remove the control statement at the end of B' and all outgoing edges
  47         except B'->C.
  48
  49      3. Add a new argument to each PHI in C with the same value as the existing
  50         argument associated with edge B->C.  Associate the new PHI arguments
  51         with the edge B'->C.
  52
  53      4. For each PHI in B, find or create a PHI in B' with an identical
  54         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  55         value as the PHI in B associated with the edge A->B.  Associate
  56         the new argument in the PHI in B' with the edge A->B.
  57
  58      5. Change the edge A->B to A->B'.
  59
  60         5a. This automatically deletes any PHI arguments associated with the
  61             edge A->B in B.
  62
  63         5b. This automatically associates each new argument added in step 4
  64             with the edge A->B'.
  65
  66      6. Repeat for other incoming edges into B.
  67
  68      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  69
  70    Note that block duplication can be minimized by first collecting the
  71    set of unique destination blocks that the incoming edges should
  72    be threaded to.
  73
  74    Block duplication can be further minimized by using B instead of
  75    creating B' for one destination if all edges into B are going to be
  76    threaded to a successor of B.  We had code to do this at one time, but
  77    I'm not convinced it is correct with the changes to avoid mucking up
  78    the loop structure (which may cancel threading requests, thus a block
  79    which we thought was going to become unreachable may still be reachable).
  80    This code was also going to get ugly with the introduction of the ability
  81    for a single jump thread request to bypass multiple blocks.
  82
  83    We further reduce the number of edges and statements we create by
  84    not copying all the outgoing edges and the control statement in
  85    step #1.  We instead create a template block without the outgoing
  86    edges and duplicate the template.  */
  87
  88
  89 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  90    all the incoming edges which thread to the same destination edge at
  91    the same time.  That avoids lots of table lookups to get information
  92    for the destination edge.
  93
  94    To realize that implementation we create a list of incoming edges
  95    which thread to the same outgoing edge.  Thus to implement steps
  96    #5 and #6 we traverse our hash table of outgoing edge information.
  97    For each entry we walk the list of incoming edges which thread to
  98    the current outgoing edge.  */
  99
 100 struct el
 101 {
 102   edge e;
 103   struct el *next;
 104 };
 105
 106 /* Main data structure recording information regarding B's duplicate
 107    blocks.  */
 108
 109 /* We need to efficiently record the unique thread destinations of this
 110    block and specific information associated with those destinations.  We
 111    may have many incoming edges threaded to the same outgoing edge.  This
 112    can be naturally implemented with a hash table.  */
 113
 114 struct redirection_data
 115 {
 116   /* A duplicate of B with the trailing control statement removed and which
 117      targets a single successor of B.  */
 118   basic_block dup_block;
 119
 120   /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
 121      its single successor.  */
 122   edge outgoing_edge;
 123
 124   /* A list of incoming edges which we want to thread to
 125      OUTGOING_EDGE->dest.  */
 126   struct el *incoming_edges;
 127 };
 128
 129 /* Main data structure to hold information for duplicates of BB.  */
 130 static htab_t redirection_data;
 131
 132 /* Data structure of information to pass to hash table traversal routines.  */
 133 struct local_info
 134 {
 135   /* The current block we are working on.  */
 136   basic_block bb;
 137
 138   /* A template copy of BB with no outgoing edges or control statement that
 139      we use for creating copies.  */
 140   basic_block template_block;
 141
 142   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 143   bool jumps_threaded;
 144 };
 145
 146 /* Passes which use the jump threading code register jump threading
 147    opportunities as they are discovered.  We keep the registered
 148    jump threading opportunities in this vector as edge pairs
 149    (original_edge, target_edge).  */
 150 static VEC(edge,heap) *threaded_edges;
 151
 152
 153 /* Jump threading statistics.  */
 154
 155 struct thread_stats_d
 156 {
 157   unsigned long num_threaded_edges;
 158 };
 159
 160 struct thread_stats_d thread_stats;
 161
 162
 163 /* Remove the last statement in block BB if it is a control statement
 164    Also remove all outgoing edges except the edge which reaches DEST_BB.
 165    If DEST_BB is NULL, then remove all outgoing edges.  */
 166
 167 static void
 168 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 169 {
 170   gimple_stmt_iterator gsi;
 171   edge e;
 172   edge_iterator ei;
 173
 174   gsi = gsi_last_bb (bb);
 175
 176   /* If the duplicate ends with a control statement, then remove it.
 177
 178      Note that if we are duplicating the template block rather than the
 179      original basic block, then the duplicate might not have any real
 180      statements in it.  */
 181   if (!gsi_end_p (gsi)
 182       && gsi_stmt (gsi)
 183       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 184           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 185           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 186     gsi_remove (&gsi, true);
 187
 188   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 189     {
 190       if (e->dest != dest_bb)
 191         remove_edge (e);
 192       else
 193         ei_next (&ei);
 194     }
 195 }
 196
 197 /* Create a duplicate of BB which only reaches the destination of the edge
 198    stored in RD.  Record the duplicate block in RD.  */
 199
 200 static void
 201 create_block_for_threading (basic_block bb, struct redirection_data *rd)
 202 {
 203   /* We can use the generic block duplication code and simply remove
 204      the stuff we do not need.  */
 205   rd->dup_block = duplicate_block (bb, NULL, NULL);
 206
 207   /* Zero out the profile, since the block is unreachable for now.  */
 208   rd->dup_block->frequency = 0;
 209   rd->dup_block->count = 0;
 210
 211   /* The call to duplicate_block will copy everything, including the
 212      useless COND_EXPR or SWITCH_EXPR at the end of BB.  We just remove
 213      the useless COND_EXPR or SWITCH_EXPR here rather than having a
 214      specialized block copier.  We also remove all outgoing edges
 215      from the duplicate block.  The appropriate edge will be created
 216      later.  */
 217   remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
 218 }
 219
 220 /* Hashing and equality routines for our hash table.  */
 221 static hashval_t
 222 redirection_data_hash (const void *p)
 223 {
 224   edge e = ((const struct redirection_data *)p)->outgoing_edge;
 225   return e->dest->index;
 226 }
 227
 228 static int
 229 redirection_data_eq (const void *p1, const void *p2)
 230 {
 231   edge e1 = ((const struct redirection_data *)p1)->outgoing_edge;
 232   edge e2 = ((const struct redirection_data *)p2)->outgoing_edge;
 233
 234   return e1 == e2;
 235 }
 236
 237 /* Given an outgoing edge E lookup and return its entry in our hash table.
 238
 239    If INSERT is true, then we insert the entry into the hash table if
 240    it is not already present.  INCOMING_EDGE is added to the list of incoming
 241    edges associated with E in the hash table.  */
 242
 243 static struct redirection_data *
 244 lookup_redirection_data (edge e, edge incoming_edge, enum insert_option insert)
 245 {
 246   void **slot;
 247   struct redirection_data *elt;
 248
 249  /* Build a hash table element so we can see if E is already
 250      in the table.  */
 251   elt = XNEW (struct redirection_data);
 252   elt->outgoing_edge = e;
 253   elt->dup_block = NULL;
 254   elt->incoming_edges = NULL;
 255
 256   slot = htab_find_slot (redirection_data, elt, insert);
 257
 258   /* This will only happen if INSERT is false and the entry is not
 259      in the hash table.  */
 260   if (slot == NULL)
 261     {
 262       free (elt);
 263       return NULL;
 264     }
 265
 266   /* This will only happen if E was not in the hash table and
 267      INSERT is true.  */
 268   if (*slot == NULL)
 269     {
 270       *slot = (void *)elt;
 271       elt->incoming_edges = XNEW (struct el);
 272       elt->incoming_edges->e = incoming_edge;
 273       elt->incoming_edges->next = NULL;
 274       return elt;
 275     }
 276   /* E was in the hash table.  */
 277   else
 278     {
 279       /* Free ELT as we do not need it anymore, we will extract the
 280          relevant entry from the hash table itself.  */
 281       free (elt);
 282
 283       /* Get the entry stored in the hash table.  */
 284       elt = (struct redirection_data *) *slot;
 285
 286       /* If insertion was requested, then we need to add INCOMING_EDGE
 287          to the list of incoming edges associated with E.  */
 288       if (insert)
 289         {
 290           struct el *el = XNEW (struct el);
 291           el->next = elt->incoming_edges;
 292           el->e = incoming_edge;
 293           elt->incoming_edges = el;
 294         }
 295
 296       return elt;
 297     }
 298 }
 299
 300 /* Given a duplicate block and its single destination (both stored
 301    in RD).  Create an edge between the duplicate and its single
 302    destination.
 303
 304    Add an additional argument to any PHI nodes at the single
 305    destination.  */
 306
 307 static void
 308 create_edge_and_update_destination_phis (struct redirection_data *rd,
 309                                          basic_block bb)
 310 {
 311   edge e = make_edge (bb, rd->outgoing_edge->dest, EDGE_FALLTHRU);
 312   gimple_stmt_iterator gsi;
 313
 314   rescan_loop_exit (e, true, false);
 315   e->probability = REG_BR_PROB_BASE;
 316   e->count = bb->count;
 317   e->aux = rd->outgoing_edge->aux;
 318
 319   /* If there are any PHI nodes at the destination of the outgoing edge
 320      from the duplicate block, then we will need to add a new argument
 321      to them.  The argument should have the same value as the argument
 322      associated with the outgoing edge stored in RD.  */
 323   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
 324     {
 325       gimple phi = gsi_stmt (gsi);
 326       source_location locus;
 327       int indx = rd->outgoing_edge->dest_idx;
 328
 329       locus = gimple_phi_arg_location (phi, indx);
 330       add_phi_arg (phi, gimple_phi_arg_def (phi, indx), e, locus);
 331     }
 332 }
 333
 334 /* Hash table traversal callback routine to create duplicate blocks.  */
 335
 336 static int
 337 create_duplicates (void **slot, void *data)
 338 {
 339   struct redirection_data *rd = (struct redirection_data *) *slot;
 340   struct local_info *local_info = (struct local_info *)data;
 341
 342   /* Create a template block if we have not done so already.  Otherwise
 343      use the template to create a new block.  */
 344   if (local_info->template_block == NULL)
 345     {
 346       create_block_for_threading (local_info->bb, rd);
 347       local_info->template_block = rd->dup_block;
 348
 349       /* We do not create any outgoing edges for the template.  We will
 350          take care of that in a later traversal.  That way we do not
 351          create edges that are going to just be deleted.  */
 352     }
 353   else
 354     {
 355       create_block_for_threading (local_info->template_block, rd);
 356
 357       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
 358          block.  */
 359       create_edge_and_update_destination_phis (rd, rd->dup_block);
 360     }
 361
 362   /* Keep walking the hash table.  */
 363   return 1;
 364 }
 365
 366 /* We did not create any outgoing edges for the template block during
 367    block creation.  This hash table traversal callback creates the
 368    outgoing edge for the template block.  */
 369
 370 static int
 371 fixup_template_block (void **slot, void *data)
 372 {
 373   struct redirection_data *rd = (struct redirection_data *) *slot;
 374   struct local_info *local_info = (struct local_info *)data;
 375
 376   /* If this is the template block, then create its outgoing edges
 377      and halt the hash table traversal.  */
 378   if (rd->dup_block && rd->dup_block == local_info->template_block)
 379     {
 380       create_edge_and_update_destination_phis (rd, rd->dup_block);
 381       return 0;
 382     }
 383
 384   return 1;
 385 }
 386
 387 /* Hash table traversal callback to redirect each incoming edge
 388    associated with this hash table element to its new destination.  */
 389
 390 static int
 391 redirect_edges (void **slot, void *data)
 392 {
 393   struct redirection_data *rd = (struct redirection_data *) *slot;
 394   struct local_info *local_info = (struct local_info *)data;
 395   struct el *next, *el;
 396
 397   /* Walk over all the incoming edges associated associated with this
 398      hash table entry.  */
 399   for (el = rd->incoming_edges; el; el = next)
 400     {
 401       edge e = el->e;
 402
 403       /* Go ahead and free this element from the list.  Doing this now
 404          avoids the need for another list walk when we destroy the hash
 405          table.  */
 406       next = el->next;
 407       free (el);
 408
 409       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 410          to clear it will cause all kinds of unpleasant problems later.  */
 411       e->aux = NULL;
 412
 413       thread_stats.num_threaded_edges++;
 414
 415       if (rd->dup_block)
 416         {
 417           edge e2;
 418
 419           if (dump_file && (dump_flags & TDF_DETAILS))
 420             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 421                      e->src->index, e->dest->index, rd->dup_block->index);
 422
 423           rd->dup_block->count += e->count;
 424           rd->dup_block->frequency += EDGE_FREQUENCY (e);
 425           EDGE_SUCC (rd->dup_block, 0)->count += e->count;
 426           /* Redirect the incoming edge to the appropriate duplicate
 427              block.  */
 428           e2 = redirect_edge_and_branch (e, rd->dup_block);
 429           gcc_assert (e == e2);
 430           flush_pending_stmts (e2);
 431         }
 432     }
 433
 434   /* Indicate that we actually threaded one or more jumps.  */
 435   if (rd->incoming_edges)
 436     local_info->jumps_threaded = true;
 437
 438   return 1;
 439 }
 440
 441 /* Return true if this block has no executable statements other than
 442    a simple ctrl flow instruction.  When the number of outgoing edges
 443    is one, this is equivalent to a "forwarder" block.  */
 444
 445 static bool
 446 redirection_block_p (basic_block bb)
 447 {
 448   gimple_stmt_iterator gsi;
 449
 450   /* Advance to the first executable statement.  */
 451   gsi = gsi_start_bb (bb);
 452   while (!gsi_end_p (gsi)
 453          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
 454              || is_gimple_debug (gsi_stmt (gsi))
 455              || gimple_nop_p (gsi_stmt (gsi))))
 456     gsi_next (&gsi);
 457
 458   /* Check if this is an empty block.  */
 459   if (gsi_end_p (gsi))
 460     return true;
 461
 462   /* Test that we've reached the terminating control statement.  */
 463   return gsi_stmt (gsi)
 464          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 465              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 466              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
 467 }
 468
 469 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 470    is reached via one or more specific incoming edges, we know which
 471    outgoing edge from BB will be traversed.
 472
 473    We want to redirect those incoming edges to the target of the
 474    appropriate outgoing edge.  Doing so avoids a conditional branch
 475    and may expose new optimization opportunities.  Note that we have
 476    to update dominator tree and SSA graph after such changes.
 477
 478    The key to keeping the SSA graph update manageable is to duplicate
 479    the side effects occurring in BB so that those side effects still
 480    occur on the paths which bypass BB after redirecting edges.
 481
 482    We accomplish this by creating duplicates of BB and arranging for
 483    the duplicates to unconditionally pass control to one specific
 484    successor of BB.  We then revector the incoming edges into BB to
 485    the appropriate duplicate of BB.
 486
 487    If NOLOOP_ONLY is true, we only perform the threading as long as it
 488    does not affect the structure of the loops in a nontrivial way.  */
 489
 490 static bool
 491 thread_block (basic_block bb, bool noloop_only)
 492 {
 493   /* E is an incoming edge into BB that we may or may not want to
 494      redirect to a duplicate of BB.  */
 495   edge e, e2;
 496   edge_iterator ei;
 497   struct local_info local_info;
 498   struct loop *loop = bb->loop_father;
 499
 500   /* To avoid scanning a linear array for the element we need we instead
 501      use a hash table.  For normal code there should be no noticeable
 502      difference.  However, if we have a block with a large number of
 503      incoming and outgoing edges such linear searches can get expensive.  */
 504   redirection_data = htab_create (EDGE_COUNT (bb->succs),
 505                                   redirection_data_hash,
 506                                   redirection_data_eq,
 507                                   free);
 508
 509   /* If we thread the latch of the loop to its exit, the loop ceases to
 510      exist.  Make sure we do not restrict ourselves in order to preserve
 511      this loop.  */
 512   if (loop->header == bb)
 513     {
 514       e = loop_latch_edge (loop);
 515       e2 = (edge) e->aux;
 516
 517       if (e2 && loop_exit_edge_p (loop, e2))
 518         {
 519           loop->header = NULL;
 520           loop->latch = NULL;
 521         }
 522     }
 523
 524   /* Record each unique threaded destination into a hash table for
 525      efficient lookups.  */
 526   FOR_EACH_EDGE (e, ei, bb->preds)
 527     {
 528       e2 = (edge) e->aux;
 529
 530       if (!e2
 531           /* If NOLOOP_ONLY is true, we only allow threading through the
 532              header of a loop to exit edges.  */
 533           || (noloop_only
 534               && bb == bb->loop_father->header
 535               && !loop_exit_edge_p (bb->loop_father, e2)))
 536         continue;
 537
 538       if (e->dest == e2->src)
 539         update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
 540                                          e->count, (edge) e->aux);
 541
 542       /* Insert the outgoing edge into the hash table if it is not
 543          already in the hash table.  */
 544       lookup_redirection_data (e2, e, INSERT);
 545     }
 546
 547   /* We do not update dominance info.  */
 548   free_dominance_info (CDI_DOMINATORS);
 549
 550   /* Now create duplicates of BB.
 551
 552      Note that for a block with a high outgoing degree we can waste
 553      a lot of time and memory creating and destroying useless edges.
 554
 555      So we first duplicate BB and remove the control structure at the
 556      tail of the duplicate as well as all outgoing edges from the
 557      duplicate.  We then use that duplicate block as a template for
 558      the rest of the duplicates.  */
 559   local_info.template_block = NULL;
 560   local_info.bb = bb;
 561   local_info.jumps_threaded = false;
 562   htab_traverse (redirection_data, create_duplicates, &local_info);
 563
 564   /* The template does not have an outgoing edge.  Create that outgoing
 565      edge and update PHI nodes as the edge's target as necessary.
 566
 567      We do this after creating all the duplicates to avoid creating
 568      unnecessary edges.  */
 569   htab_traverse (redirection_data, fixup_template_block, &local_info);
 570
 571   /* The hash table traversals above created the duplicate blocks (and the
 572      statements within the duplicate blocks).  This loop creates PHI nodes for
 573      the duplicated blocks and redirects the incoming edges into BB to reach
 574      the duplicates of BB.  */
 575   htab_traverse (redirection_data, redirect_edges, &local_info);
 576
 577   /* Done with this block.  Clear REDIRECTION_DATA.  */
 578   htab_delete (redirection_data);
 579   redirection_data = NULL;
 580
 581   /* Indicate to our caller whether or not any jumps were threaded.  */
 582   return local_info.jumps_threaded;
 583 }
 584
 585 /* Threads edge E through E->dest to the edge E->aux.  Returns the copy
 586    of E->dest created during threading, or E->dest if it was not necessary
 587    to copy it (E is its single predecessor).  */
 588
 589 static basic_block
 590 thread_single_edge (edge e)
 591 {
 592   basic_block bb = e->dest;
 593   edge eto = (edge) e->aux;
 594   struct redirection_data rd;
 595
 596   e->aux = NULL;
 597
 598   thread_stats.num_threaded_edges++;
 599
 600   if (single_pred_p (bb))
 601     {
 602       /* If BB has just a single predecessor, we should only remove the
 603          control statements at its end, and successors except for ETO.  */
 604       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
 605
 606       /* And fixup the flags on the single remaining edge.  */
 607       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 608       eto->flags |= EDGE_FALLTHRU;
 609
 610       return bb;
 611     }
 612
 613   /* Otherwise, we need to create a copy.  */
 614   if (e->dest == eto->src)
 615     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
 616
 617   rd.outgoing_edge = eto;
 618
 619   create_block_for_threading (bb, &rd);
 620   create_edge_and_update_destination_phis (&rd, rd.dup_block);
 621
 622   if (dump_file && (dump_flags & TDF_DETAILS))
 623     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 624              e->src->index, e->dest->index, rd.dup_block->index);
 625
 626   rd.dup_block->count = e->count;
 627   rd.dup_block->frequency = EDGE_FREQUENCY (e);
 628   single_succ_edge (rd.dup_block)->count = e->count;
 629   redirect_edge_and_branch (e, rd.dup_block);
 630   flush_pending_stmts (e);
 631
 632   return rd.dup_block;
 633 }
 634
 635 /* Callback for dfs_enumerate_from.  Returns true if BB is different
 636    from STOP and DBDS_CE_STOP.  */
 637
 638 static basic_block dbds_ce_stop;
 639 static bool
 640 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
 641 {
 642   return (bb != (const_basic_block) stop
 643           && bb != dbds_ce_stop);
 644 }
 645
 646 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
 647    returns the state.  */
 648
 649 enum bb_dom_status
 650 {
 651   /* BB does not dominate latch of the LOOP.  */
 652   DOMST_NONDOMINATING,
 653   /* The LOOP is broken (there is no path from the header to its latch.  */
 654   DOMST_LOOP_BROKEN,
 655   /* BB dominates the latch of the LOOP.  */
 656   DOMST_DOMINATING
 657 };
 658
 659 static enum bb_dom_status
 660 determine_bb_domination_status (struct loop *loop, basic_block bb)
 661 {
 662   basic_block *bblocks;
 663   unsigned nblocks, i;
 664   bool bb_reachable = false;
 665   edge_iterator ei;
 666   edge e;
 667
 668   /* This function assumes BB is a successor of LOOP->header.
 669      If that is not the case return DOMST_NONDOMINATING which
 670      is always safe.  */
 671     {
 672       bool ok = false;
 673
 674       FOR_EACH_EDGE (e, ei, bb->preds)
 675         {
 676           if (e->src == loop->header)
 677             {
 678               ok = true;
 679               break;
 680             }
 681         }
 682
 683       if (!ok)
 684         return DOMST_NONDOMINATING;
 685     }
 686
 687   if (bb == loop->latch)
 688     return DOMST_DOMINATING;
 689
 690   /* Check that BB dominates LOOP->latch, and that it is back-reachable
 691      from it.  */
 692
 693   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
 694   dbds_ce_stop = loop->header;
 695   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
 696                                 bblocks, loop->num_nodes, bb);
 697   for (i = 0; i < nblocks; i++)
 698     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
 699       {
 700         if (e->src == loop->header)
 701           {
 702             free (bblocks);
 703             return DOMST_NONDOMINATING;
 704           }
 705         if (e->src == bb)
 706           bb_reachable = true;
 707       }
 708
 709   free (bblocks);
 710   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
 711 }
 712
 713 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
 714    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
 715    to the inside of the loop.  */
 716
 717 static bool
 718 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
 719 {
 720   basic_block header = loop->header;
 721   edge e, tgt_edge, latch = loop_latch_edge (loop);
 722   edge_iterator ei;
 723   basic_block tgt_bb, atgt_bb;
 724   enum bb_dom_status domst;
 725
 726   /* We have already threaded through headers to exits, so all the threading
 727      requests now are to the inside of the loop.  We need to avoid creating
 728      irreducible regions (i.e., loops with more than one entry block), and
 729      also loop with several latch edges, or new subloops of the loop (although
 730      there are cases where it might be appropriate, it is difficult to decide,
 731      and doing it wrongly may confuse other optimizers).
 732
 733      We could handle more general cases here.  However, the intention is to
 734      preserve some information about the loop, which is impossible if its
 735      structure changes significantly, in a way that is not well understood.
 736      Thus we only handle few important special cases, in which also updating
 737      of the loop-carried information should be feasible:
 738
 739      1) Propagation of latch edge to a block that dominates the latch block
 740         of a loop.  This aims to handle the following idiom:
 741
 742         first = 1;
 743         while (1)
 744           {
 745             if (first)
 746               initialize;
 747             first = 0;
 748             body;
 749           }
 750
 751         After threading the latch edge, this becomes
 752
 753         first = 1;
 754         if (first)
 755           initialize;
 756         while (1)
 757           {
 758             first = 0;
 759             body;
 760           }
 761
 762         The original header of the loop is moved out of it, and we may thread
 763         the remaining edges through it without further constraints.
 764
 765      2) All entry edges are propagated to a single basic block that dominates
 766         the latch block of the loop.  This aims to handle the following idiom
 767         (normally created for "for" loops):
 768
 769         i = 0;
 770         while (1)
 771           {
 772             if (i >= 100)
 773               break;
 774             body;
 775             i++;
 776           }
 777
 778         This becomes
 779
 780         i = 0;
 781         while (1)
 782           {
 783             body;
 784             i++;
 785             if (i >= 100)
 786               break;
 787           }
 788      */
 789
 790   /* Threading through the header won't improve the code if the header has just
 791      one successor.  */
 792   if (single_succ_p (header))
 793     goto fail;
 794
 795   if (latch->aux)
 796     {
 797       tgt_edge = (edge) latch->aux;
 798       tgt_bb = tgt_edge->dest;
 799     }
 800   else if (!may_peel_loop_headers
 801            && !redirection_block_p (loop->header))
 802     goto fail;
 803   else
 804     {
 805       tgt_bb = NULL;
 806       tgt_edge = NULL;
 807       FOR_EACH_EDGE (e, ei, header->preds)
 808         {
 809           if (!e->aux)
 810             {
 811               if (e == latch)
 812                 continue;
 813
 814               /* If latch is not threaded, and there is a header
 815                  edge that is not threaded, we would create loop
 816                  with multiple entries.  */
 817               goto fail;
 818             }
 819
 820           tgt_edge = (edge) e->aux;
 821           atgt_bb = tgt_edge->dest;
 822           if (!tgt_bb)
 823             tgt_bb = atgt_bb;
 824           /* Two targets of threading would make us create loop
 825              with multiple entries.  */
 826           else if (tgt_bb != atgt_bb)
 827             goto fail;
 828         }
 829
 830       if (!tgt_bb)
 831         {
 832           /* There are no threading requests.  */
 833           return false;
 834         }
 835
 836       /* Redirecting to empty loop latch is useless.  */
 837       if (tgt_bb == loop->latch
 838           && empty_block_p (loop->latch))
 839         goto fail;
 840     }
 841
 842   /* The target block must dominate the loop latch, otherwise we would be
 843      creating a subloop.  */
 844   domst = determine_bb_domination_status (loop, tgt_bb);
 845   if (domst == DOMST_NONDOMINATING)
 846     goto fail;
 847   if (domst == DOMST_LOOP_BROKEN)
 848     {
 849       /* If the loop ceased to exist, mark it as such, and thread through its
 850          original header.  */
 851       loop->header = NULL;
 852       loop->latch = NULL;
 853       return thread_block (header, false);
 854     }
 855
 856   if (tgt_bb->loop_father->header == tgt_bb)
 857     {
 858       /* If the target of the threading is a header of a subloop, we need
 859          to create a preheader for it, so that the headers of the two loops
 860          do not merge.  */
 861       if (EDGE_COUNT (tgt_bb->preds) > 2)
 862         {
 863           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
 864           gcc_assert (tgt_bb != NULL);
 865         }
 866       else
 867         tgt_bb = split_edge (tgt_edge);
 868     }
 869
 870   if (latch->aux)
 871     {
 872       /* First handle the case latch edge is redirected.  */
 873       loop->latch = thread_single_edge (latch);
 874       gcc_assert (single_succ (loop->latch) == tgt_bb);
 875       loop->header = tgt_bb;
 876
 877       /* Thread the remaining edges through the former header.  */
 878       thread_block (header, false);
 879     }
 880   else
 881     {
 882       basic_block new_preheader;
 883
 884       /* Now consider the case entry edges are redirected to the new entry
 885          block.  Remember one entry edge, so that we can find the new
 886         preheader (its destination after threading).  */
 887       FOR_EACH_EDGE (e, ei, header->preds)
 888         {
 889           if (e->aux)
 890             break;
 891         }
 892
 893       /* The duplicate of the header is the new preheader of the loop.  Ensure
 894          that it is placed correctly in the loop hierarchy.  */
 895       set_loop_copy (loop, loop_outer (loop));
 896
 897       thread_block (header, false);
 898       set_loop_copy (loop, NULL);
 899       new_preheader = e->dest;
 900
 901       /* Create the new latch block.  This is always necessary, as the latch
 902          must have only a single successor, but the original header had at
 903          least two successors.  */
 904       loop->latch = NULL;
 905       mfb_kj_edge = single_succ_edge (new_preheader);
 906       loop->header = mfb_kj_edge->dest;
 907       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
 908       loop->header = latch->dest;
 909       loop->latch = latch->src;
 910     }
 911
 912   return true;
 913
 914 fail:
 915   /* We failed to thread anything.  Cancel the requests.  */
 916   FOR_EACH_EDGE (e, ei, header->preds)
 917     {
 918       e->aux = NULL;
 919     }
 920   return false;
 921 }
 922
 923 /* Walk through the registered jump threads and convert them into a
 924    form convenient for this pass.
 925
 926    Any block which has incoming edges threaded to outgoing edges
 927    will have its entry in THREADED_BLOCK set.
 928
 929    Any threaded edge will have its new outgoing edge stored in the
 930    original edge's AUX field.
 931
 932    This form avoids the need to walk all the edges in the CFG to
 933    discover blocks which need processing and avoids unnecessary
 934    hash table lookups to map from threaded edge to new target.  */
 935
 936 static void
 937 mark_threaded_blocks (bitmap threaded_blocks)
 938 {
 939   unsigned int i;
 940   bitmap_iterator bi;
 941   bitmap tmp = BITMAP_ALLOC (NULL);
 942   basic_block bb;
 943   edge e;
 944   edge_iterator ei;
 945
 946   for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
 947     {
 948       edge e = VEC_index (edge, threaded_edges, i);
 949       edge e2 = VEC_index (edge, threaded_edges, i + 1);
 950
 951       e->aux = e2;
 952       bitmap_set_bit (tmp, e->dest->index);
 953     }
 954
 955   /* If optimizing for size, only thread through block if we don't have
 956      to duplicate it or it's an otherwise empty redirection block.  */
 957   if (optimize_function_for_size_p (cfun))
 958     {
 959       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
 960         {
 961           bb = BASIC_BLOCK (i);
 962           if (EDGE_COUNT (bb->preds) > 1
 963               && !redirection_block_p (bb))
 964             {
 965               FOR_EACH_EDGE (e, ei, bb->preds)
 966                       e->aux = NULL;
 967             }
 968           else
 969             bitmap_set_bit (threaded_blocks, i);
 970         }
 971     }
 972   else
 973     bitmap_copy (threaded_blocks, tmp);
 974
 975   BITMAP_FREE(tmp);
 976 }
 977
 978
 979 /* Walk through all blocks and thread incoming edges to the appropriate
 980    outgoing edge for each edge pair recorded in THREADED_EDGES.
 981
 982    It is the caller's responsibility to fix the dominance information
 983    and rewrite duplicated SSA_NAMEs back into SSA form.
 984
 985    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
 986    loop headers if it does not simplify the loop.
 987
 988    Returns true if one or more edges were threaded, false otherwise.  */
 989
 990 bool
 991 thread_through_all_blocks (bool may_peel_loop_headers)
 992 {
 993   bool retval = false;
 994   unsigned int i;
 995   bitmap_iterator bi;
 996   bitmap threaded_blocks;
 997   struct loop *loop;
 998   loop_iterator li;
 999
1000   /* We must know about loops in order to preserve them.  */
1001   gcc_assert (current_loops != NULL);
1002
1003   if (threaded_edges == NULL)
1004     return false;
1005
1006   threaded_blocks = BITMAP_ALLOC (NULL);
1007   memset (&thread_stats, 0, sizeof (thread_stats));
1008
1009   mark_threaded_blocks (threaded_blocks);
1010
1011   initialize_original_copy_tables ();
1012
1013   /* First perform the threading requests that do not affect
1014      loop structure.  */
1015   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1016     {
1017       basic_block bb = BASIC_BLOCK (i);
1018
1019       if (EDGE_COUNT (bb->preds) > 0)
1020         retval |= thread_block (bb, true);
1021     }
1022
1023   /* Then perform the threading through loop headers.  We start with the
1024      innermost loop, so that the changes in cfg we perform won't affect
1025      further threading.  */
1026   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1027     {
1028       if (!loop->header
1029           || !bitmap_bit_p (threaded_blocks, loop->header->index))
1030         continue;
1031
1032       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1033     }
1034
1035   statistics_counter_event (cfun, "Jumps threaded",
1036                             thread_stats.num_threaded_edges);
1037
1038   free_original_copy_tables ();
1039
1040   BITMAP_FREE (threaded_blocks);
1041   threaded_blocks = NULL;
1042   VEC_free (edge, heap, threaded_edges);
1043   threaded_edges = NULL;
1044
1045   if (retval)
1046     loops_state_set (LOOPS_NEED_FIXUP);
1047
1048   return retval;
1049 }
1050
1051 /* Register a jump threading opportunity.  We queue up all the jump
1052    threading opportunities discovered by a pass and update the CFG
1053    and SSA form all at once.
1054
1055    E is the edge we can thread, E2 is the new target edge, i.e., we
1056    are effectively recording that E->dest can be changed to E2->dest
1057    after fixing the SSA graph.  */
1058
1059 void
1060 register_jump_thread (edge e, edge e2)
1061 {
1062   if (threaded_edges == NULL)
1063     threaded_edges = VEC_alloc (edge, heap, 10);
1064
1065   if (dump_file && (dump_flags & TDF_DETAILS)
1066       && e->dest != e2->src)
1067     fprintf (dump_file,
1068              "  Registering jump thread around one or more intermediate blocks\n");
1069
1070   VEC_safe_push (edge, heap, threaded_edges, e);
1071   VEC_safe_push (edge, heap, threaded_edges, e2);
1072 }