gcc/tree-ssa-loop-manip.c

   1 /* High-level loop manipulation functions.
   2    Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "rtl.h"
  27 #include "tm_p.h"
  28 #include "hard-reg-set.h"
  29 #include "basic-block.h"
  30 #include "output.h"
  31 #include "diagnostic.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "tree-pass.h"
  37 #include "cfglayout.h"
  38 #include "tree-scalar-evolution.h"
  39 #include "params.h"
  40 #include "tree-inline.h"
  41
  42 /* Creates an induction variable with value BASE + STEP * iteration in LOOP.
  43    It is expected that neither BASE nor STEP are shared with other expressions
  44    (unless the sharing rules allow this).  Use VAR as a base var_decl for it
  45    (if NULL, a new temporary will be created).  The increment will occur at
  46    INCR_POS (after it if AFTER is true, before it otherwise).  INCR_POS and
  47    AFTER can be computed using standard_iv_increment_position.  The ssa versions
  48    of the variable before and after increment will be stored in VAR_BEFORE and
  49    VAR_AFTER (unless they are NULL).  */
  50
  51 void
  52 create_iv (tree base, tree step, tree var, struct loop *loop,
  53            block_stmt_iterator *incr_pos, bool after,
  54            tree *var_before, tree *var_after)
  55 {
  56   tree stmt, initial, step1, stmts;
  57   tree vb, va;
  58   enum tree_code incr_op = PLUS_EXPR;
  59   edge pe = loop_preheader_edge (loop);
  60
  61   if (!var)
  62     {
  63       var = create_tmp_var (TREE_TYPE (base), "ivtmp");
  64       add_referenced_var (var);
  65     }
  66
  67   vb = make_ssa_name (var, NULL_TREE);
  68   if (var_before)
  69     *var_before = vb;
  70   va = make_ssa_name (var, NULL_TREE);
  71   if (var_after)
  72     *var_after = va;
  73
  74   /* For easier readability of the created code, produce MINUS_EXPRs
  75      when suitable.  */
  76   if (TREE_CODE (step) == INTEGER_CST)
  77     {
  78       if (TYPE_UNSIGNED (TREE_TYPE (step)))
  79         {
  80           step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
  81           if (tree_int_cst_lt (step1, step))
  82             {
  83               incr_op = MINUS_EXPR;
  84               step = step1;
  85             }
  86         }
  87       else
  88         {
  89           bool ovf;
  90
  91           if (!tree_expr_nonnegative_warnv_p (step, &ovf)
  92               && may_negate_without_overflow_p (step))
  93             {
  94               incr_op = MINUS_EXPR;
  95               step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
  96             }
  97         }
  98     }
  99   if (POINTER_TYPE_P (TREE_TYPE (base)))
 100     {
 101       step = fold_convert (sizetype, step);
 102       if (incr_op == MINUS_EXPR)
 103         step = fold_build1 (NEGATE_EXPR, sizetype, step);
 104       incr_op = POINTER_PLUS_EXPR;
 105     }
 106   /* Gimplify the step if necessary.  We put the computations in front of the
 107      loop (i.e. the step should be loop invariant).  */
 108   step = force_gimple_operand (step, &stmts, true, var);
 109   if (stmts)
 110     bsi_insert_on_edge_immediate (pe, stmts);
 111
 112   stmt = build_gimple_modify_stmt (va,
 113                                    build2 (incr_op, TREE_TYPE (base),
 114                                            vb, step));
 115   SSA_NAME_DEF_STMT (va) = stmt;
 116   if (after)
 117     bsi_insert_after (incr_pos, stmt, BSI_NEW_STMT);
 118   else
 119     bsi_insert_before (incr_pos, stmt, BSI_NEW_STMT);
 120
 121   initial = force_gimple_operand (base, &stmts, true, var);
 122   if (stmts)
 123     bsi_insert_on_edge_immediate (pe, stmts);
 124
 125   stmt = create_phi_node (vb, loop->header);
 126   SSA_NAME_DEF_STMT (vb) = stmt;
 127   add_phi_arg (stmt, initial, loop_preheader_edge (loop));
 128   add_phi_arg (stmt, va, loop_latch_edge (loop));
 129 }
 130
 131 /* Add exit phis for the USE on EXIT.  */
 132
 133 static void
 134 add_exit_phis_edge (basic_block exit, tree use)
 135 {
 136   tree phi, def_stmt = SSA_NAME_DEF_STMT (use);
 137   basic_block def_bb = bb_for_stmt (def_stmt);
 138   struct loop *def_loop;
 139   edge e;
 140   edge_iterator ei;
 141
 142   /* Check that some of the edges entering the EXIT block exits a loop in
 143      that USE is defined.  */
 144   FOR_EACH_EDGE (e, ei, exit->preds)
 145     {
 146       def_loop = find_common_loop (def_bb->loop_father, e->src->loop_father);
 147       if (!flow_bb_inside_loop_p (def_loop, e->dest))
 148         break;
 149     }
 150
 151   if (!e)
 152     return;
 153
 154   phi = create_phi_node (use, exit);
 155   create_new_def_for (PHI_RESULT (phi), phi, PHI_RESULT_PTR (phi));
 156   FOR_EACH_EDGE (e, ei, exit->preds)
 157     add_phi_arg (phi, use, e);
 158 }
 159
 160 /* Add exit phis for VAR that is used in LIVEIN.
 161    Exits of the loops are stored in EXITS.  */
 162
 163 static void
 164 add_exit_phis_var (tree var, bitmap livein, bitmap exits)
 165 {
 166   bitmap def;
 167   unsigned index;
 168   basic_block def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 169   bitmap_iterator bi;
 170
 171   if (is_gimple_reg (var))
 172     bitmap_clear_bit (livein, def_bb->index);
 173   else
 174     bitmap_set_bit (livein, def_bb->index);
 175
 176   def = BITMAP_ALLOC (NULL);
 177   bitmap_set_bit (def, def_bb->index);
 178   compute_global_livein (livein, def);
 179   BITMAP_FREE (def);
 180
 181   EXECUTE_IF_AND_IN_BITMAP (exits, livein, 0, index, bi)
 182     {
 183       add_exit_phis_edge (BASIC_BLOCK (index), var);
 184     }
 185 }
 186
 187 /* Add exit phis for the names marked in NAMES_TO_RENAME.
 188    Exits of the loops are stored in EXITS.  Sets of blocks where the ssa
 189    names are used are stored in USE_BLOCKS.  */
 190
 191 static void
 192 add_exit_phis (bitmap names_to_rename, bitmap *use_blocks, bitmap loop_exits)
 193 {
 194   unsigned i;
 195   bitmap_iterator bi;
 196
 197   EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
 198     {
 199       add_exit_phis_var (ssa_name (i), use_blocks[i], loop_exits);
 200     }
 201 }
 202
 203 /* Returns a bitmap of all loop exit edge targets.  */
 204
 205 static bitmap
 206 get_loops_exits (void)
 207 {
 208   bitmap exits = BITMAP_ALLOC (NULL);
 209   basic_block bb;
 210   edge e;
 211   edge_iterator ei;
 212
 213   FOR_EACH_BB (bb)
 214     {
 215       FOR_EACH_EDGE (e, ei, bb->preds)
 216         if (e->src != ENTRY_BLOCK_PTR
 217             && !flow_bb_inside_loop_p (e->src->loop_father, bb))
 218           {
 219             bitmap_set_bit (exits, bb->index);
 220             break;
 221           }
 222     }
 223
 224   return exits;
 225 }
 226
 227 /* For USE in BB, if it is used outside of the loop it is defined in,
 228    mark it for rewrite.  Record basic block BB where it is used
 229    to USE_BLOCKS.  Record the ssa name index to NEED_PHIS bitmap.  */
 230
 231 static void
 232 find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
 233                          bitmap need_phis)
 234 {
 235   unsigned ver;
 236   basic_block def_bb;
 237   struct loop *def_loop;
 238
 239   if (TREE_CODE (use) != SSA_NAME)
 240     return;
 241
 242   /* We don't need to keep virtual operands in loop-closed form.  */
 243   if (!is_gimple_reg (use))
 244     return;
 245
 246   ver = SSA_NAME_VERSION (use);
 247   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
 248   if (!def_bb)
 249     return;
 250   def_loop = def_bb->loop_father;
 251
 252   /* If the definition is not inside loop, it is not interesting.  */
 253   if (!loop_outer (def_loop))
 254     return;
 255
 256   if (!use_blocks[ver])
 257     use_blocks[ver] = BITMAP_ALLOC (NULL);
 258   bitmap_set_bit (use_blocks[ver], bb->index);
 259
 260   bitmap_set_bit (need_phis, ver);
 261 }
 262
 263 /* For uses in STMT, mark names that are used outside of the loop they are
 264    defined to rewrite.  Record the set of blocks in that the ssa
 265    names are defined to USE_BLOCKS and the ssa names themselves to
 266    NEED_PHIS.  */
 267
 268 static void
 269 find_uses_to_rename_stmt (tree stmt, bitmap *use_blocks, bitmap need_phis)
 270 {
 271   ssa_op_iter iter;
 272   tree var;
 273   basic_block bb = bb_for_stmt (stmt);
 274
 275   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_USES)
 276     find_uses_to_rename_use (bb, var, use_blocks, need_phis);
 277 }
 278
 279 /* Marks names that are used in BB and outside of the loop they are
 280    defined in for rewrite.  Records the set of blocks in that the ssa
 281    names are defined to USE_BLOCKS.  Record the SSA names that will
 282    need exit PHIs in NEED_PHIS.  */
 283
 284 static void
 285 find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis)
 286 {
 287   block_stmt_iterator bsi;
 288   edge e;
 289   edge_iterator ei;
 290   tree phi;
 291
 292   FOR_EACH_EDGE (e, ei, bb->succs)
 293     for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 294       find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
 295                                use_blocks, need_phis);
 296
 297   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 298     find_uses_to_rename_stmt (bsi_stmt (bsi), use_blocks, need_phis);
 299 }
 300
 301 /* Marks names that are used outside of the loop they are defined in
 302    for rewrite.  Records the set of blocks in that the ssa
 303    names are defined to USE_BLOCKS.  If CHANGED_BBS is not NULL,
 304    scan only blocks in this set.  */
 305
 306 static void
 307 find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis)
 308 {
 309   basic_block bb;
 310   unsigned index;
 311   bitmap_iterator bi;
 312
 313   if (changed_bbs && !bitmap_empty_p (changed_bbs))
 314     {
 315       EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
 316         {
 317           find_uses_to_rename_bb (BASIC_BLOCK (index), use_blocks, need_phis);
 318         }
 319     }
 320   else
 321     {
 322       FOR_EACH_BB (bb)
 323         {
 324           find_uses_to_rename_bb (bb, use_blocks, need_phis);
 325         }
 326     }
 327 }
 328
 329 /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
 330    phi nodes to ensure that no variable is used outside the loop it is
 331    defined in.
 332
 333    This strengthening of the basic ssa form has several advantages:
 334
 335    1) Updating it during unrolling/peeling/versioning is trivial, since
 336       we do not need to care about the uses outside of the loop.
 337    2) The behavior of all uses of an induction variable is the same.
 338       Without this, you need to distinguish the case when the variable
 339       is used outside of the loop it is defined in, for example
 340
 341       for (i = 0; i < 100; i++)
 342         {
 343           for (j = 0; j < 100; j++)
 344             {
 345               k = i + j;
 346               use1 (k);
 347             }
 348           use2 (k);
 349         }
 350
 351       Looking from the outer loop with the normal SSA form, the first use of k
 352       is not well-behaved, while the second one is an induction variable with
 353       base 99 and step 1.
 354
 355       If CHANGED_BBS is not NULL, we look for uses outside loops only in
 356       the basic blocks in this set.
 357
 358       UPDATE_FLAG is used in the call to update_ssa.  See
 359       TODO_update_ssa* for documentation.  */
 360
 361 void
 362 rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
 363 {
 364   bitmap loop_exits;
 365   bitmap *use_blocks;
 366   unsigned i, old_num_ssa_names;
 367   bitmap names_to_rename;
 368
 369   current_loops->state |= LOOP_CLOSED_SSA;
 370   if (number_of_loops () <= 1)
 371     return;
 372
 373   loop_exits = get_loops_exits ();
 374   names_to_rename = BITMAP_ALLOC (NULL);
 375
 376   /* If the pass has caused the SSA form to be out-of-date, update it
 377      now.  */
 378   update_ssa (update_flag);
 379
 380   old_num_ssa_names = num_ssa_names;
 381   use_blocks = XCNEWVEC (bitmap, old_num_ssa_names);
 382
 383   /* Find the uses outside loops.  */
 384   find_uses_to_rename (changed_bbs, use_blocks, names_to_rename);
 385
 386   /* Add the PHI nodes on exits of the loops for the names we need to
 387      rewrite.  */
 388   add_exit_phis (names_to_rename, use_blocks, loop_exits);
 389
 390   for (i = 0; i < old_num_ssa_names; i++)
 391     BITMAP_FREE (use_blocks[i]);
 392   free (use_blocks);
 393   BITMAP_FREE (loop_exits);
 394   BITMAP_FREE (names_to_rename);
 395
 396   /* Fix up all the names found to be used outside their original
 397      loops.  */
 398   update_ssa (TODO_update_ssa);
 399 }
 400
 401 /* Check invariants of the loop closed ssa form for the USE in BB.  */
 402
 403 static void
 404 check_loop_closed_ssa_use (basic_block bb, tree use)
 405 {
 406   tree def;
 407   basic_block def_bb;
 408
 409   if (TREE_CODE (use) != SSA_NAME || !is_gimple_reg (use))
 410     return;
 411
 412   def = SSA_NAME_DEF_STMT (use);
 413   def_bb = bb_for_stmt (def);
 414   gcc_assert (!def_bb
 415               || flow_bb_inside_loop_p (def_bb->loop_father, bb));
 416 }
 417
 418 /* Checks invariants of loop closed ssa form in statement STMT in BB.  */
 419
 420 static void
 421 check_loop_closed_ssa_stmt (basic_block bb, tree stmt)
 422 {
 423   ssa_op_iter iter;
 424   tree var;
 425
 426   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_USES)
 427     check_loop_closed_ssa_use (bb, var);
 428 }
 429
 430 /* Checks that invariants of the loop closed ssa form are preserved.  */
 431
 432 void
 433 verify_loop_closed_ssa (void)
 434 {
 435   basic_block bb;
 436   block_stmt_iterator bsi;
 437   tree phi;
 438   unsigned i;
 439
 440   if (number_of_loops () <= 1)
 441     return;
 442
 443   verify_ssa (false);
 444
 445   FOR_EACH_BB (bb)
 446     {
 447       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 448         for (i = 0; i < (unsigned) PHI_NUM_ARGS (phi); i++)
 449           check_loop_closed_ssa_use (PHI_ARG_EDGE (phi, i)->src,
 450                                      PHI_ARG_DEF (phi, i));
 451
 452       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 453         check_loop_closed_ssa_stmt (bb, bsi_stmt (bsi));
 454     }
 455 }
 456
 457 /* Split loop exit edge EXIT.  The things are a bit complicated by a need to
 458    preserve the loop closed ssa form.  */
 459
 460 void
 461 split_loop_exit_edge (edge exit)
 462 {
 463   basic_block dest = exit->dest;
 464   basic_block bb = split_edge (exit);
 465   tree phi, new_phi, new_name, name;
 466   use_operand_p op_p;
 467
 468   for (phi = phi_nodes (dest); phi; phi = PHI_CHAIN (phi))
 469     {
 470       op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
 471
 472       name = USE_FROM_PTR (op_p);
 473
 474       /* If the argument of the PHI node is a constant, we do not need
 475          to keep it inside loop.  */
 476       if (TREE_CODE (name) != SSA_NAME)
 477         continue;
 478
 479       /* Otherwise create an auxiliary phi node that will copy the value
 480          of the SSA name out of the loop.  */
 481       new_name = duplicate_ssa_name (name, NULL);
 482       new_phi = create_phi_node (new_name, bb);
 483       SSA_NAME_DEF_STMT (new_name) = new_phi;
 484       add_phi_arg (new_phi, name, exit);
 485       SET_USE (op_p, new_name);
 486     }
 487 }
 488
 489 /* Returns the basic block in that statements should be emitted for induction
 490    variables incremented at the end of the LOOP.  */
 491
 492 basic_block
 493 ip_end_pos (struct loop *loop)
 494 {
 495   return loop->latch;
 496 }
 497
 498 /* Returns the basic block in that statements should be emitted for induction
 499    variables incremented just before exit condition of a LOOP.  */
 500
 501 basic_block
 502 ip_normal_pos (struct loop *loop)
 503 {
 504   tree last;
 505   basic_block bb;
 506   edge exit;
 507
 508   if (!single_pred_p (loop->latch))
 509     return NULL;
 510
 511   bb = single_pred (loop->latch);
 512   last = last_stmt (bb);
 513   if (!last
 514       || TREE_CODE (last) != COND_EXPR)
 515     return NULL;
 516
 517   exit = EDGE_SUCC (bb, 0);
 518   if (exit->dest == loop->latch)
 519     exit = EDGE_SUCC (bb, 1);
 520
 521   if (flow_bb_inside_loop_p (loop, exit->dest))
 522     return NULL;
 523
 524   return bb;
 525 }
 526
 527 /* Stores the standard position for induction variable increment in LOOP
 528    (just before the exit condition if it is available and latch block is empty,
 529    end of the latch block otherwise) to BSI.  INSERT_AFTER is set to true if
 530    the increment should be inserted after *BSI.  */
 531
 532 void
 533 standard_iv_increment_position (struct loop *loop, block_stmt_iterator *bsi,
 534                                 bool *insert_after)
 535 {
 536   basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
 537   tree last = last_stmt (latch);
 538
 539   if (!bb
 540       || (last && TREE_CODE (last) != LABEL_EXPR))
 541     {
 542       *bsi = bsi_last (latch);
 543       *insert_after = true;
 544     }
 545   else
 546     {
 547       *bsi = bsi_last (bb);
 548       *insert_after = false;
 549     }
 550 }
 551
 552 /* Copies phi node arguments for duplicated blocks.  The index of the first
 553    duplicated block is FIRST_NEW_BLOCK.  */
 554
 555 static void
 556 copy_phi_node_args (unsigned first_new_block)
 557 {
 558   unsigned i;
 559
 560   for (i = first_new_block; i < (unsigned) last_basic_block; i++)
 561     BASIC_BLOCK (i)->flags |= BB_DUPLICATED;
 562
 563   for (i = first_new_block; i < (unsigned) last_basic_block; i++)
 564     add_phi_args_after_copy_bb (BASIC_BLOCK (i));
 565
 566   for (i = first_new_block; i < (unsigned) last_basic_block; i++)
 567     BASIC_BLOCK (i)->flags &= ~BB_DUPLICATED;
 568 }
 569
 570
 571 /* The same as cfgloopmanip.c:duplicate_loop_to_header_edge, but also
 572    updates the PHI nodes at start of the copied region.  In order to
 573    achieve this, only loops whose exits all lead to the same location
 574    are handled.
 575
 576    Notice that we do not completely update the SSA web after
 577    duplication.  The caller is responsible for calling update_ssa
 578    after the loop has been duplicated.  */
 579
 580 bool
 581 tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
 582                                     unsigned int ndupl, sbitmap wont_exit,
 583                                     edge orig, VEC (edge, heap) **to_remove,
 584                                     int flags)
 585 {
 586   unsigned first_new_block;
 587
 588   if (!(current_loops->state & LOOPS_HAVE_SIMPLE_LATCHES))
 589     return false;
 590   if (!(current_loops->state & LOOPS_HAVE_PREHEADERS))
 591     return false;
 592
 593 #ifdef ENABLE_CHECKING
 594   verify_loop_closed_ssa ();
 595 #endif
 596
 597   first_new_block = last_basic_block;
 598   if (!duplicate_loop_to_header_edge (loop, e, ndupl, wont_exit,
 599                                       orig, to_remove, flags))
 600     return false;
 601
 602   /* Readd the removed phi args for e.  */
 603   flush_pending_stmts (e);
 604
 605   /* Copy the phi node arguments.  */
 606   copy_phi_node_args (first_new_block);
 607
 608   scev_reset ();
 609
 610   return true;
 611 }
 612
 613 /* Returns true if we can unroll LOOP FACTOR times.  Number
 614    of iterations of the loop is returned in NITER.  */
 615
 616 bool
 617 can_unroll_loop_p (struct loop *loop, unsigned factor,
 618                    struct tree_niter_desc *niter)
 619 {
 620   edge exit;
 621
 622   /* Check whether unrolling is possible.  We only want to unroll loops
 623      for that we are able to determine number of iterations.  We also
 624      want to split the extra iterations of the loop from its end,
 625      therefore we require that the loop has precisely one
 626      exit.  */
 627
 628   exit = single_dom_exit (loop);
 629   if (!exit)
 630     return false;
 631
 632   if (!number_of_iterations_exit (loop, exit, niter, false)
 633       || niter->cmp == ERROR_MARK
 634       /* Scalar evolutions analysis might have copy propagated
 635          the abnormal ssa names into these expressions, hence
 636          emitting the computations based on them during loop
 637          unrolling might create overlapping life ranges for
 638          them, and failures in out-of-ssa.  */
 639       || contains_abnormal_ssa_name_p (niter->may_be_zero)
 640       || contains_abnormal_ssa_name_p (niter->control.base)
 641       || contains_abnormal_ssa_name_p (niter->control.step)
 642       || contains_abnormal_ssa_name_p (niter->bound))
 643     return false;
 644
 645   /* And of course, we must be able to duplicate the loop.  */
 646   if (!can_duplicate_loop_p (loop))
 647     return false;
 648
 649   /* The final loop should be small enough.  */
 650   if (tree_num_loop_insns (loop, &eni_size_weights) * factor
 651       > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
 652     return false;
 653
 654   return true;
 655 }
 656
 657 /* Determines the conditions that control execution of LOOP unrolled FACTOR
 658    times.  DESC is number of iterations of LOOP.  ENTER_COND is set to
 659    condition that must be true if the main loop can be entered.
 660    EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
 661    how the exit from the unrolled loop should be controlled.  */
 662
 663 static void
 664 determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
 665                            unsigned factor, tree *enter_cond,
 666                            tree *exit_base, tree *exit_step,
 667                            enum tree_code *exit_cmp, tree *exit_bound)
 668 {
 669   tree stmts;
 670   tree base = desc->control.base;
 671   tree step = desc->control.step;
 672   tree bound = desc->bound;
 673   tree type = TREE_TYPE (base);
 674   tree bigstep, delta;
 675   tree min = lower_bound_in_type (type, type);
 676   tree max = upper_bound_in_type (type, type);
 677   enum tree_code cmp = desc->cmp;
 678   tree cond = boolean_true_node, assum;
 679
 680   *enter_cond = boolean_false_node;
 681   *exit_base = NULL_TREE;
 682   *exit_step = NULL_TREE;
 683   *exit_cmp = ERROR_MARK;
 684   *exit_bound = NULL_TREE;
 685   gcc_assert (cmp != ERROR_MARK);
 686
 687   /* We only need to be correct when we answer question
 688      "Do at least FACTOR more iterations remain?" in the unrolled loop.
 689      Thus, transforming BASE + STEP * i <> BOUND to
 690      BASE + STEP * i < BOUND is ok.  */
 691   if (cmp == NE_EXPR)
 692     {
 693       if (tree_int_cst_sign_bit (step))
 694         cmp = GT_EXPR;
 695       else
 696         cmp = LT_EXPR;
 697     }
 698   else if (cmp == LT_EXPR)
 699     {
 700       gcc_assert (!tree_int_cst_sign_bit (step));
 701     }
 702   else if (cmp == GT_EXPR)
 703     {
 704       gcc_assert (tree_int_cst_sign_bit (step));
 705     }
 706   else
 707     gcc_unreachable ();
 708
 709   /* The main body of the loop may be entered iff:
 710
 711      1) desc->may_be_zero is false.
 712      2) it is possible to check that there are at least FACTOR iterations
 713         of the loop, i.e., BOUND - step * FACTOR does not overflow.
 714      3) # of iterations is at least FACTOR  */
 715
 716   if (!integer_zerop (desc->may_be_zero))
 717     cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
 718                         invert_truthvalue (desc->may_be_zero),
 719                         cond);
 720
 721   bigstep = fold_build2 (MULT_EXPR, type, step,
 722                          build_int_cst_type (type, factor));
 723   delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
 724   if (cmp == LT_EXPR)
 725     assum = fold_build2 (GE_EXPR, boolean_type_node,
 726                          bound,
 727                          fold_build2 (PLUS_EXPR, type, min, delta));
 728   else
 729     assum = fold_build2 (LE_EXPR, boolean_type_node,
 730                          bound,
 731                          fold_build2 (PLUS_EXPR, type, max, delta));
 732   cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
 733
 734   bound = fold_build2 (MINUS_EXPR, type, bound, delta);
 735   assum = fold_build2 (cmp, boolean_type_node, base, bound);
 736   cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
 737
 738   cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
 739   if (stmts)
 740     bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
 741   /* cond now may be a gimple comparison, which would be OK, but also any
 742      other gimple rhs (say a && b).  In this case we need to force it to
 743      operand.  */
 744   if (!is_gimple_condexpr (cond))
 745     {
 746       cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
 747       if (stmts)
 748         bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
 749     }
 750   *enter_cond = cond;
 751
 752   base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
 753   if (stmts)
 754     bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
 755   bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
 756   if (stmts)
 757     bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
 758
 759   *exit_base = base;
 760   *exit_step = bigstep;
 761   *exit_cmp = cmp;
 762   *exit_bound = bound;
 763 }
 764
 765 /* Scales the frequencies of all basic blocks in LOOP that are strictly
 766    dominated by BB by NUM/DEN.  */
 767
 768 static void
 769 scale_dominated_blocks_in_loop (struct loop *loop, basic_block bb,
 770                                 int num, int den)
 771 {
 772   basic_block son;
 773
 774   if (den == 0)
 775     return;
 776
 777   for (son = first_dom_son (CDI_DOMINATORS, bb);
 778        son;
 779        son = next_dom_son (CDI_DOMINATORS, son))
 780     {
 781       if (!flow_bb_inside_loop_p (loop, son))
 782         continue;
 783       scale_bbs_frequencies_int (&son, 1, num, den);
 784       scale_dominated_blocks_in_loop (loop, son, num, den);
 785     }
 786 }
 787
 788 /* Unroll LOOP FACTOR times.  DESC describes number of iterations of LOOP.
 789    EXIT is the exit of the loop to that DESC corresponds.
 790
 791    If N is number of iterations of the loop and MAY_BE_ZERO is the condition
 792    under that loop exits in the first iteration even if N != 0,
 793
 794    while (1)
 795      {
 796        x = phi (init, next);
 797
 798        pre;
 799        if (st)
 800          break;
 801        post;
 802      }
 803
 804    becomes (with possibly the exit conditions formulated a bit differently,
 805    avoiding the need to create a new iv):
 806
 807    if (MAY_BE_ZERO || N < FACTOR)
 808      goto rest;
 809
 810    do
 811      {
 812        x = phi (init, next);
 813
 814        pre;
 815        post;
 816        pre;
 817        post;
 818        ...
 819        pre;
 820        post;
 821        N -= FACTOR;
 822
 823      } while (N >= FACTOR);
 824
 825    rest:
 826      init' = phi (init, x);
 827
 828    while (1)
 829      {
 830        x = phi (init', next);
 831
 832        pre;
 833        if (st)
 834          break;
 835        post;
 836      }
 837
 838    Before the loop is unrolled, TRANSFORM is called for it (only for the
 839    unrolled loop, but not for its versioned copy).  DATA is passed to
 840    TRANSFORM.  */
 841
 842 /* Probability in % that the unrolled loop is entered.  Just a guess.  */
 843 #define PROB_UNROLLED_LOOP_ENTERED 90
 844
 845 void
 846 tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
 847                                 edge exit, struct tree_niter_desc *desc,
 848                                 transform_callback transform,
 849                                 void *data)
 850 {
 851   tree  exit_if, ctr_before, ctr_after;
 852   tree enter_main_cond, exit_base, exit_step, exit_bound;
 853   enum tree_code exit_cmp;
 854   tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
 855   struct loop *new_loop;
 856   basic_block rest, exit_bb;
 857   edge old_entry, new_entry, old_latch, precond_edge, new_exit;
 858   edge new_nonexit, e;
 859   block_stmt_iterator bsi;
 860   use_operand_p op;
 861   bool ok;
 862   unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
 863   unsigned new_est_niter, i, prob;
 864   unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
 865   sbitmap wont_exit;
 866   VEC (edge, heap) *to_remove = NULL;
 867
 868   est_niter = expected_loop_iterations (loop);
 869   determine_exit_conditions (loop, desc, factor,
 870                              &enter_main_cond, &exit_base, &exit_step,
 871                              &exit_cmp, &exit_bound);
 872
 873   /* Let us assume that the unrolled loop is quite likely to be entered.  */
 874   if (integer_nonzerop (enter_main_cond))
 875     prob_entry = REG_BR_PROB_BASE;
 876   else
 877     prob_entry = PROB_UNROLLED_LOOP_ENTERED * REG_BR_PROB_BASE / 100;
 878
 879   /* The values for scales should keep profile consistent, and somewhat close
 880      to correct.
 881
 882      TODO: The current value of SCALE_REST makes it appear that the loop that
 883      is created by splitting the remaining iterations of the unrolled loop is
 884      executed the same number of times as the original loop, and with the same
 885      frequencies, which is obviously wrong.  This does not appear to cause
 886      problems, so we do not bother with fixing it for now.  To make the profile
 887      correct, we would need to change the probability of the exit edge of the
 888      loop, and recompute the distribution of frequencies in its body because
 889      of this change (scale the frequencies of blocks before and after the exit
 890      by appropriate factors).  */
 891   scale_unrolled = prob_entry;
 892   scale_rest = REG_BR_PROB_BASE;
 893
 894   new_loop = loop_version (loop, enter_main_cond, NULL,
 895                            prob_entry, scale_unrolled, scale_rest, true);
 896   gcc_assert (new_loop != NULL);
 897   update_ssa (TODO_update_ssa);
 898
 899   /* Determine the probability of the exit edge of the unrolled loop.  */
 900   new_est_niter = est_niter / factor;
 901
 902   /* Without profile feedback, loops for that we do not know a better estimate
 903      are assumed to roll 10 times.  When we unroll such loop, it appears to
 904      roll too little, and it may even seem to be cold.  To avoid this, we
 905      ensure that the created loop appears to roll at least 5 times (but at
 906      most as many times as before unrolling).  */
 907   if (new_est_niter < 5)
 908     {
 909       if (est_niter < 5)
 910         new_est_niter = est_niter;
 911       else
 912         new_est_niter = 5;
 913     }
 914
 915   /* Prepare the cfg and update the phi nodes.  Move the loop exit to the
 916      loop latch (and make its condition dummy, for the moment).  */
 917   rest = loop_preheader_edge (new_loop)->src;
 918   precond_edge = single_pred_edge (rest);
 919   split_edge (loop_latch_edge (loop));
 920   exit_bb = single_pred (loop->latch);
 921
 922   /* Since the exit edge will be removed, the frequency of all the blocks
 923      in the loop that are dominated by it must be scaled by
 924      1 / (1 - exit->probability).  */
 925   scale_dominated_blocks_in_loop (loop, exit->src,
 926                                   REG_BR_PROB_BASE,
 927                                   REG_BR_PROB_BASE - exit->probability);
 928
 929   bsi = bsi_last (exit_bb);
 930   exit_if = build3 (COND_EXPR, void_type_node, boolean_true_node,
 931                     NULL_TREE, NULL_TREE);
 932
 933   bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
 934   new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
 935   rescan_loop_exit (new_exit, true, false);
 936
 937   /* Set the probability of new exit to the same of the old one.  Fix
 938      the frequency of the latch block, by scaling it back by
 939      1 - exit->probability.  */
 940   new_exit->count = exit->count;
 941   new_exit->probability = exit->probability;
 942   new_nonexit = single_pred_edge (loop->latch);
 943   new_nonexit->probability = REG_BR_PROB_BASE - exit->probability;
 944   new_nonexit->flags = EDGE_TRUE_VALUE;
 945   new_nonexit->count -= exit->count;
 946   if (new_nonexit->count < 0)
 947     new_nonexit->count = 0;
 948   scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
 949                              REG_BR_PROB_BASE);
 950
 951   old_entry = loop_preheader_edge (loop);
 952   new_entry = loop_preheader_edge (new_loop);
 953   old_latch = loop_latch_edge (loop);
 954   for (phi_old_loop = phi_nodes (loop->header),
 955        phi_new_loop = phi_nodes (new_loop->header);
 956        phi_old_loop;
 957        phi_old_loop = PHI_CHAIN (phi_old_loop),
 958        phi_new_loop = PHI_CHAIN (phi_new_loop))
 959     {
 960       init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
 961       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
 962       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
 963       next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
 964
 965       /* Prefer using original variable as a base for the new ssa name.
 966          This is necessary for virtual ops, and useful in order to avoid
 967          losing debug info for real ops.  */
 968       if (TREE_CODE (next) == SSA_NAME)
 969         var = SSA_NAME_VAR (next);
 970       else if (TREE_CODE (init) == SSA_NAME)
 971         var = SSA_NAME_VAR (init);
 972       else
 973         {
 974           var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
 975           add_referenced_var (var);
 976         }
 977
 978       new_init = make_ssa_name (var, NULL_TREE);
 979       phi_rest = create_phi_node (new_init, rest);
 980       SSA_NAME_DEF_STMT (new_init) = phi_rest;
 981
 982       add_phi_arg (phi_rest, init, precond_edge);
 983       add_phi_arg (phi_rest, next, new_exit);
 984       SET_USE (op, new_init);
 985     }
 986
 987   remove_path (exit);
 988
 989   /* Transform the loop.  */
 990   if (transform)
 991     (*transform) (loop, data);
 992
 993   /* Unroll the loop and remove the exits in all iterations except for the
 994      last one.  */
 995   wont_exit = sbitmap_alloc (factor);
 996   sbitmap_ones (wont_exit);
 997   RESET_BIT (wont_exit, factor - 1);
 998
 999   ok = tree_duplicate_loop_to_header_edge
1000           (loop, loop_latch_edge (loop), factor - 1,
1001            wont_exit, new_exit, &to_remove, DLTHE_FLAG_UPDATE_FREQ);
1002   free (wont_exit);
1003   gcc_assert (ok);
1004
1005   for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
1006     {
1007       ok = remove_path (e);
1008       gcc_assert (ok);
1009     }
1010   VEC_free (edge, heap, to_remove);
1011   update_ssa (TODO_update_ssa);
1012
1013   /* Ensure that the frequencies in the loop match the new estimated
1014      number of iterations, and change the probability of the new
1015      exit edge.  */
1016   freq_h = loop->header->frequency;
1017   freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
1018   if (freq_h != 0)
1019     scale_loop_frequencies (loop, freq_e * (new_est_niter + 1), freq_h);
1020
1021   exit_bb = single_pred (loop->latch);
1022   new_exit = find_edge (exit_bb, rest);
1023   new_exit->count = loop_preheader_edge (loop)->count;
1024   new_exit->probability = REG_BR_PROB_BASE / (new_est_niter + 1);
1025
1026   rest->count += new_exit->count;
1027   rest->frequency += EDGE_FREQUENCY (new_exit);
1028
1029   new_nonexit = single_pred_edge (loop->latch);
1030   prob = new_nonexit->probability;
1031   new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
1032   new_nonexit->count = exit_bb->count - new_exit->count;
1033   if (new_nonexit->count < 0)
1034     new_nonexit->count = 0;
1035   scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
1036                              prob);
1037
1038   /* Finally create the new counter for number of iterations and add the new
1039      exit instruction.  */
1040   bsi = bsi_last (exit_bb);
1041   exit_if = bsi_stmt (bsi);
1042   create_iv (exit_base, exit_step, NULL_TREE, loop,
1043              &bsi, false, &ctr_before, &ctr_after);
1044   COND_EXPR_COND (exit_if) = build2 (exit_cmp, boolean_type_node, ctr_after,
1045                                      exit_bound);
1046   update_stmt (exit_if);
1047
1048 #ifdef ENABLE_CHECKING
1049   verify_flow_info ();
1050   verify_dominators (CDI_DOMINATORS);
1051   verify_loop_structure ();
1052   verify_loop_closed_ssa ();
1053 #endif
1054 }
1055
1056 /* Wrapper over tree_transform_and_unroll_loop for case we do not
1057    want to transform the loop before unrolling.  The meaning
1058    of the arguments is the same as for tree_transform_and_unroll_loop.  */
1059
1060 void
1061 tree_unroll_loop (struct loop *loop, unsigned factor,
1062                   edge exit, struct tree_niter_desc *desc)
1063 {
1064   tree_transform_and_unroll_loop (loop, factor, exit, desc,
1065                                   NULL, NULL);
1066 }