gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass detects the loops that iterate a constant number of times,
  22    adds a canonical induction variable (step -1, tested against 0)
  23    and replaces the exit test.  This enables the less powerful rtl
  24    level analysis to use this information.
  25
  26    This might spoil the code in some cases (by increasing register pressure).
  27    Note that in the case the new variable is not needed, ivopts will get rid
  28    of it, so it might only be a problem when there are no other linear induction
  29    variables.  In that case the created optimization possibilities are likely
  30    to pay up.
  31
  32    Additionally in case we detect that it is beneficial to unroll the
  33    loop completely, we do it right here to expose the optimization
  34    possibilities to the following passes.  */
  35
  36 #include "config.h"
  37 #include "system.h"
  38 #include "coretypes.h"
  39 #include "tm.h"
  40 #include "tree.h"
  41 #include "tm_p.h"
  42 #include "basic-block.h"
  43 #include "tree-pretty-print.h"
  44 #include "gimple-pretty-print.h"
  45 #include "tree-flow.h"
  46 #include "tree-dump.h"
  47 #include "cfgloop.h"
  48 #include "tree-pass.h"
  49 #include "tree-chrec.h"
  50 #include "tree-scalar-evolution.h"
  51 #include "params.h"
  52 #include "flags.h"
  53 #include "tree-inline.h"
  54 #include "target.h"
  55
  56 /* Specifies types of loops that may be unrolled.  */
  57
  58 enum unroll_level
  59 {
  60   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  61                            iteration.  */
  62   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  63                            of code size.  */
  64   UL_ALL                /* All suitable loops.  */
  65 };
  66
  67 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  68    is the exit edge whose condition is replaced.  */
  69
  70 static void
  71 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  72 {
  73   edge in;
  74   tree type, var;
  75   gimple cond;
  76   gimple_stmt_iterator incr_at;
  77   enum tree_code cmp;
  78
  79   if (dump_file && (dump_flags & TDF_DETAILS))
  80     {
  81       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  82       print_generic_expr (dump_file, niter, TDF_SLIM);
  83       fprintf (dump_file, " iterations.\n");
  84     }
  85
  86   cond = last_stmt (exit->src);
  87   in = EDGE_SUCC (exit->src, 0);
  88   if (in == exit)
  89     in = EDGE_SUCC (exit->src, 1);
  90
  91   /* Note that we do not need to worry about overflows, since
  92      type of niter is always unsigned and all comparisons are
  93      just for equality/nonequality -- i.e. everything works
  94      with a modulo arithmetics.  */
  95
  96   type = TREE_TYPE (niter);
  97   niter = fold_build2 (PLUS_EXPR, type,
  98                        niter,
  99                        build_int_cst (type, 1));
 100   incr_at = gsi_last_bb (in->src);
 101   create_iv (niter,
 102              build_int_cst (type, -1),
 103              NULL_TREE, loop,
 104              &incr_at, false, NULL, &var);
 105
 106   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 107   gimple_cond_set_code (cond, cmp);
 108   gimple_cond_set_lhs (cond, var);
 109   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 110   update_stmt (cond);
 111 }
 112
 113 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 114
 115 unsigned
 116 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 117 {
 118   basic_block *body = get_loop_body (loop);
 119   gimple_stmt_iterator gsi;
 120   unsigned size = 0, i;
 121
 122   for (i = 0; i < loop->num_nodes; i++)
 123     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 124       size += estimate_num_insns (gsi_stmt (gsi), weights);
 125   free (body);
 126
 127   return size;
 128 }
 129
 130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop  (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145 };
 146
 147 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 148
 149 static bool
 150 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 151 {
 152   affine_iv iv;
 153
 154   if (is_gimple_min_invariant (op))
 155     return true;
 156
 157   /* We can still fold accesses to constant arrays when index is known.  */
 158   if (TREE_CODE (op) != SSA_NAME)
 159     {
 160       tree base = op;
 161
 162       /* First make fast look if we see constant array inside.  */
 163       while (handled_component_p (base))
 164         base = TREE_OPERAND (base, 0);
 165       if ((DECL_P (base) == VAR_DECL
 166            && const_value_known_p (base))
 167           || CONSTANT_CLASS_P (base))
 168         {
 169           /* If so, see if we understand all the indices.  */
 170           base = op;
 171           while (handled_component_p (base))
 172             {
 173               if (TREE_CODE (base) == ARRAY_REF
 174                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 175                 return false;
 176               base = TREE_OPERAND (base, 0);
 177             }
 178           return true;
 179         }
 180       return false;
 181     }
 182
 183   /* Induction variables are constants.  */
 184   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 185     return false;
 186   if (!is_gimple_min_invariant (iv.base))
 187     return false;
 188   if (!is_gimple_min_invariant (iv.step))
 189     return false;
 190   return true;
 191 }
 192
 193 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 194    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 195
 196 static void
 197 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 198 {
 199   basic_block *body = get_loop_body (loop);
 200   gimple_stmt_iterator gsi;
 201   unsigned int i;
 202   bool after_exit;
 203
 204   size->overall = 0;
 205   size->eliminated_by_peeling = 0;
 206   size->last_iteration = 0;
 207   size->last_iteration_eliminated_by_peeling = 0;
 208
 209   if (dump_file && (dump_flags & TDF_DETAILS))
 210     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 211   for (i = 0; i < loop->num_nodes; i++)
 212     {
 213       if (exit && body[i] != exit->src
 214           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 215         after_exit = true;
 216       else
 217         after_exit = false;
 218       if (dump_file && (dump_flags & TDF_DETAILS))
 219         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 220
 221       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 222         {
 223           gimple stmt = gsi_stmt (gsi);
 224           int num = estimate_num_insns (stmt, &eni_size_weights);
 225           bool likely_eliminated = false;
 226
 227           if (dump_file && (dump_flags & TDF_DETAILS))
 228             {
 229               fprintf (dump_file, "  size: %3i ", num);
 230               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 231             }
 232
 233           /* Look for reasons why we might optimize this stmt away. */
 234
 235           /* Exit conditional.  */
 236           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 237             {
 238               if (dump_file && (dump_flags & TDF_DETAILS))
 239                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 240               likely_eliminated = true;
 241             }
 242           /* Sets of IV variables  */
 243           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 244               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 245             {
 246               if (dump_file && (dump_flags & TDF_DETAILS))
 247                 fprintf (dump_file, "   Induction variable computation will"
 248                          " be folded away.\n");
 249               likely_eliminated = true;
 250             }
 251           /* Assignments of IV variables.  */
 252           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 253                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 254                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 255                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 256                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 257                                                   stmt, loop)))
 258             {
 259               if (dump_file && (dump_flags & TDF_DETAILS))
 260                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 261               likely_eliminated = true;
 262             }
 263           /* Conditionals.  */
 264           else if (gimple_code (stmt) == GIMPLE_COND
 265                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 266                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 267             {
 268               if (dump_file && (dump_flags & TDF_DETAILS))
 269                 fprintf (dump_file, "   Constant conditional.\n");
 270               likely_eliminated = true;
 271             }
 272
 273           size->overall += num;
 274           if (likely_eliminated)
 275             size->eliminated_by_peeling += num;
 276           if (!after_exit)
 277             {
 278               size->last_iteration += num;
 279               if (likely_eliminated)
 280                 size->last_iteration_eliminated_by_peeling += num;
 281             }
 282         }
 283     }
 284   if (dump_file && (dump_flags & TDF_DETAILS))
 285     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 286              size->eliminated_by_peeling, size->last_iteration,
 287              size->last_iteration_eliminated_by_peeling);
 288
 289   free (body);
 290 }
 291
 292 /* Estimate number of insns of completely unrolled loop.
 293    It is (NUNROLL + 1) * size of loop body with taking into account
 294    the fact that in last copy everything after exit conditional
 295    is dead and that some instructions will be eliminated after
 296    peeling.
 297
 298    Loop body is likely going to simplify futher, this is difficult
 299    to guess, we just decrease the result by 1/3.  */
 300
 301 static unsigned HOST_WIDE_INT
 302 estimated_unrolled_size (struct loop_size *size,
 303                          unsigned HOST_WIDE_INT nunroll)
 304 {
 305   HOST_WIDE_INT unr_insns = ((nunroll)
 306                              * (HOST_WIDE_INT) (size->overall
 307                                                 - size->eliminated_by_peeling));
 308   if (!nunroll)
 309     unr_insns = 0;
 310   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 311
 312   unr_insns = unr_insns * 2 / 3;
 313   if (unr_insns <= 0)
 314     unr_insns = 1;
 315
 316   return unr_insns;
 317 }
 318
 319 /* Tries to unroll LOOP completely, i.e. NITER times.
 320    UL determines which loops we are allowed to unroll.
 321    EXIT is the exit of the loop that should be eliminated.  */
 322
 323 static bool
 324 try_unroll_loop_completely (struct loop *loop,
 325                             edge exit, tree niter,
 326                             enum unroll_level ul)
 327 {
 328   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 329   gimple cond;
 330   struct loop_size size;
 331
 332   if (loop->inner)
 333     return false;
 334
 335   if (!host_integerp (niter, 1))
 336     return false;
 337   n_unroll = tree_low_cst (niter, 1);
 338
 339   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 340   if (n_unroll > max_unroll)
 341     return false;
 342
 343   if (n_unroll)
 344     {
 345       if (ul == UL_SINGLE_ITER)
 346         return false;
 347
 348       tree_estimate_loop_size (loop, exit, &size);
 349       ninsns = size.overall;
 350
 351       unr_insns = estimated_unrolled_size (&size, n_unroll);
 352       if (dump_file && (dump_flags & TDF_DETAILS))
 353         {
 354           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 355           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 356                    (int) unr_insns);
 357         }
 358
 359       if (unr_insns > ninsns
 360           && (unr_insns
 361               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 362         {
 363           if (dump_file && (dump_flags & TDF_DETAILS))
 364             fprintf (dump_file, "Not unrolling loop %d "
 365                      "(--param max-completely-peeled-insns limit reached).\n",
 366                      loop->num);
 367           return false;
 368         }
 369
 370       if (ul == UL_NO_GROWTH
 371           && unr_insns > ninsns)
 372         {
 373           if (dump_file && (dump_flags & TDF_DETAILS))
 374             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 375           return false;
 376         }
 377     }
 378
 379   if (n_unroll)
 380     {
 381       sbitmap wont_exit;
 382       edge e;
 383       unsigned i;
 384       VEC (edge, heap) *to_remove = NULL;
 385
 386       initialize_original_copy_tables ();
 387       wont_exit = sbitmap_alloc (n_unroll + 1);
 388       sbitmap_ones (wont_exit);
 389       RESET_BIT (wont_exit, 0);
 390
 391       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 392                                                  n_unroll, wont_exit,
 393                                                  exit, &to_remove,
 394                                                  DLTHE_FLAG_UPDATE_FREQ
 395                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 396         {
 397           free_original_copy_tables ();
 398           free (wont_exit);
 399           return false;
 400         }
 401
 402       FOR_EACH_VEC_ELT (edge, to_remove, i, e)
 403         {
 404           bool ok = remove_path (e);
 405           gcc_assert (ok);
 406         }
 407
 408       VEC_free (edge, heap, to_remove);
 409       free (wont_exit);
 410       free_original_copy_tables ();
 411     }
 412
 413   cond = last_stmt (exit->src);
 414   if (exit->flags & EDGE_TRUE_VALUE)
 415     gimple_cond_make_true (cond);
 416   else
 417     gimple_cond_make_false (cond);
 418   update_stmt (cond);
 419   update_ssa (TODO_update_ssa);
 420
 421   if (dump_file && (dump_flags & TDF_DETAILS))
 422     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 423
 424   return true;
 425 }
 426
 427 /* Adds a canonical induction variable to LOOP if suitable.
 428    CREATE_IV is true if we may create a new iv.  UL determines
 429    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 430    to determine the number of iterations of a loop by direct evaluation.
 431    Returns true if cfg is changed.  */
 432
 433 static bool
 434 canonicalize_loop_induction_variables (struct loop *loop,
 435                                        bool create_iv, enum unroll_level ul,
 436                                        bool try_eval)
 437 {
 438   edge exit = NULL;
 439   tree niter;
 440
 441   niter = number_of_latch_executions (loop);
 442   if (TREE_CODE (niter) == INTEGER_CST)
 443     {
 444       exit = single_exit (loop);
 445       if (!just_once_each_iteration_p (loop, exit->src))
 446         return false;
 447     }
 448   else
 449     {
 450       /* If the loop has more than one exit, try checking all of them
 451          for # of iterations determinable through scev.  */
 452       if (!single_exit (loop))
 453         niter = find_loop_niter (loop, &exit);
 454
 455       /* Finally if everything else fails, try brute force evaluation.  */
 456       if (try_eval
 457           && (chrec_contains_undetermined (niter)
 458               || TREE_CODE (niter) != INTEGER_CST))
 459         niter = find_loop_niter_by_eval (loop, &exit);
 460
 461       if (chrec_contains_undetermined (niter)
 462           || TREE_CODE (niter) != INTEGER_CST)
 463         return false;
 464     }
 465
 466   if (dump_file && (dump_flags & TDF_DETAILS))
 467     {
 468       fprintf (dump_file, "Loop %d iterates ", loop->num);
 469       print_generic_expr (dump_file, niter, TDF_SLIM);
 470       fprintf (dump_file, " times.\n");
 471     }
 472
 473   if (try_unroll_loop_completely (loop, exit, niter, ul))
 474     return true;
 475
 476   if (create_iv)
 477     create_canonical_iv (loop, exit, niter);
 478
 479   return false;
 480 }
 481
 482 /* The main entry point of the pass.  Adds canonical induction variables
 483    to the suitable loops.  */
 484
 485 unsigned int
 486 canonicalize_induction_variables (void)
 487 {
 488   loop_iterator li;
 489   struct loop *loop;
 490   bool changed = false;
 491
 492   FOR_EACH_LOOP (li, loop, 0)
 493     {
 494       changed |= canonicalize_loop_induction_variables (loop,
 495                                                         true, UL_SINGLE_ITER,
 496                                                         true);
 497     }
 498
 499   /* Clean up the information about numbers of iterations, since brute force
 500      evaluation could reveal new information.  */
 501   scev_reset ();
 502
 503   if (changed)
 504     return TODO_cleanup_cfg;
 505   return 0;
 506 }
 507
 508 /* Unroll LOOPS completely if they iterate just few times.  Unless
 509    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 510    size of the code does not increase.  */
 511
 512 unsigned int
 513 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 514 {
 515   loop_iterator li;
 516   struct loop *loop;
 517   bool changed;
 518   enum unroll_level ul;
 519   int iteration = 0;
 520
 521   do
 522     {
 523       changed = false;
 524
 525       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 526         {
 527           if (may_increase_size && optimize_loop_for_speed_p (loop)
 528               /* Unroll outermost loops only if asked to do so or they do
 529                  not cause code growth.  */
 530               && (unroll_outer
 531                   || loop_outer (loop_outer (loop))))
 532             ul = UL_ALL;
 533           else
 534             ul = UL_NO_GROWTH;
 535           changed |= canonicalize_loop_induction_variables
 536                        (loop, false, ul, !flag_tree_loop_ivcanon);
 537         }
 538
 539       if (changed)
 540         {
 541           /* This will take care of removing completely unrolled loops
 542              from the loop structures so we can continue unrolling now
 543              innermost loops.  */
 544           if (cleanup_tree_cfg ())
 545             update_ssa (TODO_update_ssa_only_virtuals);
 546
 547           /* Clean up the information about numbers of iterations, since
 548              complete unrolling might have invalidated it.  */
 549           scev_reset ();
 550         }
 551     }
 552   while (changed
 553          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 554
 555   return 0;
 556 }