gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass detects the loops that iterate a constant number of times,
  22    adds a canonical induction variable (step -1, tested against 0)
  23    and replaces the exit test.  This enables the less powerful rtl
  24    level analysis to use this information.
  25
  26    This might spoil the code in some cases (by increasing register pressure).
  27    Note that in the case the new variable is not needed, ivopts will get rid
  28    of it, so it might only be a problem when there are no other linear induction
  29    variables.  In that case the created optimization possibilities are likely
  30    to pay up.
  31
  32    Additionally in case we detect that it is beneficial to unroll the
  33    loop completely, we do it right here to expose the optimization
  34    possibilities to the following passes.  */
  35
  36 #include "config.h"
  37 #include "system.h"
  38 #include "coretypes.h"
  39 #include "tm.h"
  40 #include "tree.h"
  41 #include "tm_p.h"
  42 #include "basic-block.h"
  43 #include "diagnostic.h"
  44 #include "tree-pretty-print.h"
  45 #include "gimple-pretty-print.h"
  46 #include "tree-flow.h"
  47 #include "tree-dump.h"
  48 #include "cfgloop.h"
  49 #include "tree-pass.h"
  50 #include "tree-chrec.h"
  51 #include "tree-scalar-evolution.h"
  52 #include "params.h"
  53 #include "flags.h"
  54 #include "tree-inline.h"
  55 #include "target.h"
  56
  57 /* Specifies types of loops that may be unrolled.  */
  58
  59 enum unroll_level
  60 {
  61   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  62                            iteration.  */
  63   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  64                            of code size.  */
  65   UL_ALL                /* All suitable loops.  */
  66 };
  67
  68 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  69    is the exit edge whose condition is replaced.  */
  70
  71 static void
  72 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  73 {
  74   edge in;
  75   tree type, var;
  76   gimple cond;
  77   gimple_stmt_iterator incr_at;
  78   enum tree_code cmp;
  79
  80   if (dump_file && (dump_flags & TDF_DETAILS))
  81     {
  82       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  83       print_generic_expr (dump_file, niter, TDF_SLIM);
  84       fprintf (dump_file, " iterations.\n");
  85     }
  86
  87   cond = last_stmt (exit->src);
  88   in = EDGE_SUCC (exit->src, 0);
  89   if (in == exit)
  90     in = EDGE_SUCC (exit->src, 1);
  91
  92   /* Note that we do not need to worry about overflows, since
  93      type of niter is always unsigned and all comparisons are
  94      just for equality/nonequality -- i.e. everything works
  95      with a modulo arithmetics.  */
  96
  97   type = TREE_TYPE (niter);
  98   niter = fold_build2 (PLUS_EXPR, type,
  99                        niter,
 100                        build_int_cst (type, 1));
 101   incr_at = gsi_last_bb (in->src);
 102   create_iv (niter,
 103              build_int_cst (type, -1),
 104              NULL_TREE, loop,
 105              &incr_at, false, NULL, &var);
 106
 107   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 108   gimple_cond_set_code (cond, cmp);
 109   gimple_cond_set_lhs (cond, var);
 110   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 111   update_stmt (cond);
 112 }
 113
 114 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 115
 116 unsigned
 117 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 118 {
 119   basic_block *body = get_loop_body (loop);
 120   gimple_stmt_iterator gsi;
 121   unsigned size = 0, i;
 122
 123   for (i = 0; i < loop->num_nodes; i++)
 124     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 125       size += estimate_num_insns (gsi_stmt (gsi), weights);
 126   free (body);
 127
 128   return size;
 129 }
 130
 131 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 132 struct loop_size
 133 {
 134   /* Number of instructions in the loop.  */
 135   int overall;
 136
 137   /* Number of instructions that will be likely optimized out in
 138      peeled iterations of loop  (i.e. computation based on induction
 139      variable where induction variable starts at known constant.)  */
 140   int eliminated_by_peeling;
 141
 142   /* Same statistics for last iteration of loop: it is smaller because
 143      instructions after exit are not executed.  */
 144   int last_iteration;
 145   int last_iteration_eliminated_by_peeling;
 146 };
 147
 148 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 149
 150 static bool
 151 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 152 {
 153   affine_iv iv;
 154
 155   if (is_gimple_min_invariant (op))
 156     return true;
 157
 158   /* We can still fold accesses to constant arrays when index is known.  */
 159   if (TREE_CODE (op) != SSA_NAME)
 160     {
 161       tree base = op;
 162
 163       /* First make fast look if we see constant array inside.  */
 164       while (handled_component_p (base))
 165         base = TREE_OPERAND (base, 0);
 166       if ((DECL_P (base)
 167            && TREE_STATIC (base)
 168            && TREE_READONLY (base)
 169            && (DECL_INITIAL (base)
 170                || (!DECL_EXTERNAL (base)
 171                    && targetm.binds_local_p (base))))
 172           || CONSTANT_CLASS_P (base))
 173         {
 174           /* If so, see if we understand all the indices.  */
 175           base = op;
 176           while (handled_component_p (base))
 177             {
 178               if (TREE_CODE (base) == ARRAY_REF
 179                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 180                 return false;
 181               base = TREE_OPERAND (base, 0);
 182             }
 183           return true;
 184         }
 185       return false;
 186     }
 187
 188   /* Induction variables are constants.  */
 189   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 190     return false;
 191   if (!is_gimple_min_invariant (iv.base))
 192     return false;
 193   if (!is_gimple_min_invariant (iv.step))
 194     return false;
 195   return true;
 196 }
 197
 198 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 199    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 200
 201 static void
 202 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 203 {
 204   basic_block *body = get_loop_body (loop);
 205   gimple_stmt_iterator gsi;
 206   unsigned int i;
 207   bool after_exit;
 208
 209   size->overall = 0;
 210   size->eliminated_by_peeling = 0;
 211   size->last_iteration = 0;
 212   size->last_iteration_eliminated_by_peeling = 0;
 213
 214   if (dump_file && (dump_flags & TDF_DETAILS))
 215     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 216   for (i = 0; i < loop->num_nodes; i++)
 217     {
 218       if (exit && body[i] != exit->src
 219           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 220         after_exit = true;
 221       else
 222         after_exit = false;
 223       if (dump_file && (dump_flags & TDF_DETAILS))
 224         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 225
 226       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 227         {
 228           gimple stmt = gsi_stmt (gsi);
 229           int num = estimate_num_insns (stmt, &eni_size_weights);
 230           bool likely_eliminated = false;
 231
 232           if (dump_file && (dump_flags & TDF_DETAILS))
 233             {
 234               fprintf (dump_file, "  size: %3i ", num);
 235               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 236             }
 237
 238           /* Look for reasons why we might optimize this stmt away. */
 239
 240           /* Exit conditional.  */
 241           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 242             {
 243               if (dump_file && (dump_flags & TDF_DETAILS))
 244                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 245               likely_eliminated = true;
 246             }
 247           /* Sets of IV variables  */
 248           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 249               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 250             {
 251               if (dump_file && (dump_flags & TDF_DETAILS))
 252                 fprintf (dump_file, "   Induction variable computation will"
 253                          " be folded away.\n");
 254               likely_eliminated = true;
 255             }
 256           /* Assignments of IV variables.  */
 257           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 258                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 259                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 260                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 261                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 262                                                   stmt, loop)))
 263             {
 264               if (dump_file && (dump_flags & TDF_DETAILS))
 265                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 266               likely_eliminated = true;
 267             }
 268           /* Conditionals.  */
 269           else if (gimple_code (stmt) == GIMPLE_COND
 270                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 271                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 272             {
 273               if (dump_file && (dump_flags & TDF_DETAILS))
 274                 fprintf (dump_file, "   Constant conditional.\n");
 275               likely_eliminated = true;
 276             }
 277
 278           size->overall += num;
 279           if (likely_eliminated)
 280             size->eliminated_by_peeling += num;
 281           if (!after_exit)
 282             {
 283               size->last_iteration += num;
 284               if (likely_eliminated)
 285                 size->last_iteration_eliminated_by_peeling += num;
 286             }
 287         }
 288     }
 289   if (dump_file && (dump_flags & TDF_DETAILS))
 290     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 291              size->eliminated_by_peeling, size->last_iteration,
 292              size->last_iteration_eliminated_by_peeling);
 293
 294   free (body);
 295 }
 296
 297 /* Estimate number of insns of completely unrolled loop.
 298    It is (NUNROLL + 1) * size of loop body with taking into account
 299    the fact that in last copy everything after exit conditional
 300    is dead and that some instructions will be eliminated after
 301    peeling.
 302
 303    Loop body is likely going to simplify futher, this is difficult
 304    to guess, we just decrease the result by 1/3.  */
 305
 306 static unsigned HOST_WIDE_INT
 307 estimated_unrolled_size (struct loop_size *size,
 308                          unsigned HOST_WIDE_INT nunroll)
 309 {
 310   HOST_WIDE_INT unr_insns = ((nunroll)
 311                              * (HOST_WIDE_INT) (size->overall
 312                                                 - size->eliminated_by_peeling));
 313   if (!nunroll)
 314     unr_insns = 0;
 315   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 316
 317   unr_insns = unr_insns * 2 / 3;
 318   if (unr_insns <= 0)
 319     unr_insns = 1;
 320
 321   return unr_insns;
 322 }
 323
 324 /* Tries to unroll LOOP completely, i.e. NITER times.
 325    UL determines which loops we are allowed to unroll.
 326    EXIT is the exit of the loop that should be eliminated.  */
 327
 328 static bool
 329 try_unroll_loop_completely (struct loop *loop,
 330                             edge exit, tree niter,
 331                             enum unroll_level ul)
 332 {
 333   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 334   gimple cond;
 335   struct loop_size size;
 336
 337   if (loop->inner)
 338     return false;
 339
 340   if (!host_integerp (niter, 1))
 341     return false;
 342   n_unroll = tree_low_cst (niter, 1);
 343
 344   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 345   if (n_unroll > max_unroll)
 346     return false;
 347
 348   if (n_unroll)
 349     {
 350       if (ul == UL_SINGLE_ITER)
 351         return false;
 352
 353       tree_estimate_loop_size (loop, exit, &size);
 354       ninsns = size.overall;
 355
 356       unr_insns = estimated_unrolled_size (&size, n_unroll);
 357       if (dump_file && (dump_flags & TDF_DETAILS))
 358         {
 359           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 360           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 361                    (int) unr_insns);
 362         }
 363
 364       if (unr_insns > ninsns
 365           && (unr_insns
 366               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 367         {
 368           if (dump_file && (dump_flags & TDF_DETAILS))
 369             fprintf (dump_file, "Not unrolling loop %d "
 370                      "(--param max-completely-peeled-insns limit reached).\n",
 371                      loop->num);
 372           return false;
 373         }
 374
 375       if (ul == UL_NO_GROWTH
 376           && unr_insns > ninsns)
 377         {
 378           if (dump_file && (dump_flags & TDF_DETAILS))
 379             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 380           return false;
 381         }
 382     }
 383
 384   if (n_unroll)
 385     {
 386       sbitmap wont_exit;
 387       edge e;
 388       unsigned i;
 389       VEC (edge, heap) *to_remove = NULL;
 390
 391       initialize_original_copy_tables ();
 392       wont_exit = sbitmap_alloc (n_unroll + 1);
 393       sbitmap_ones (wont_exit);
 394       RESET_BIT (wont_exit, 0);
 395
 396       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 397                                                  n_unroll, wont_exit,
 398                                                  exit, &to_remove,
 399                                                  DLTHE_FLAG_UPDATE_FREQ
 400                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 401         {
 402           free_original_copy_tables ();
 403           free (wont_exit);
 404           return false;
 405         }
 406
 407       for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
 408         {
 409           bool ok = remove_path (e);
 410           gcc_assert (ok);
 411         }
 412
 413       VEC_free (edge, heap, to_remove);
 414       free (wont_exit);
 415       free_original_copy_tables ();
 416     }
 417
 418   cond = last_stmt (exit->src);
 419   if (exit->flags & EDGE_TRUE_VALUE)
 420     gimple_cond_make_true (cond);
 421   else
 422     gimple_cond_make_false (cond);
 423   update_stmt (cond);
 424   update_ssa (TODO_update_ssa);
 425
 426   if (dump_file && (dump_flags & TDF_DETAILS))
 427     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 428
 429   return true;
 430 }
 431
 432 /* Adds a canonical induction variable to LOOP if suitable.
 433    CREATE_IV is true if we may create a new iv.  UL determines
 434    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 435    to determine the number of iterations of a loop by direct evaluation.
 436    Returns true if cfg is changed.  */
 437
 438 static bool
 439 canonicalize_loop_induction_variables (struct loop *loop,
 440                                        bool create_iv, enum unroll_level ul,
 441                                        bool try_eval)
 442 {
 443   edge exit = NULL;
 444   tree niter;
 445
 446   niter = number_of_latch_executions (loop);
 447   if (TREE_CODE (niter) == INTEGER_CST)
 448     {
 449       exit = single_exit (loop);
 450       if (!just_once_each_iteration_p (loop, exit->src))
 451         return false;
 452     }
 453   else
 454     {
 455       /* If the loop has more than one exit, try checking all of them
 456          for # of iterations determinable through scev.  */
 457       if (!single_exit (loop))
 458         niter = find_loop_niter (loop, &exit);
 459
 460       /* Finally if everything else fails, try brute force evaluation.  */
 461       if (try_eval
 462           && (chrec_contains_undetermined (niter)
 463               || TREE_CODE (niter) != INTEGER_CST))
 464         niter = find_loop_niter_by_eval (loop, &exit);
 465
 466       if (chrec_contains_undetermined (niter)
 467           || TREE_CODE (niter) != INTEGER_CST)
 468         return false;
 469     }
 470
 471   if (dump_file && (dump_flags & TDF_DETAILS))
 472     {
 473       fprintf (dump_file, "Loop %d iterates ", loop->num);
 474       print_generic_expr (dump_file, niter, TDF_SLIM);
 475       fprintf (dump_file, " times.\n");
 476     }
 477
 478   if (try_unroll_loop_completely (loop, exit, niter, ul))
 479     return true;
 480
 481   if (create_iv)
 482     create_canonical_iv (loop, exit, niter);
 483
 484   return false;
 485 }
 486
 487 /* The main entry point of the pass.  Adds canonical induction variables
 488    to the suitable loops.  */
 489
 490 unsigned int
 491 canonicalize_induction_variables (void)
 492 {
 493   loop_iterator li;
 494   struct loop *loop;
 495   bool changed = false;
 496
 497   FOR_EACH_LOOP (li, loop, 0)
 498     {
 499       changed |= canonicalize_loop_induction_variables (loop,
 500                                                         true, UL_SINGLE_ITER,
 501                                                         true);
 502     }
 503
 504   /* Clean up the information about numbers of iterations, since brute force
 505      evaluation could reveal new information.  */
 506   scev_reset ();
 507
 508   if (changed)
 509     return TODO_cleanup_cfg;
 510   return 0;
 511 }
 512
 513 /* Unroll LOOPS completely if they iterate just few times.  Unless
 514    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 515    size of the code does not increase.  */
 516
 517 unsigned int
 518 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 519 {
 520   loop_iterator li;
 521   struct loop *loop;
 522   bool changed;
 523   enum unroll_level ul;
 524   int iteration = 0;
 525
 526   do
 527     {
 528       changed = false;
 529
 530       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 531         {
 532           if (may_increase_size && optimize_loop_for_speed_p (loop)
 533               /* Unroll outermost loops only if asked to do so or they do
 534                  not cause code growth.  */
 535               && (unroll_outer
 536                   || loop_outer (loop_outer (loop))))
 537             ul = UL_ALL;
 538           else
 539             ul = UL_NO_GROWTH;
 540           changed |= canonicalize_loop_induction_variables
 541                        (loop, false, ul, !flag_tree_loop_ivcanon);
 542         }
 543
 544       if (changed)
 545         {
 546           /* This will take care of removing completely unrolled loops
 547              from the loop structures so we can continue unrolling now
 548              innermost loops.  */
 549           if (cleanup_tree_cfg ())
 550             update_ssa (TODO_update_ssa_only_virtuals);
 551
 552           /* Clean up the information about numbers of iterations, since
 553              complete unrolling might have invalidated it.  */
 554           scev_reset ();
 555         }
 556     }
 557   while (changed
 558          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 559
 560   return 0;
 561 }