gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass detects the loops that iterate a constant number of times,
  22    adds a canonical induction variable (step -1, tested against 0)
  23    and replaces the exit test.  This enables the less powerful rtl
  24    level analysis to use this information.
  25
  26    This might spoil the code in some cases (by increasing register pressure).
  27    Note that in the case the new variable is not needed, ivopts will get rid
  28    of it, so it might only be a problem when there are no other linear induction
  29    variables.  In that case the created optimization possibilities are likely
  30    to pay up.
  31
  32    Additionally in case we detect that it is beneficial to unroll the
  33    loop completely, we do it right here to expose the optimization
  34    possibilities to the following passes.  */
  35
  36 #include "config.h"
  37 #include "system.h"
  38 #include "coretypes.h"
  39 #include "tm.h"
  40 #include "tree.h"
  41 #include "tm_p.h"
  42 #include "basic-block.h"
  43 #include "tree-pretty-print.h"
  44 #include "gimple-pretty-print.h"
  45 #include "tree-flow.h"
  46 #include "tree-dump.h"
  47 #include "cfgloop.h"
  48 #include "tree-pass.h"
  49 #include "tree-chrec.h"
  50 #include "tree-scalar-evolution.h"
  51 #include "params.h"
  52 #include "flags.h"
  53 #include "tree-inline.h"
  54 #include "target.h"
  55
  56 /* Specifies types of loops that may be unrolled.  */
  57
  58 enum unroll_level
  59 {
  60   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  61                            iteration.  */
  62   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  63                            of code size.  */
  64   UL_ALL                /* All suitable loops.  */
  65 };
  66
  67 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  68    is the exit edge whose condition is replaced.  */
  69
  70 static void
  71 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  72 {
  73   edge in;
  74   tree type, var;
  75   gimple cond;
  76   gimple_stmt_iterator incr_at;
  77   enum tree_code cmp;
  78
  79   if (dump_file && (dump_flags & TDF_DETAILS))
  80     {
  81       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  82       print_generic_expr (dump_file, niter, TDF_SLIM);
  83       fprintf (dump_file, " iterations.\n");
  84     }
  85
  86   cond = last_stmt (exit->src);
  87   in = EDGE_SUCC (exit->src, 0);
  88   if (in == exit)
  89     in = EDGE_SUCC (exit->src, 1);
  90
  91   /* Note that we do not need to worry about overflows, since
  92      type of niter is always unsigned and all comparisons are
  93      just for equality/nonequality -- i.e. everything works
  94      with a modulo arithmetics.  */
  95
  96   type = TREE_TYPE (niter);
  97   niter = fold_build2 (PLUS_EXPR, type,
  98                        niter,
  99                        build_int_cst (type, 1));
 100   incr_at = gsi_last_bb (in->src);
 101   create_iv (niter,
 102              build_int_cst (type, -1),
 103              NULL_TREE, loop,
 104              &incr_at, false, NULL, &var);
 105
 106   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 107   gimple_cond_set_code (cond, cmp);
 108   gimple_cond_set_lhs (cond, var);
 109   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 110   update_stmt (cond);
 111 }
 112
 113 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 114
 115 unsigned
 116 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 117 {
 118   basic_block *body = get_loop_body (loop);
 119   gimple_stmt_iterator gsi;
 120   unsigned size = 0, i;
 121
 122   for (i = 0; i < loop->num_nodes; i++)
 123     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 124       size += estimate_num_insns (gsi_stmt (gsi), weights);
 125   free (body);
 126
 127   return size;
 128 }
 129
 130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop  (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145 };
 146
 147 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 148
 149 static bool
 150 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 151 {
 152   affine_iv iv;
 153
 154   if (is_gimple_min_invariant (op))
 155     return true;
 156
 157   /* We can still fold accesses to constant arrays when index is known.  */
 158   if (TREE_CODE (op) != SSA_NAME)
 159     {
 160       tree base = op;
 161
 162       /* First make fast look if we see constant array inside.  */
 163       while (handled_component_p (base))
 164         base = TREE_OPERAND (base, 0);
 165       if ((DECL_P (base)
 166            && TREE_STATIC (base)
 167            && TREE_READONLY (base)
 168            && (DECL_INITIAL (base)
 169                || (!DECL_EXTERNAL (base)
 170                    && targetm.binds_local_p (base))))
 171           || CONSTANT_CLASS_P (base))
 172         {
 173           /* If so, see if we understand all the indices.  */
 174           base = op;
 175           while (handled_component_p (base))
 176             {
 177               if (TREE_CODE (base) == ARRAY_REF
 178                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 179                 return false;
 180               base = TREE_OPERAND (base, 0);
 181             }
 182           return true;
 183         }
 184       return false;
 185     }
 186
 187   /* Induction variables are constants.  */
 188   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 189     return false;
 190   if (!is_gimple_min_invariant (iv.base))
 191     return false;
 192   if (!is_gimple_min_invariant (iv.step))
 193     return false;
 194   return true;
 195 }
 196
 197 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 198    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 199
 200 static void
 201 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 202 {
 203   basic_block *body = get_loop_body (loop);
 204   gimple_stmt_iterator gsi;
 205   unsigned int i;
 206   bool after_exit;
 207
 208   size->overall = 0;
 209   size->eliminated_by_peeling = 0;
 210   size->last_iteration = 0;
 211   size->last_iteration_eliminated_by_peeling = 0;
 212
 213   if (dump_file && (dump_flags & TDF_DETAILS))
 214     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 215   for (i = 0; i < loop->num_nodes; i++)
 216     {
 217       if (exit && body[i] != exit->src
 218           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 219         after_exit = true;
 220       else
 221         after_exit = false;
 222       if (dump_file && (dump_flags & TDF_DETAILS))
 223         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 224
 225       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 226         {
 227           gimple stmt = gsi_stmt (gsi);
 228           int num = estimate_num_insns (stmt, &eni_size_weights);
 229           bool likely_eliminated = false;
 230
 231           if (dump_file && (dump_flags & TDF_DETAILS))
 232             {
 233               fprintf (dump_file, "  size: %3i ", num);
 234               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 235             }
 236
 237           /* Look for reasons why we might optimize this stmt away. */
 238
 239           /* Exit conditional.  */
 240           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 241             {
 242               if (dump_file && (dump_flags & TDF_DETAILS))
 243                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 244               likely_eliminated = true;
 245             }
 246           /* Sets of IV variables  */
 247           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 248               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 249             {
 250               if (dump_file && (dump_flags & TDF_DETAILS))
 251                 fprintf (dump_file, "   Induction variable computation will"
 252                          " be folded away.\n");
 253               likely_eliminated = true;
 254             }
 255           /* Assignments of IV variables.  */
 256           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 257                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 258                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 259                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 260                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 261                                                   stmt, loop)))
 262             {
 263               if (dump_file && (dump_flags & TDF_DETAILS))
 264                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 265               likely_eliminated = true;
 266             }
 267           /* Conditionals.  */
 268           else if (gimple_code (stmt) == GIMPLE_COND
 269                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 270                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 271             {
 272               if (dump_file && (dump_flags & TDF_DETAILS))
 273                 fprintf (dump_file, "   Constant conditional.\n");
 274               likely_eliminated = true;
 275             }
 276
 277           size->overall += num;
 278           if (likely_eliminated)
 279             size->eliminated_by_peeling += num;
 280           if (!after_exit)
 281             {
 282               size->last_iteration += num;
 283               if (likely_eliminated)
 284                 size->last_iteration_eliminated_by_peeling += num;
 285             }
 286         }
 287     }
 288   if (dump_file && (dump_flags & TDF_DETAILS))
 289     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 290              size->eliminated_by_peeling, size->last_iteration,
 291              size->last_iteration_eliminated_by_peeling);
 292
 293   free (body);
 294 }
 295
 296 /* Estimate number of insns of completely unrolled loop.
 297    It is (NUNROLL + 1) * size of loop body with taking into account
 298    the fact that in last copy everything after exit conditional
 299    is dead and that some instructions will be eliminated after
 300    peeling.
 301
 302    Loop body is likely going to simplify futher, this is difficult
 303    to guess, we just decrease the result by 1/3.  */
 304
 305 static unsigned HOST_WIDE_INT
 306 estimated_unrolled_size (struct loop_size *size,
 307                          unsigned HOST_WIDE_INT nunroll)
 308 {
 309   HOST_WIDE_INT unr_insns = ((nunroll)
 310                              * (HOST_WIDE_INT) (size->overall
 311                                                 - size->eliminated_by_peeling));
 312   if (!nunroll)
 313     unr_insns = 0;
 314   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 315
 316   unr_insns = unr_insns * 2 / 3;
 317   if (unr_insns <= 0)
 318     unr_insns = 1;
 319
 320   return unr_insns;
 321 }
 322
 323 /* Tries to unroll LOOP completely, i.e. NITER times.
 324    UL determines which loops we are allowed to unroll.
 325    EXIT is the exit of the loop that should be eliminated.  */
 326
 327 static bool
 328 try_unroll_loop_completely (struct loop *loop,
 329                             edge exit, tree niter,
 330                             enum unroll_level ul)
 331 {
 332   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 333   gimple cond;
 334   struct loop_size size;
 335
 336   if (loop->inner)
 337     return false;
 338
 339   if (!host_integerp (niter, 1))
 340     return false;
 341   n_unroll = tree_low_cst (niter, 1);
 342
 343   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 344   if (n_unroll > max_unroll)
 345     return false;
 346
 347   if (n_unroll)
 348     {
 349       if (ul == UL_SINGLE_ITER)
 350         return false;
 351
 352       tree_estimate_loop_size (loop, exit, &size);
 353       ninsns = size.overall;
 354
 355       unr_insns = estimated_unrolled_size (&size, n_unroll);
 356       if (dump_file && (dump_flags & TDF_DETAILS))
 357         {
 358           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 359           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 360                    (int) unr_insns);
 361         }
 362
 363       if (unr_insns > ninsns
 364           && (unr_insns
 365               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 366         {
 367           if (dump_file && (dump_flags & TDF_DETAILS))
 368             fprintf (dump_file, "Not unrolling loop %d "
 369                      "(--param max-completely-peeled-insns limit reached).\n",
 370                      loop->num);
 371           return false;
 372         }
 373
 374       if (ul == UL_NO_GROWTH
 375           && unr_insns > ninsns)
 376         {
 377           if (dump_file && (dump_flags & TDF_DETAILS))
 378             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 379           return false;
 380         }
 381     }
 382
 383   if (n_unroll)
 384     {
 385       sbitmap wont_exit;
 386       edge e;
 387       unsigned i;
 388       VEC (edge, heap) *to_remove = NULL;
 389
 390       initialize_original_copy_tables ();
 391       wont_exit = sbitmap_alloc (n_unroll + 1);
 392       sbitmap_ones (wont_exit);
 393       RESET_BIT (wont_exit, 0);
 394
 395       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 396                                                  n_unroll, wont_exit,
 397                                                  exit, &to_remove,
 398                                                  DLTHE_FLAG_UPDATE_FREQ
 399                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 400         {
 401           free_original_copy_tables ();
 402           free (wont_exit);
 403           return false;
 404         }
 405
 406       FOR_EACH_VEC_ELT (edge, to_remove, i, e)
 407         {
 408           bool ok = remove_path (e);
 409           gcc_assert (ok);
 410         }
 411
 412       VEC_free (edge, heap, to_remove);
 413       free (wont_exit);
 414       free_original_copy_tables ();
 415     }
 416
 417   cond = last_stmt (exit->src);
 418   if (exit->flags & EDGE_TRUE_VALUE)
 419     gimple_cond_make_true (cond);
 420   else
 421     gimple_cond_make_false (cond);
 422   update_stmt (cond);
 423   update_ssa (TODO_update_ssa);
 424
 425   if (dump_file && (dump_flags & TDF_DETAILS))
 426     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 427
 428   return true;
 429 }
 430
 431 /* Adds a canonical induction variable to LOOP if suitable.
 432    CREATE_IV is true if we may create a new iv.  UL determines
 433    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 434    to determine the number of iterations of a loop by direct evaluation.
 435    Returns true if cfg is changed.  */
 436
 437 static bool
 438 canonicalize_loop_induction_variables (struct loop *loop,
 439                                        bool create_iv, enum unroll_level ul,
 440                                        bool try_eval)
 441 {
 442   edge exit = NULL;
 443   tree niter;
 444
 445   niter = number_of_latch_executions (loop);
 446   if (TREE_CODE (niter) == INTEGER_CST)
 447     {
 448       exit = single_exit (loop);
 449       if (!just_once_each_iteration_p (loop, exit->src))
 450         return false;
 451     }
 452   else
 453     {
 454       /* If the loop has more than one exit, try checking all of them
 455          for # of iterations determinable through scev.  */
 456       if (!single_exit (loop))
 457         niter = find_loop_niter (loop, &exit);
 458
 459       /* Finally if everything else fails, try brute force evaluation.  */
 460       if (try_eval
 461           && (chrec_contains_undetermined (niter)
 462               || TREE_CODE (niter) != INTEGER_CST))
 463         niter = find_loop_niter_by_eval (loop, &exit);
 464
 465       if (chrec_contains_undetermined (niter)
 466           || TREE_CODE (niter) != INTEGER_CST)
 467         return false;
 468     }
 469
 470   if (dump_file && (dump_flags & TDF_DETAILS))
 471     {
 472       fprintf (dump_file, "Loop %d iterates ", loop->num);
 473       print_generic_expr (dump_file, niter, TDF_SLIM);
 474       fprintf (dump_file, " times.\n");
 475     }
 476
 477   if (try_unroll_loop_completely (loop, exit, niter, ul))
 478     return true;
 479
 480   if (create_iv)
 481     create_canonical_iv (loop, exit, niter);
 482
 483   return false;
 484 }
 485
 486 /* The main entry point of the pass.  Adds canonical induction variables
 487    to the suitable loops.  */
 488
 489 unsigned int
 490 canonicalize_induction_variables (void)
 491 {
 492   loop_iterator li;
 493   struct loop *loop;
 494   bool changed = false;
 495
 496   FOR_EACH_LOOP (li, loop, 0)
 497     {
 498       changed |= canonicalize_loop_induction_variables (loop,
 499                                                         true, UL_SINGLE_ITER,
 500                                                         true);
 501     }
 502
 503   /* Clean up the information about numbers of iterations, since brute force
 504      evaluation could reveal new information.  */
 505   scev_reset ();
 506
 507   if (changed)
 508     return TODO_cleanup_cfg;
 509   return 0;
 510 }
 511
 512 /* Unroll LOOPS completely if they iterate just few times.  Unless
 513    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 514    size of the code does not increase.  */
 515
 516 unsigned int
 517 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 518 {
 519   loop_iterator li;
 520   struct loop *loop;
 521   bool changed;
 522   enum unroll_level ul;
 523   int iteration = 0;
 524
 525   do
 526     {
 527       changed = false;
 528
 529       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 530         {
 531           if (may_increase_size && optimize_loop_for_speed_p (loop)
 532               /* Unroll outermost loops only if asked to do so or they do
 533                  not cause code growth.  */
 534               && (unroll_outer
 535                   || loop_outer (loop_outer (loop))))
 536             ul = UL_ALL;
 537           else
 538             ul = UL_NO_GROWTH;
 539           changed |= canonicalize_loop_induction_variables
 540                        (loop, false, ul, !flag_tree_loop_ivcanon);
 541         }
 542
 543       if (changed)
 544         {
 545           /* This will take care of removing completely unrolled loops
 546              from the loop structures so we can continue unrolling now
 547              innermost loops.  */
 548           if (cleanup_tree_cfg ())
 549             update_ssa (TODO_update_ssa_only_virtuals);
 550
 551           /* Clean up the information about numbers of iterations, since
 552              complete unrolling might have invalidated it.  */
 553           scev_reset ();
 554         }
 555     }
 556   while (changed
 557          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 558
 559   return 0;
 560 }