gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass detects the loops that iterate a constant number of times,
  22    adds a canonical induction variable (step -1, tested against 0)
  23    and replaces the exit test.  This enables the less powerful rtl
  24    level analysis to use this information.
  25
  26    This might spoil the code in some cases (by increasing register pressure).
  27    Note that in the case the new variable is not needed, ivopts will get rid
  28    of it, so it might only be a problem when there are no other linear induction
  29    variables.  In that case the created optimization possibilities are likely
  30    to pay up.
  31
  32    Additionally in case we detect that it is beneficial to unroll the
  33    loop completely, we do it right here to expose the optimization
  34    possibilities to the following passes.  */
  35
  36 #include "config.h"
  37 #include "system.h"
  38 #include "coretypes.h"
  39 #include "tm.h"
  40 #include "tree.h"
  41 #include "tm_p.h"
  42 #include "basic-block.h"
  43 #include "tree-pretty-print.h"
  44 #include "gimple-pretty-print.h"
  45 #include "tree-flow.h"
  46 #include "tree-dump.h"
  47 #include "cfgloop.h"
  48 #include "tree-pass.h"
  49 #include "tree-chrec.h"
  50 #include "tree-scalar-evolution.h"
  51 #include "params.h"
  52 #include "flags.h"
  53 #include "tree-inline.h"
  54 #include "target.h"
  55
  56 /* Specifies types of loops that may be unrolled.  */
  57
  58 enum unroll_level
  59 {
  60   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  61                            iteration.  */
  62   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  63                            of code size.  */
  64   UL_ALL                /* All suitable loops.  */
  65 };
  66
  67 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  68    is the exit edge whose condition is replaced.  */
  69
  70 static void
  71 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  72 {
  73   edge in;
  74   tree type, var;
  75   gimple cond;
  76   gimple_stmt_iterator incr_at;
  77   enum tree_code cmp;
  78
  79   if (dump_file && (dump_flags & TDF_DETAILS))
  80     {
  81       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  82       print_generic_expr (dump_file, niter, TDF_SLIM);
  83       fprintf (dump_file, " iterations.\n");
  84     }
  85
  86   cond = last_stmt (exit->src);
  87   in = EDGE_SUCC (exit->src, 0);
  88   if (in == exit)
  89     in = EDGE_SUCC (exit->src, 1);
  90
  91   /* Note that we do not need to worry about overflows, since
  92      type of niter is always unsigned and all comparisons are
  93      just for equality/nonequality -- i.e. everything works
  94      with a modulo arithmetics.  */
  95
  96   type = TREE_TYPE (niter);
  97   niter = fold_build2 (PLUS_EXPR, type,
  98                        niter,
  99                        build_int_cst (type, 1));
 100   incr_at = gsi_last_bb (in->src);
 101   create_iv (niter,
 102              build_int_cst (type, -1),
 103              NULL_TREE, loop,
 104              &incr_at, false, NULL, &var);
 105
 106   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 107   gimple_cond_set_code (cond, cmp);
 108   gimple_cond_set_lhs (cond, var);
 109   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 110   update_stmt (cond);
 111 }
 112
 113 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 114
 115 unsigned
 116 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 117 {
 118   basic_block *body = get_loop_body (loop);
 119   gimple_stmt_iterator gsi;
 120   unsigned size = 0, i;
 121
 122   for (i = 0; i < loop->num_nodes; i++)
 123     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 124       size += estimate_num_insns (gsi_stmt (gsi), weights);
 125   free (body);
 126
 127   return size;
 128 }
 129
 130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop  (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145 };
 146
 147 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 148
 149 static bool
 150 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 151 {
 152   affine_iv iv;
 153
 154   if (is_gimple_min_invariant (op))
 155     return true;
 156
 157   /* We can still fold accesses to constant arrays when index is known.  */
 158   if (TREE_CODE (op) != SSA_NAME)
 159     {
 160       tree base = op;
 161
 162       /* First make fast look if we see constant array inside.  */
 163       while (handled_component_p (base))
 164         base = TREE_OPERAND (base, 0);
 165       if ((DECL_P (base)
 166            && TREE_STATIC (base)
 167            && TREE_READONLY (base)
 168            && varpool_get_node (base)->const_value_known)
 169           || CONSTANT_CLASS_P (base))
 170         {
 171           /* If so, see if we understand all the indices.  */
 172           base = op;
 173           while (handled_component_p (base))
 174             {
 175               if (TREE_CODE (base) == ARRAY_REF
 176                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 177                 return false;
 178               base = TREE_OPERAND (base, 0);
 179             }
 180           return true;
 181         }
 182       return false;
 183     }
 184
 185   /* Induction variables are constants.  */
 186   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 187     return false;
 188   if (!is_gimple_min_invariant (iv.base))
 189     return false;
 190   if (!is_gimple_min_invariant (iv.step))
 191     return false;
 192   return true;
 193 }
 194
 195 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 196    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 197
 198 static void
 199 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 200 {
 201   basic_block *body = get_loop_body (loop);
 202   gimple_stmt_iterator gsi;
 203   unsigned int i;
 204   bool after_exit;
 205
 206   size->overall = 0;
 207   size->eliminated_by_peeling = 0;
 208   size->last_iteration = 0;
 209   size->last_iteration_eliminated_by_peeling = 0;
 210
 211   if (dump_file && (dump_flags & TDF_DETAILS))
 212     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 213   for (i = 0; i < loop->num_nodes; i++)
 214     {
 215       if (exit && body[i] != exit->src
 216           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 217         after_exit = true;
 218       else
 219         after_exit = false;
 220       if (dump_file && (dump_flags & TDF_DETAILS))
 221         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 222
 223       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 224         {
 225           gimple stmt = gsi_stmt (gsi);
 226           int num = estimate_num_insns (stmt, &eni_size_weights);
 227           bool likely_eliminated = false;
 228
 229           if (dump_file && (dump_flags & TDF_DETAILS))
 230             {
 231               fprintf (dump_file, "  size: %3i ", num);
 232               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 233             }
 234
 235           /* Look for reasons why we might optimize this stmt away. */
 236
 237           /* Exit conditional.  */
 238           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 239             {
 240               if (dump_file && (dump_flags & TDF_DETAILS))
 241                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 242               likely_eliminated = true;
 243             }
 244           /* Sets of IV variables  */
 245           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 246               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 247             {
 248               if (dump_file && (dump_flags & TDF_DETAILS))
 249                 fprintf (dump_file, "   Induction variable computation will"
 250                          " be folded away.\n");
 251               likely_eliminated = true;
 252             }
 253           /* Assignments of IV variables.  */
 254           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 255                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 256                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 257                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 258                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 259                                                   stmt, loop)))
 260             {
 261               if (dump_file && (dump_flags & TDF_DETAILS))
 262                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 263               likely_eliminated = true;
 264             }
 265           /* Conditionals.  */
 266           else if (gimple_code (stmt) == GIMPLE_COND
 267                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 268                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 269             {
 270               if (dump_file && (dump_flags & TDF_DETAILS))
 271                 fprintf (dump_file, "   Constant conditional.\n");
 272               likely_eliminated = true;
 273             }
 274
 275           size->overall += num;
 276           if (likely_eliminated)
 277             size->eliminated_by_peeling += num;
 278           if (!after_exit)
 279             {
 280               size->last_iteration += num;
 281               if (likely_eliminated)
 282                 size->last_iteration_eliminated_by_peeling += num;
 283             }
 284         }
 285     }
 286   if (dump_file && (dump_flags & TDF_DETAILS))
 287     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 288              size->eliminated_by_peeling, size->last_iteration,
 289              size->last_iteration_eliminated_by_peeling);
 290
 291   free (body);
 292 }
 293
 294 /* Estimate number of insns of completely unrolled loop.
 295    It is (NUNROLL + 1) * size of loop body with taking into account
 296    the fact that in last copy everything after exit conditional
 297    is dead and that some instructions will be eliminated after
 298    peeling.
 299
 300    Loop body is likely going to simplify futher, this is difficult
 301    to guess, we just decrease the result by 1/3.  */
 302
 303 static unsigned HOST_WIDE_INT
 304 estimated_unrolled_size (struct loop_size *size,
 305                          unsigned HOST_WIDE_INT nunroll)
 306 {
 307   HOST_WIDE_INT unr_insns = ((nunroll)
 308                              * (HOST_WIDE_INT) (size->overall
 309                                                 - size->eliminated_by_peeling));
 310   if (!nunroll)
 311     unr_insns = 0;
 312   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 313
 314   unr_insns = unr_insns * 2 / 3;
 315   if (unr_insns <= 0)
 316     unr_insns = 1;
 317
 318   return unr_insns;
 319 }
 320
 321 /* Tries to unroll LOOP completely, i.e. NITER times.
 322    UL determines which loops we are allowed to unroll.
 323    EXIT is the exit of the loop that should be eliminated.  */
 324
 325 static bool
 326 try_unroll_loop_completely (struct loop *loop,
 327                             edge exit, tree niter,
 328                             enum unroll_level ul)
 329 {
 330   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 331   gimple cond;
 332   struct loop_size size;
 333
 334   if (loop->inner)
 335     return false;
 336
 337   if (!host_integerp (niter, 1))
 338     return false;
 339   n_unroll = tree_low_cst (niter, 1);
 340
 341   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 342   if (n_unroll > max_unroll)
 343     return false;
 344
 345   if (n_unroll)
 346     {
 347       if (ul == UL_SINGLE_ITER)
 348         return false;
 349
 350       tree_estimate_loop_size (loop, exit, &size);
 351       ninsns = size.overall;
 352
 353       unr_insns = estimated_unrolled_size (&size, n_unroll);
 354       if (dump_file && (dump_flags & TDF_DETAILS))
 355         {
 356           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 357           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 358                    (int) unr_insns);
 359         }
 360
 361       if (unr_insns > ninsns
 362           && (unr_insns
 363               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 364         {
 365           if (dump_file && (dump_flags & TDF_DETAILS))
 366             fprintf (dump_file, "Not unrolling loop %d "
 367                      "(--param max-completely-peeled-insns limit reached).\n",
 368                      loop->num);
 369           return false;
 370         }
 371
 372       if (ul == UL_NO_GROWTH
 373           && unr_insns > ninsns)
 374         {
 375           if (dump_file && (dump_flags & TDF_DETAILS))
 376             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 377           return false;
 378         }
 379     }
 380
 381   if (n_unroll)
 382     {
 383       sbitmap wont_exit;
 384       edge e;
 385       unsigned i;
 386       VEC (edge, heap) *to_remove = NULL;
 387
 388       initialize_original_copy_tables ();
 389       wont_exit = sbitmap_alloc (n_unroll + 1);
 390       sbitmap_ones (wont_exit);
 391       RESET_BIT (wont_exit, 0);
 392
 393       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 394                                                  n_unroll, wont_exit,
 395                                                  exit, &to_remove,
 396                                                  DLTHE_FLAG_UPDATE_FREQ
 397                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 398         {
 399           free_original_copy_tables ();
 400           free (wont_exit);
 401           return false;
 402         }
 403
 404       FOR_EACH_VEC_ELT (edge, to_remove, i, e)
 405         {
 406           bool ok = remove_path (e);
 407           gcc_assert (ok);
 408         }
 409
 410       VEC_free (edge, heap, to_remove);
 411       free (wont_exit);
 412       free_original_copy_tables ();
 413     }
 414
 415   cond = last_stmt (exit->src);
 416   if (exit->flags & EDGE_TRUE_VALUE)
 417     gimple_cond_make_true (cond);
 418   else
 419     gimple_cond_make_false (cond);
 420   update_stmt (cond);
 421   update_ssa (TODO_update_ssa);
 422
 423   if (dump_file && (dump_flags & TDF_DETAILS))
 424     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 425
 426   return true;
 427 }
 428
 429 /* Adds a canonical induction variable to LOOP if suitable.
 430    CREATE_IV is true if we may create a new iv.  UL determines
 431    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 432    to determine the number of iterations of a loop by direct evaluation.
 433    Returns true if cfg is changed.  */
 434
 435 static bool
 436 canonicalize_loop_induction_variables (struct loop *loop,
 437                                        bool create_iv, enum unroll_level ul,
 438                                        bool try_eval)
 439 {
 440   edge exit = NULL;
 441   tree niter;
 442
 443   niter = number_of_latch_executions (loop);
 444   if (TREE_CODE (niter) == INTEGER_CST)
 445     {
 446       exit = single_exit (loop);
 447       if (!just_once_each_iteration_p (loop, exit->src))
 448         return false;
 449     }
 450   else
 451     {
 452       /* If the loop has more than one exit, try checking all of them
 453          for # of iterations determinable through scev.  */
 454       if (!single_exit (loop))
 455         niter = find_loop_niter (loop, &exit);
 456
 457       /* Finally if everything else fails, try brute force evaluation.  */
 458       if (try_eval
 459           && (chrec_contains_undetermined (niter)
 460               || TREE_CODE (niter) != INTEGER_CST))
 461         niter = find_loop_niter_by_eval (loop, &exit);
 462
 463       if (chrec_contains_undetermined (niter)
 464           || TREE_CODE (niter) != INTEGER_CST)
 465         return false;
 466     }
 467
 468   if (dump_file && (dump_flags & TDF_DETAILS))
 469     {
 470       fprintf (dump_file, "Loop %d iterates ", loop->num);
 471       print_generic_expr (dump_file, niter, TDF_SLIM);
 472       fprintf (dump_file, " times.\n");
 473     }
 474
 475   if (try_unroll_loop_completely (loop, exit, niter, ul))
 476     return true;
 477
 478   if (create_iv)
 479     create_canonical_iv (loop, exit, niter);
 480
 481   return false;
 482 }
 483
 484 /* The main entry point of the pass.  Adds canonical induction variables
 485    to the suitable loops.  */
 486
 487 unsigned int
 488 canonicalize_induction_variables (void)
 489 {
 490   loop_iterator li;
 491   struct loop *loop;
 492   bool changed = false;
 493
 494   FOR_EACH_LOOP (li, loop, 0)
 495     {
 496       changed |= canonicalize_loop_induction_variables (loop,
 497                                                         true, UL_SINGLE_ITER,
 498                                                         true);
 499     }
 500
 501   /* Clean up the information about numbers of iterations, since brute force
 502      evaluation could reveal new information.  */
 503   scev_reset ();
 504
 505   if (changed)
 506     return TODO_cleanup_cfg;
 507   return 0;
 508 }
 509
 510 /* Unroll LOOPS completely if they iterate just few times.  Unless
 511    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 512    size of the code does not increase.  */
 513
 514 unsigned int
 515 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 516 {
 517   loop_iterator li;
 518   struct loop *loop;
 519   bool changed;
 520   enum unroll_level ul;
 521   int iteration = 0;
 522
 523   do
 524     {
 525       changed = false;
 526
 527       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 528         {
 529           if (may_increase_size && optimize_loop_for_speed_p (loop)
 530               /* Unroll outermost loops only if asked to do so or they do
 531                  not cause code growth.  */
 532               && (unroll_outer
 533                   || loop_outer (loop_outer (loop))))
 534             ul = UL_ALL;
 535           else
 536             ul = UL_NO_GROWTH;
 537           changed |= canonicalize_loop_induction_variables
 538                        (loop, false, ul, !flag_tree_loop_ivcanon);
 539         }
 540
 541       if (changed)
 542         {
 543           /* This will take care of removing completely unrolled loops
 544              from the loop structures so we can continue unrolling now
 545              innermost loops.  */
 546           if (cleanup_tree_cfg ())
 547             update_ssa (TODO_update_ssa_only_virtuals);
 548
 549           /* Clean up the information about numbers of iterations, since
 550              complete unrolling might have invalidated it.  */
 551           scev_reset ();
 552         }
 553     }
 554   while (changed
 555          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 556
 557   return 0;
 558 }