gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass detects the loops that iterate a constant number of times,
  22    adds a canonical induction variable (step -1, tested against 0)
  23    and replaces the exit test.  This enables the less powerful rtl
  24    level analysis to use this information.
  25
  26    This might spoil the code in some cases (by increasing register pressure).
  27    Note that in the case the new variable is not needed, ivopts will get rid
  28    of it, so it might only be a problem when there are no other linear induction
  29    variables.  In that case the created optimization possibilities are likely
  30    to pay up.
  31
  32    Additionally in case we detect that it is beneficial to unroll the
  33    loop completely, we do it right here to expose the optimization
  34    possibilities to the following passes.  */
  35
  36 #include "config.h"
  37 #include "system.h"
  38 #include "coretypes.h"
  39 #include "tm.h"
  40 #include "tree.h"
  41 #include "tm_p.h"
  42 #include "basic-block.h"
  43 #include "diagnostic.h"
  44 #include "tree-flow.h"
  45 #include "tree-dump.h"
  46 #include "cfgloop.h"
  47 #include "tree-pass.h"
  48 #include "tree-chrec.h"
  49 #include "tree-scalar-evolution.h"
  50 #include "params.h"
  51 #include "flags.h"
  52 #include "tree-inline.h"
  53 #include "target.h"
  54
  55 /* Specifies types of loops that may be unrolled.  */
  56
  57 enum unroll_level
  58 {
  59   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  60                            iteration.  */
  61   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  62                            of code size.  */
  63   UL_ALL                /* All suitable loops.  */
  64 };
  65
  66 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  67    is the exit edge whose condition is replaced.  */
  68
  69 static void
  70 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  71 {
  72   edge in;
  73   tree type, var;
  74   gimple cond;
  75   gimple_stmt_iterator incr_at;
  76   enum tree_code cmp;
  77
  78   if (dump_file && (dump_flags & TDF_DETAILS))
  79     {
  80       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  81       print_generic_expr (dump_file, niter, TDF_SLIM);
  82       fprintf (dump_file, " iterations.\n");
  83     }
  84
  85   cond = last_stmt (exit->src);
  86   in = EDGE_SUCC (exit->src, 0);
  87   if (in == exit)
  88     in = EDGE_SUCC (exit->src, 1);
  89
  90   /* Note that we do not need to worry about overflows, since
  91      type of niter is always unsigned and all comparisons are
  92      just for equality/nonequality -- i.e. everything works
  93      with a modulo arithmetics.  */
  94
  95   type = TREE_TYPE (niter);
  96   niter = fold_build2 (PLUS_EXPR, type,
  97                        niter,
  98                        build_int_cst (type, 1));
  99   incr_at = gsi_last_bb (in->src);
 100   create_iv (niter,
 101              build_int_cst (type, -1),
 102              NULL_TREE, loop,
 103              &incr_at, false, NULL, &var);
 104
 105   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 106   gimple_cond_set_code (cond, cmp);
 107   gimple_cond_set_lhs (cond, var);
 108   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 109   update_stmt (cond);
 110 }
 111
 112 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 113
 114 unsigned
 115 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 116 {
 117   basic_block *body = get_loop_body (loop);
 118   gimple_stmt_iterator gsi;
 119   unsigned size = 0, i;
 120
 121   for (i = 0; i < loop->num_nodes; i++)
 122     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 123       size += estimate_num_insns (gsi_stmt (gsi), weights);
 124   free (body);
 125
 126   return size;
 127 }
 128
 129 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 130 struct loop_size
 131 {
 132   /* Number of instructions in the loop.  */
 133   int overall;
 134
 135   /* Number of instructions that will be likely optimized out in
 136      peeled iterations of loop  (i.e. computation based on induction
 137      variable where induction variable starts at known constant.)  */
 138   int eliminated_by_peeling;
 139
 140   /* Same statistics for last iteration of loop: it is smaller because
 141      instructions after exit are not executed.  */
 142   int last_iteration;
 143   int last_iteration_eliminated_by_peeling;
 144 };
 145
 146 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 147
 148 static bool
 149 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 150 {
 151   affine_iv iv;
 152
 153   if (is_gimple_min_invariant (op))
 154     return true;
 155
 156   /* We can still fold accesses to constant arrays when index is known.  */
 157   if (TREE_CODE (op) != SSA_NAME)
 158     {
 159       tree base = op;
 160
 161       /* First make fast look if we see constant array inside.  */
 162       while (handled_component_p (base))
 163         base = TREE_OPERAND (base, 0);
 164       if ((DECL_P (base)
 165            && TREE_STATIC (base)
 166            && TREE_READONLY (base)
 167            && (DECL_INITIAL (base)
 168                || (!DECL_EXTERNAL (base)
 169                    && targetm.binds_local_p (base))))
 170           || CONSTANT_CLASS_P (base))
 171         {
 172           /* If so, see if we understand all the indices.  */
 173           base = op;
 174           while (handled_component_p (base))
 175             {
 176               if (TREE_CODE (base) == ARRAY_REF
 177                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 178                 return false;
 179               base = TREE_OPERAND (base, 0);
 180             }
 181           return true;
 182         }
 183       return false;
 184     }
 185
 186   /* Induction variables are constants.  */
 187   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 188     return false;
 189   if (!is_gimple_min_invariant (iv.base))
 190     return false;
 191   if (!is_gimple_min_invariant (iv.step))
 192     return false;
 193   return true;
 194 }
 195
 196 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 197    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 198
 199 static void
 200 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 201 {
 202   basic_block *body = get_loop_body (loop);
 203   gimple_stmt_iterator gsi;
 204   unsigned int i;
 205   bool after_exit;
 206
 207   size->overall = 0;
 208   size->eliminated_by_peeling = 0;
 209   size->last_iteration = 0;
 210   size->last_iteration_eliminated_by_peeling = 0;
 211
 212   if (dump_file && (dump_flags & TDF_DETAILS))
 213     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 214   for (i = 0; i < loop->num_nodes; i++)
 215     {
 216       if (exit && body[i] != exit->src
 217           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 218         after_exit = true;
 219       else
 220         after_exit = false;
 221       if (dump_file && (dump_flags & TDF_DETAILS))
 222         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 223
 224       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 225         {
 226           gimple stmt = gsi_stmt (gsi);
 227           int num = estimate_num_insns (stmt, &eni_size_weights);
 228           bool likely_eliminated = false;
 229
 230           if (dump_file && (dump_flags & TDF_DETAILS))
 231             {
 232               fprintf (dump_file, "  size: %3i ", num);
 233               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 234             }
 235
 236           /* Look for reasons why we might optimize this stmt away. */
 237
 238           /* Exit conditional.  */
 239           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 240             {
 241               if (dump_file && (dump_flags & TDF_DETAILS))
 242                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 243               likely_eliminated = true;
 244             }
 245           /* Sets of IV variables  */
 246           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 247               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 248             {
 249               if (dump_file && (dump_flags & TDF_DETAILS))
 250                 fprintf (dump_file, "   Induction variable computation will"
 251                          " be folded away.\n");
 252               likely_eliminated = true;
 253             }
 254           /* Assignments of IV variables.  */
 255           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 256                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 257                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 258                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 259                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 260                                                   stmt, loop)))
 261             {
 262               if (dump_file && (dump_flags & TDF_DETAILS))
 263                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 264               likely_eliminated = true;
 265             }
 266           /* Conditionals.  */
 267           else if (gimple_code (stmt) == GIMPLE_COND
 268                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 269                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 270             {
 271               if (dump_file && (dump_flags & TDF_DETAILS))
 272                 fprintf (dump_file, "   Constant conditional.\n");
 273               likely_eliminated = true;
 274             }
 275
 276           size->overall += num;
 277           if (likely_eliminated)
 278             size->eliminated_by_peeling += num;
 279           if (!after_exit)
 280             {
 281               size->last_iteration += num;
 282               if (likely_eliminated)
 283                 size->last_iteration_eliminated_by_peeling += num;
 284             }
 285         }
 286     }
 287   if (dump_file && (dump_flags & TDF_DETAILS))
 288     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 289              size->eliminated_by_peeling, size->last_iteration,
 290              size->last_iteration_eliminated_by_peeling);
 291
 292   free (body);
 293 }
 294
 295 /* Estimate number of insns of completely unrolled loop.
 296    It is (NUNROLL + 1) * size of loop body with taking into account
 297    the fact that in last copy everything after exit conditional
 298    is dead and that some instructions will be eliminated after
 299    peeling.
 300
 301    Loop body is likely going to simplify futher, this is difficult
 302    to guess, we just decrease the result by 1/3.  */
 303
 304 static unsigned HOST_WIDE_INT
 305 estimated_unrolled_size (struct loop_size *size,
 306                          unsigned HOST_WIDE_INT nunroll)
 307 {
 308   HOST_WIDE_INT unr_insns = ((nunroll)
 309                              * (HOST_WIDE_INT) (size->overall
 310                                                 - size->eliminated_by_peeling));
 311   if (!nunroll)
 312     unr_insns = 0;
 313   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 314
 315   unr_insns = unr_insns * 2 / 3;
 316   if (unr_insns <= 0)
 317     unr_insns = 1;
 318
 319   return unr_insns;
 320 }
 321
 322 /* Tries to unroll LOOP completely, i.e. NITER times.
 323    UL determines which loops we are allowed to unroll.
 324    EXIT is the exit of the loop that should be eliminated.  */
 325
 326 static bool
 327 try_unroll_loop_completely (struct loop *loop,
 328                             edge exit, tree niter,
 329                             enum unroll_level ul)
 330 {
 331   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 332   gimple cond;
 333   struct loop_size size;
 334
 335   if (loop->inner)
 336     return false;
 337
 338   if (!host_integerp (niter, 1))
 339     return false;
 340   n_unroll = tree_low_cst (niter, 1);
 341
 342   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 343   if (n_unroll > max_unroll)
 344     return false;
 345
 346   if (n_unroll)
 347     {
 348       if (ul == UL_SINGLE_ITER)
 349         return false;
 350
 351       tree_estimate_loop_size (loop, exit, &size);
 352       ninsns = size.overall;
 353
 354       unr_insns = estimated_unrolled_size (&size, n_unroll);
 355       if (dump_file && (dump_flags & TDF_DETAILS))
 356         {
 357           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 358           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 359                    (int) unr_insns);
 360         }
 361
 362       if (unr_insns > ninsns
 363           && (unr_insns
 364               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 365         {
 366           if (dump_file && (dump_flags & TDF_DETAILS))
 367             fprintf (dump_file, "Not unrolling loop %d "
 368                      "(--param max-completely-peeled-insns limit reached).\n",
 369                      loop->num);
 370           return false;
 371         }
 372
 373       if (ul == UL_NO_GROWTH
 374           && unr_insns > ninsns)
 375         {
 376           if (dump_file && (dump_flags & TDF_DETAILS))
 377             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 378           return false;
 379         }
 380     }
 381
 382   if (n_unroll)
 383     {
 384       sbitmap wont_exit;
 385       edge e;
 386       unsigned i;
 387       VEC (edge, heap) *to_remove = NULL;
 388
 389       initialize_original_copy_tables ();
 390       wont_exit = sbitmap_alloc (n_unroll + 1);
 391       sbitmap_ones (wont_exit);
 392       RESET_BIT (wont_exit, 0);
 393
 394       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 395                                                  n_unroll, wont_exit,
 396                                                  exit, &to_remove,
 397                                                  DLTHE_FLAG_UPDATE_FREQ
 398                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 399         {
 400           free_original_copy_tables ();
 401           free (wont_exit);
 402           return false;
 403         }
 404
 405       for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
 406         {
 407           bool ok = remove_path (e);
 408           gcc_assert (ok);
 409         }
 410
 411       VEC_free (edge, heap, to_remove);
 412       free (wont_exit);
 413       free_original_copy_tables ();
 414     }
 415
 416   cond = last_stmt (exit->src);
 417   if (exit->flags & EDGE_TRUE_VALUE)
 418     gimple_cond_make_true (cond);
 419   else
 420     gimple_cond_make_false (cond);
 421   update_stmt (cond);
 422   update_ssa (TODO_update_ssa);
 423
 424   if (dump_file && (dump_flags & TDF_DETAILS))
 425     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 426
 427   return true;
 428 }
 429
 430 /* Adds a canonical induction variable to LOOP if suitable.
 431    CREATE_IV is true if we may create a new iv.  UL determines
 432    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 433    to determine the number of iterations of a loop by direct evaluation.
 434    Returns true if cfg is changed.  */
 435
 436 static bool
 437 canonicalize_loop_induction_variables (struct loop *loop,
 438                                        bool create_iv, enum unroll_level ul,
 439                                        bool try_eval)
 440 {
 441   edge exit = NULL;
 442   tree niter;
 443
 444   niter = number_of_latch_executions (loop);
 445   if (TREE_CODE (niter) == INTEGER_CST)
 446     {
 447       exit = single_exit (loop);
 448       if (!just_once_each_iteration_p (loop, exit->src))
 449         return false;
 450     }
 451   else
 452     {
 453       /* If the loop has more than one exit, try checking all of them
 454          for # of iterations determinable through scev.  */
 455       if (!single_exit (loop))
 456         niter = find_loop_niter (loop, &exit);
 457
 458       /* Finally if everything else fails, try brute force evaluation.  */
 459       if (try_eval
 460           && (chrec_contains_undetermined (niter)
 461               || TREE_CODE (niter) != INTEGER_CST))
 462         niter = find_loop_niter_by_eval (loop, &exit);
 463
 464       if (chrec_contains_undetermined (niter)
 465           || TREE_CODE (niter) != INTEGER_CST)
 466         return false;
 467     }
 468
 469   if (dump_file && (dump_flags & TDF_DETAILS))
 470     {
 471       fprintf (dump_file, "Loop %d iterates ", loop->num);
 472       print_generic_expr (dump_file, niter, TDF_SLIM);
 473       fprintf (dump_file, " times.\n");
 474     }
 475
 476   if (try_unroll_loop_completely (loop, exit, niter, ul))
 477     return true;
 478
 479   if (create_iv)
 480     create_canonical_iv (loop, exit, niter);
 481
 482   return false;
 483 }
 484
 485 /* The main entry point of the pass.  Adds canonical induction variables
 486    to the suitable loops.  */
 487
 488 unsigned int
 489 canonicalize_induction_variables (void)
 490 {
 491   loop_iterator li;
 492   struct loop *loop;
 493   bool changed = false;
 494
 495   FOR_EACH_LOOP (li, loop, 0)
 496     {
 497       changed |= canonicalize_loop_induction_variables (loop,
 498                                                         true, UL_SINGLE_ITER,
 499                                                         true);
 500     }
 501
 502   /* Clean up the information about numbers of iterations, since brute force
 503      evaluation could reveal new information.  */
 504   scev_reset ();
 505
 506   if (changed)
 507     return TODO_cleanup_cfg;
 508   return 0;
 509 }
 510
 511 /* Unroll LOOPS completely if they iterate just few times.  Unless
 512    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 513    size of the code does not increase.  */
 514
 515 unsigned int
 516 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 517 {
 518   loop_iterator li;
 519   struct loop *loop;
 520   bool changed;
 521   enum unroll_level ul;
 522   int iteration = 0;
 523
 524   do
 525     {
 526       changed = false;
 527
 528       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 529         {
 530           if (may_increase_size && optimize_loop_for_speed_p (loop)
 531               /* Unroll outermost loops only if asked to do so or they do
 532                  not cause code growth.  */
 533               && (unroll_outer
 534                   || loop_outer (loop_outer (loop))))
 535             ul = UL_ALL;
 536           else
 537             ul = UL_NO_GROWTH;
 538           changed |= canonicalize_loop_induction_variables
 539                        (loop, false, ul, !flag_tree_loop_ivcanon);
 540         }
 541
 542       if (changed)
 543         {
 544           /* This will take care of removing completely unrolled loops
 545              from the loop structures so we can continue unrolling now
 546              innermost loops.  */
 547           if (cleanup_tree_cfg ())
 548             update_ssa (TODO_update_ssa_only_virtuals);
 549
 550           /* Clean up the information about numbers of iterations, since
 551              complete unrolling might have invalidated it.  */
 552           scev_reset ();
 553         }
 554     }
 555   while (changed
 556          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 557
 558   return 0;
 559 }