gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass detects the loops that iterate a constant number of times,
  22    adds a canonical induction variable (step -1, tested against 0)
  23    and replaces the exit test.  This enables the less powerful rtl
  24    level analysis to use this information.
  25
  26    This might spoil the code in some cases (by increasing register pressure).
  27    Note that in the case the new variable is not needed, ivopts will get rid
  28    of it, so it might only be a problem when there are no other linear induction
  29    variables.  In that case the created optimization possibilities are likely
  30    to pay up.
  31
  32    Additionally in case we detect that it is beneficial to unroll the
  33    loop completely, we do it right here to expose the optimization
  34    possibilities to the following passes.  */
  35
  36 #include "config.h"
  37 #include "system.h"
  38 #include "coretypes.h"
  39 #include "tm.h"
  40 #include "tree.h"
  41 #include "rtl.h"
  42 #include "tm_p.h"
  43 #include "hard-reg-set.h"
  44 #include "basic-block.h"
  45 #include "output.h"
  46 #include "diagnostic.h"
  47 #include "tree-flow.h"
  48 #include "tree-dump.h"
  49 #include "cfgloop.h"
  50 #include "tree-pass.h"
  51 #include "ggc.h"
  52 #include "tree-chrec.h"
  53 #include "tree-scalar-evolution.h"
  54 #include "params.h"
  55 #include "flags.h"
  56 #include "tree-inline.h"
  57 #include "target.h"
  58
  59 /* Specifies types of loops that may be unrolled.  */
  60
  61 enum unroll_level
  62 {
  63   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  64                            iteration.  */
  65   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  66                            of code size.  */
  67   UL_ALL                /* All suitable loops.  */
  68 };
  69
  70 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  71    is the exit edge whose condition is replaced.  */
  72
  73 static void
  74 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  75 {
  76   edge in;
  77   tree type, var;
  78   gimple cond;
  79   gimple_stmt_iterator incr_at;
  80   enum tree_code cmp;
  81
  82   if (dump_file && (dump_flags & TDF_DETAILS))
  83     {
  84       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  85       print_generic_expr (dump_file, niter, TDF_SLIM);
  86       fprintf (dump_file, " iterations.\n");
  87     }
  88
  89   cond = last_stmt (exit->src);
  90   in = EDGE_SUCC (exit->src, 0);
  91   if (in == exit)
  92     in = EDGE_SUCC (exit->src, 1);
  93
  94   /* Note that we do not need to worry about overflows, since
  95      type of niter is always unsigned and all comparisons are
  96      just for equality/nonequality -- i.e. everything works
  97      with a modulo arithmetics.  */
  98
  99   type = TREE_TYPE (niter);
 100   niter = fold_build2 (PLUS_EXPR, type,
 101                        niter,
 102                        build_int_cst (type, 1));
 103   incr_at = gsi_last_bb (in->src);
 104   create_iv (niter,
 105              build_int_cst (type, -1),
 106              NULL_TREE, loop,
 107              &incr_at, false, NULL, &var);
 108
 109   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 110   gimple_cond_set_code (cond, cmp);
 111   gimple_cond_set_lhs (cond, var);
 112   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 113   update_stmt (cond);
 114 }
 115
 116 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 117
 118 unsigned
 119 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 120 {
 121   basic_block *body = get_loop_body (loop);
 122   gimple_stmt_iterator gsi;
 123   unsigned size = 0, i;
 124
 125   for (i = 0; i < loop->num_nodes; i++)
 126     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 127       size += estimate_num_insns (gsi_stmt (gsi), weights);
 128   free (body);
 129
 130   return size;
 131 }
 132
 133 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 134 struct loop_size
 135 {
 136   /* Number of instructions in the loop.  */
 137   int overall;
 138
 139   /* Number of instructions that will be likely optimized out in
 140      peeled iterations of loop  (i.e. computation based on induction
 141      variable where induction variable starts at known constant.)  */
 142   int eliminated_by_peeling;
 143
 144   /* Same statistics for last iteration of loop: it is smaller because
 145      instructions after exit are not executed.  */
 146   int last_iteration;
 147   int last_iteration_eliminated_by_peeling;
 148 };
 149
 150 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 151
 152 static bool
 153 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 154 {
 155   affine_iv iv;
 156
 157   if (is_gimple_min_invariant (op))
 158     return true;
 159
 160   /* We can still fold accesses to constant arrays when index is known.  */
 161   if (TREE_CODE (op) != SSA_NAME)
 162     {
 163       tree base = op;
 164
 165       /* First make fast look if we see constant array inside.  */
 166       while (handled_component_p (base))
 167         base = TREE_OPERAND (base, 0);
 168       if ((DECL_P (base)
 169            && TREE_STATIC (base)
 170            && TREE_READONLY (base)
 171            && (DECL_INITIAL (base)
 172                || (!DECL_EXTERNAL (base)
 173                    && targetm.binds_local_p (base))))
 174           || CONSTANT_CLASS_P (base))
 175         {
 176           /* If so, see if we understand all the indices.  */
 177           base = op;
 178           while (handled_component_p (base))
 179             {
 180               if (TREE_CODE (base) == ARRAY_REF
 181                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 182                 return false;
 183               base = TREE_OPERAND (base, 0);
 184             }
 185           return true;
 186         }
 187       return false;
 188     }
 189
 190   /* Induction variables are constants.  */
 191   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 192     return false;
 193   if (!is_gimple_min_invariant (iv.base))
 194     return false;
 195   if (!is_gimple_min_invariant (iv.step))
 196     return false;
 197   return true;
 198 }
 199
 200 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 201    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 202
 203 static void
 204 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 205 {
 206   basic_block *body = get_loop_body (loop);
 207   gimple_stmt_iterator gsi;
 208   unsigned int i;
 209   bool after_exit;
 210
 211   size->overall = 0;
 212   size->eliminated_by_peeling = 0;
 213   size->last_iteration = 0;
 214   size->last_iteration_eliminated_by_peeling = 0;
 215
 216   if (dump_file && (dump_flags & TDF_DETAILS))
 217     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 218   for (i = 0; i < loop->num_nodes; i++)
 219     {
 220       if (exit && body[i] != exit->src
 221           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 222         after_exit = true;
 223       else
 224         after_exit = false;
 225       if (dump_file && (dump_flags & TDF_DETAILS))
 226         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 227
 228       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 229         {
 230           gimple stmt = gsi_stmt (gsi);
 231           int num = estimate_num_insns (stmt, &eni_size_weights);
 232           bool likely_eliminated = false;
 233
 234           if (dump_file && (dump_flags & TDF_DETAILS))
 235             {
 236               fprintf (dump_file, "  size: %3i ", num);
 237               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 238             }
 239
 240           /* Look for reasons why we might optimize this stmt away. */
 241
 242           /* Exit conditional.  */
 243           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 244             {
 245               if (dump_file && (dump_flags & TDF_DETAILS))
 246                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 247               likely_eliminated = true;
 248             }
 249           /* Sets of IV variables  */
 250           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 251               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 252             {
 253               if (dump_file && (dump_flags & TDF_DETAILS))
 254                 fprintf (dump_file, "   Induction variable computation will"
 255                          " be folded away.\n");
 256               likely_eliminated = true;
 257             }
 258           /* Assignments of IV variables.  */
 259           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 260                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 261                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 262                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 263                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 264                                                   stmt, loop)))
 265             {
 266               if (dump_file && (dump_flags & TDF_DETAILS))
 267                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 268               likely_eliminated = true;
 269             }
 270           /* Conditionals.  */
 271           else if (gimple_code (stmt) == GIMPLE_COND
 272                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 273                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 274             {
 275               if (dump_file && (dump_flags & TDF_DETAILS))
 276                 fprintf (dump_file, "   Constant conditional.\n");
 277               likely_eliminated = true;
 278             }
 279
 280           size->overall += num;
 281           if (likely_eliminated)
 282             size->eliminated_by_peeling += num;
 283           if (!after_exit)
 284             {
 285               size->last_iteration += num;
 286               if (likely_eliminated)
 287                 size->last_iteration_eliminated_by_peeling += num;
 288             }
 289         }
 290     }
 291   if (dump_file && (dump_flags & TDF_DETAILS))
 292     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 293              size->eliminated_by_peeling, size->last_iteration,
 294              size->last_iteration_eliminated_by_peeling);
 295
 296   free (body);
 297 }
 298
 299 /* Estimate number of insns of completely unrolled loop.
 300    It is (NUNROLL + 1) * size of loop body with taking into account
 301    the fact that in last copy everything after exit conditional
 302    is dead and that some instructions will be eliminated after
 303    peeling.
 304
 305    Loop body is likely going to simplify futher, this is difficult
 306    to guess, we just decrease the result by 1/3.  */
 307
 308 static unsigned HOST_WIDE_INT
 309 estimated_unrolled_size (struct loop_size *size,
 310                          unsigned HOST_WIDE_INT nunroll)
 311 {
 312   HOST_WIDE_INT unr_insns = ((nunroll)
 313                              * (HOST_WIDE_INT) (size->overall
 314                                                 - size->eliminated_by_peeling));
 315   if (!nunroll)
 316     unr_insns = 0;
 317   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 318
 319   unr_insns = unr_insns * 2 / 3;
 320   if (unr_insns <= 0)
 321     unr_insns = 1;
 322
 323   return unr_insns;
 324 }
 325
 326 /* Tries to unroll LOOP completely, i.e. NITER times.
 327    UL determines which loops we are allowed to unroll.
 328    EXIT is the exit of the loop that should be eliminated.  */
 329
 330 static bool
 331 try_unroll_loop_completely (struct loop *loop,
 332                             edge exit, tree niter,
 333                             enum unroll_level ul)
 334 {
 335   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 336   gimple cond;
 337   struct loop_size size;
 338
 339   if (loop->inner)
 340     return false;
 341
 342   if (!host_integerp (niter, 1))
 343     return false;
 344   n_unroll = tree_low_cst (niter, 1);
 345
 346   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 347   if (n_unroll > max_unroll)
 348     return false;
 349
 350   if (n_unroll)
 351     {
 352       if (ul == UL_SINGLE_ITER)
 353         return false;
 354
 355       tree_estimate_loop_size (loop, exit, &size);
 356       ninsns = size.overall;
 357
 358       unr_insns = estimated_unrolled_size (&size, n_unroll);
 359       if (dump_file && (dump_flags & TDF_DETAILS))
 360         {
 361           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 362           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 363                    (int) unr_insns);
 364         }
 365
 366       if (unr_insns > ninsns
 367           && (unr_insns
 368               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 369         {
 370           if (dump_file && (dump_flags & TDF_DETAILS))
 371             fprintf (dump_file, "Not unrolling loop %d "
 372                      "(--param max-completely-peeled-insns limit reached).\n",
 373                      loop->num);
 374           return false;
 375         }
 376
 377       if (ul == UL_NO_GROWTH
 378           && unr_insns > ninsns)
 379         {
 380           if (dump_file && (dump_flags & TDF_DETAILS))
 381             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 382           return false;
 383         }
 384     }
 385
 386   if (n_unroll)
 387     {
 388       sbitmap wont_exit;
 389       edge e;
 390       unsigned i;
 391       VEC (edge, heap) *to_remove = NULL;
 392
 393       initialize_original_copy_tables ();
 394       wont_exit = sbitmap_alloc (n_unroll + 1);
 395       sbitmap_ones (wont_exit);
 396       RESET_BIT (wont_exit, 0);
 397
 398       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 399                                                  n_unroll, wont_exit,
 400                                                  exit, &to_remove,
 401                                                  DLTHE_FLAG_UPDATE_FREQ
 402                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 403         {
 404           free_original_copy_tables ();
 405           free (wont_exit);
 406           return false;
 407         }
 408
 409       for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
 410         {
 411           bool ok = remove_path (e);
 412           gcc_assert (ok);
 413         }
 414
 415       VEC_free (edge, heap, to_remove);
 416       free (wont_exit);
 417       free_original_copy_tables ();
 418     }
 419
 420   cond = last_stmt (exit->src);
 421   if (exit->flags & EDGE_TRUE_VALUE)
 422     gimple_cond_make_true (cond);
 423   else
 424     gimple_cond_make_false (cond);
 425   update_stmt (cond);
 426   update_ssa (TODO_update_ssa);
 427
 428   if (dump_file && (dump_flags & TDF_DETAILS))
 429     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 430
 431   return true;
 432 }
 433
 434 /* Adds a canonical induction variable to LOOP if suitable.
 435    CREATE_IV is true if we may create a new iv.  UL determines
 436    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 437    to determine the number of iterations of a loop by direct evaluation.
 438    Returns true if cfg is changed.  */
 439
 440 static bool
 441 canonicalize_loop_induction_variables (struct loop *loop,
 442                                        bool create_iv, enum unroll_level ul,
 443                                        bool try_eval)
 444 {
 445   edge exit = NULL;
 446   tree niter;
 447
 448   niter = number_of_latch_executions (loop);
 449   if (TREE_CODE (niter) == INTEGER_CST)
 450     {
 451       exit = single_exit (loop);
 452       if (!just_once_each_iteration_p (loop, exit->src))
 453         return false;
 454     }
 455   else
 456     {
 457       /* If the loop has more than one exit, try checking all of them
 458          for # of iterations determinable through scev.  */
 459       if (!single_exit (loop))
 460         niter = find_loop_niter (loop, &exit);
 461
 462       /* Finally if everything else fails, try brute force evaluation.  */
 463       if (try_eval
 464           && (chrec_contains_undetermined (niter)
 465               || TREE_CODE (niter) != INTEGER_CST))
 466         niter = find_loop_niter_by_eval (loop, &exit);
 467
 468       if (chrec_contains_undetermined (niter)
 469           || TREE_CODE (niter) != INTEGER_CST)
 470         return false;
 471     }
 472
 473   if (dump_file && (dump_flags & TDF_DETAILS))
 474     {
 475       fprintf (dump_file, "Loop %d iterates ", loop->num);
 476       print_generic_expr (dump_file, niter, TDF_SLIM);
 477       fprintf (dump_file, " times.\n");
 478     }
 479
 480   if (try_unroll_loop_completely (loop, exit, niter, ul))
 481     return true;
 482
 483   if (create_iv)
 484     create_canonical_iv (loop, exit, niter);
 485
 486   return false;
 487 }
 488
 489 /* The main entry point of the pass.  Adds canonical induction variables
 490    to the suitable loops.  */
 491
 492 unsigned int
 493 canonicalize_induction_variables (void)
 494 {
 495   loop_iterator li;
 496   struct loop *loop;
 497   bool changed = false;
 498
 499   FOR_EACH_LOOP (li, loop, 0)
 500     {
 501       changed |= canonicalize_loop_induction_variables (loop,
 502                                                         true, UL_SINGLE_ITER,
 503                                                         true);
 504     }
 505
 506   /* Clean up the information about numbers of iterations, since brute force
 507      evaluation could reveal new information.  */
 508   scev_reset ();
 509
 510   if (changed)
 511     return TODO_cleanup_cfg;
 512   return 0;
 513 }
 514
 515 /* Unroll LOOPS completely if they iterate just few times.  Unless
 516    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 517    size of the code does not increase.  */
 518
 519 unsigned int
 520 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 521 {
 522   loop_iterator li;
 523   struct loop *loop;
 524   bool changed;
 525   enum unroll_level ul;
 526   int iteration = 0;
 527
 528   do
 529     {
 530       changed = false;
 531
 532       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 533         {
 534           if (may_increase_size && optimize_loop_for_speed_p (loop)
 535               /* Unroll outermost loops only if asked to do so or they do
 536                  not cause code growth.  */
 537               && (unroll_outer
 538                   || loop_outer (loop_outer (loop))))
 539             ul = UL_ALL;
 540           else
 541             ul = UL_NO_GROWTH;
 542           changed |= canonicalize_loop_induction_variables
 543                        (loop, false, ul, !flag_tree_loop_ivcanon);
 544         }
 545
 546       if (changed)
 547         {
 548           /* This will take care of removing completely unrolled loops
 549              from the loop structures so we can continue unrolling now
 550              innermost loops.  */
 551           if (cleanup_tree_cfg ())
 552             update_ssa (TODO_update_ssa_only_virtuals);
 553
 554           /* Clean up the information about numbers of iterations, since
 555              complete unrolling might have invalidated it.  */
 556           scev_reset ();
 557         }
 558     }
 559   while (changed
 560          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
 561
 562   return 0;
 563 }