gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (int);
 118 static void peel_loops_completely (int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loop *);
 126 static void peel_loop_completely (struct loop *);
 127 static void unroll_loop_stupid (struct loop *);
 128 static void unroll_loop_constant_iterations (struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (int flags)
 146 {
 147   struct loop *loop;
 148   bool check;
 149   loop_iterator li;
 150
 151   /* First perform complete loop peeling (it is almost surely a win,
 152      and affects parameters for further decision a lot).  */
 153   peel_loops_completely (flags);
 154
 155   /* Now decide rest of unrolling and peeling.  */
 156   decide_unrolling_and_peeling (flags);
 157
 158   /* Scan the loops, inner ones first.  */
 159   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 160     {
 161       check = true;
 162       /* And perform the appropriate transformations.  */
 163       switch (loop->lpt_decision.decision)
 164         {
 165         case LPT_PEEL_COMPLETELY:
 166           /* Already done.  */
 167           gcc_unreachable ();
 168         case LPT_PEEL_SIMPLE:
 169           peel_loop_simple (loop);
 170           break;
 171         case LPT_UNROLL_CONSTANT:
 172           unroll_loop_constant_iterations (loop);
 173           break;
 174         case LPT_UNROLL_RUNTIME:
 175           unroll_loop_runtime_iterations (loop);
 176           break;
 177         case LPT_UNROLL_STUPID:
 178           unroll_loop_stupid (loop);
 179           break;
 180         case LPT_NONE:
 181           check = false;
 182           break;
 183         default:
 184           gcc_unreachable ();
 185         }
 186       if (check)
 187         {
 188 #ifdef ENABLE_CHECKING
 189           verify_dominators (CDI_DOMINATORS);
 190           verify_loop_structure ();
 191 #endif
 192         }
 193     }
 194
 195   iv_analysis_done ();
 196 }
 197
 198 /* Check whether exit of the LOOP is at the end of loop body.  */
 199
 200 static bool
 201 loop_exit_at_end_p (struct loop *loop)
 202 {
 203   struct niter_desc *desc = get_simple_loop_desc (loop);
 204   rtx insn;
 205
 206   if (desc->in_edge->dest != loop->latch)
 207     return false;
 208
 209   /* Check that the latch is empty.  */
 210   FOR_BB_INSNS (loop->latch, insn)
 211     {
 212       if (INSN_P (insn))
 213         return false;
 214     }
 215
 216   return true;
 217 }
 218
 219 /* Depending on FLAGS, check whether to peel loops completely and do so.  */
 220 static void
 221 peel_loops_completely (int flags)
 222 {
 223   struct loop *loop;
 224   loop_iterator li;
 225
 226   /* Scan the loops, the inner ones first.  */
 227   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 228     {
 229       loop->lpt_decision.decision = LPT_NONE;
 230
 231       if (dump_file)
 232         fprintf (dump_file,
 233                  "\n;; *** Considering loop %d for complete peeling ***\n",
 234                  loop->num);
 235
 236       loop->ninsns = num_loop_insns (loop);
 237
 238       decide_peel_once_rolling (loop, flags);
 239       if (loop->lpt_decision.decision == LPT_NONE)
 240         decide_peel_completely (loop, flags);
 241
 242       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 243         {
 244           peel_loop_completely (loop);
 245 #ifdef ENABLE_CHECKING
 246           verify_dominators (CDI_DOMINATORS);
 247           verify_loop_structure ();
 248 #endif
 249         }
 250     }
 251 }
 252
 253 /* Decide whether unroll or peel loops (depending on FLAGS) and how much.  */
 254 static void
 255 decide_unrolling_and_peeling (int flags)
 256 {
 257   struct loop *loop;
 258   loop_iterator li;
 259
 260   /* Scan the loops, inner ones first.  */
 261   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 262     {
 263       loop->lpt_decision.decision = LPT_NONE;
 264
 265       if (dump_file)
 266         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 267
 268       /* Do not peel cold areas.  */
 269       if (!maybe_hot_bb_p (loop->header))
 270         {
 271           if (dump_file)
 272             fprintf (dump_file, ";; Not considering loop, cold area\n");
 273           continue;
 274         }
 275
 276       /* Can the loop be manipulated?  */
 277       if (!can_duplicate_loop_p (loop))
 278         {
 279           if (dump_file)
 280             fprintf (dump_file,
 281                      ";; Not considering loop, cannot duplicate\n");
 282           continue;
 283         }
 284
 285       /* Skip non-innermost loops.  */
 286       if (loop->inner)
 287         {
 288           if (dump_file)
 289             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 290           continue;
 291         }
 292
 293       loop->ninsns = num_loop_insns (loop);
 294       loop->av_ninsns = average_num_loop_insns (loop);
 295
 296       /* Try transformations one by one in decreasing order of
 297          priority.  */
 298
 299       decide_unroll_constant_iterations (loop, flags);
 300       if (loop->lpt_decision.decision == LPT_NONE)
 301         decide_unroll_runtime_iterations (loop, flags);
 302       if (loop->lpt_decision.decision == LPT_NONE)
 303         decide_unroll_stupid (loop, flags);
 304       if (loop->lpt_decision.decision == LPT_NONE)
 305         decide_peel_simple (loop, flags);
 306     }
 307 }
 308
 309 /* Decide whether the LOOP is once rolling and suitable for complete
 310    peeling.  */
 311 static void
 312 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 313 {
 314   struct niter_desc *desc;
 315
 316   if (dump_file)
 317     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 318
 319   /* Is the loop small enough?  */
 320   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 321     {
 322       if (dump_file)
 323         fprintf (dump_file, ";; Not considering loop, is too big\n");
 324       return;
 325     }
 326
 327   /* Check for simple loops.  */
 328   desc = get_simple_loop_desc (loop);
 329
 330   /* Check number of iterations.  */
 331   if (!desc->simple_p
 332       || desc->assumptions
 333       || desc->infinite
 334       || !desc->const_iter
 335       || desc->niter != 0)
 336     {
 337       if (dump_file)
 338         fprintf (dump_file,
 339                  ";; Unable to prove that the loop rolls exactly once\n");
 340       return;
 341     }
 342
 343   /* Success.  */
 344   if (dump_file)
 345     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 346   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 347 }
 348
 349 /* Decide whether the LOOP is suitable for complete peeling.  */
 350 static void
 351 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 352 {
 353   unsigned npeel;
 354   struct niter_desc *desc;
 355
 356   if (dump_file)
 357     fprintf (dump_file, "\n;; Considering peeling completely\n");
 358
 359   /* Skip non-innermost loops.  */
 360   if (loop->inner)
 361     {
 362       if (dump_file)
 363         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 364       return;
 365     }
 366
 367   /* Do not peel cold areas.  */
 368   if (!maybe_hot_bb_p (loop->header))
 369     {
 370       if (dump_file)
 371         fprintf (dump_file, ";; Not considering loop, cold area\n");
 372       return;
 373     }
 374
 375   /* Can the loop be manipulated?  */
 376   if (!can_duplicate_loop_p (loop))
 377     {
 378       if (dump_file)
 379         fprintf (dump_file,
 380                  ";; Not considering loop, cannot duplicate\n");
 381       return;
 382     }
 383
 384   /* npeel = number of iterations to peel.  */
 385   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 386   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 387     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 388
 389   /* Is the loop small enough?  */
 390   if (!npeel)
 391     {
 392       if (dump_file)
 393         fprintf (dump_file, ";; Not considering loop, is too big\n");
 394       return;
 395     }
 396
 397   /* Check for simple loops.  */
 398   desc = get_simple_loop_desc (loop);
 399
 400   /* Check number of iterations.  */
 401   if (!desc->simple_p
 402       || desc->assumptions
 403       || !desc->const_iter
 404       || desc->infinite)
 405     {
 406       if (dump_file)
 407         fprintf (dump_file,
 408                  ";; Unable to prove that the loop iterates constant times\n");
 409       return;
 410     }
 411
 412   if (desc->niter > npeel - 1)
 413     {
 414       if (dump_file)
 415         {
 416           fprintf (dump_file,
 417                    ";; Not peeling loop completely, rolls too much (");
 418           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 419           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 420         }
 421       return;
 422     }
 423
 424   /* Success.  */
 425   if (dump_file)
 426     fprintf (dump_file, ";; Decided to peel loop completely\n");
 427   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 428 }
 429
 430 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 431    completely.  The transformation done:
 432
 433    for (i = 0; i < 4; i++)
 434      body;
 435
 436    ==>
 437
 438    i = 0;
 439    body; i++;
 440    body; i++;
 441    body; i++;
 442    body; i++;
 443    */
 444 static void
 445 peel_loop_completely (struct loop *loop)
 446 {
 447   sbitmap wont_exit;
 448   unsigned HOST_WIDE_INT npeel;
 449   unsigned i;
 450   VEC (edge, heap) *remove_edges;
 451   edge ein;
 452   struct niter_desc *desc = get_simple_loop_desc (loop);
 453   struct opt_info *opt_info = NULL;
 454
 455   npeel = desc->niter;
 456
 457   if (npeel)
 458     {
 459       bool ok;
 460
 461       wont_exit = sbitmap_alloc (npeel + 1);
 462       sbitmap_ones (wont_exit);
 463       RESET_BIT (wont_exit, 0);
 464       if (desc->noloop_assumptions)
 465         RESET_BIT (wont_exit, 1);
 466
 467       remove_edges = NULL;
 468
 469       if (flag_split_ivs_in_unroller)
 470         opt_info = analyze_insns_in_loop (loop);
 471
 472       opt_info_start_duplication (opt_info);
 473       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 474                                           npeel,
 475                                           wont_exit, desc->out_edge,
 476                                           &remove_edges,
 477                                           DLTHE_FLAG_UPDATE_FREQ
 478                                           | DLTHE_FLAG_COMPLETTE_PEEL
 479                                           | (opt_info
 480                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 481       gcc_assert (ok);
 482
 483       free (wont_exit);
 484
 485       if (opt_info)
 486         {
 487           apply_opt_in_copies (opt_info, npeel, false, true);
 488           free_opt_info (opt_info);
 489         }
 490
 491       /* Remove the exit edges.  */
 492       for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
 493         remove_path (ein);
 494       VEC_free (edge, heap, remove_edges);
 495     }
 496
 497   ein = desc->in_edge;
 498   free_simple_loop_desc (loop);
 499
 500   /* Now remove the unreachable part of the last iteration and cancel
 501      the loop.  */
 502   remove_path (ein);
 503
 504   if (dump_file)
 505     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 506 }
 507
 508 /* Decide whether to unroll LOOP iterating constant number of times
 509    and how much.  */
 510
 511 static void
 512 decide_unroll_constant_iterations (struct loop *loop, int flags)
 513 {
 514   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 515   struct niter_desc *desc;
 516
 517   if (!(flags & UAP_UNROLL))
 518     {
 519       /* We were not asked to, just return back silently.  */
 520       return;
 521     }
 522
 523   if (dump_file)
 524     fprintf (dump_file,
 525              "\n;; Considering unrolling loop with constant "
 526              "number of iterations\n");
 527
 528   /* nunroll = total number of copies of the original loop body in
 529      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 530   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 531   nunroll_by_av
 532     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 533   if (nunroll > nunroll_by_av)
 534     nunroll = nunroll_by_av;
 535   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 536     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 537
 538   /* Skip big loops.  */
 539   if (nunroll <= 1)
 540     {
 541       if (dump_file)
 542         fprintf (dump_file, ";; Not considering loop, is too big\n");
 543       return;
 544     }
 545
 546   /* Check for simple loops.  */
 547   desc = get_simple_loop_desc (loop);
 548
 549   /* Check number of iterations.  */
 550   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 551     {
 552       if (dump_file)
 553         fprintf (dump_file,
 554                  ";; Unable to prove that the loop iterates constant times\n");
 555       return;
 556     }
 557
 558   /* Check whether the loop rolls enough to consider.  */
 559   if (desc->niter < 2 * nunroll)
 560     {
 561       if (dump_file)
 562         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 563       return;
 564     }
 565
 566   /* Success; now compute number of iterations to unroll.  We alter
 567      nunroll so that as few as possible copies of loop body are
 568      necessary, while still not decreasing the number of unrollings
 569      too much (at most by 1).  */
 570   best_copies = 2 * nunroll + 10;
 571
 572   i = 2 * nunroll + 2;
 573   if (i - 1 >= desc->niter)
 574     i = desc->niter - 2;
 575
 576   for (; i >= nunroll - 1; i--)
 577     {
 578       unsigned exit_mod = desc->niter % (i + 1);
 579
 580       if (!loop_exit_at_end_p (loop))
 581         n_copies = exit_mod + i + 1;
 582       else if (exit_mod != (unsigned) i
 583                || desc->noloop_assumptions != NULL_RTX)
 584         n_copies = exit_mod + i + 2;
 585       else
 586         n_copies = i + 1;
 587
 588       if (n_copies < best_copies)
 589         {
 590           best_copies = n_copies;
 591           best_unroll = i;
 592         }
 593     }
 594
 595   if (dump_file)
 596     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 597              best_unroll + 1, best_copies, nunroll);
 598
 599   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 600   loop->lpt_decision.times = best_unroll;
 601
 602   if (dump_file)
 603     fprintf (dump_file,
 604              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 605              loop->lpt_decision.times);
 606 }
 607
 608 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 609    times.  The transformation does this:
 610
 611    for (i = 0; i < 102; i++)
 612      body;
 613
 614    ==>
 615
 616    i = 0;
 617    body; i++;
 618    body; i++;
 619    while (i < 102)
 620      {
 621        body; i++;
 622        body; i++;
 623        body; i++;
 624        body; i++;
 625      }
 626   */
 627 static void
 628 unroll_loop_constant_iterations (struct loop *loop)
 629 {
 630   unsigned HOST_WIDE_INT niter;
 631   unsigned exit_mod;
 632   sbitmap wont_exit;
 633   unsigned i;
 634   VEC (edge, heap) *remove_edges;
 635   edge e;
 636   unsigned max_unroll = loop->lpt_decision.times;
 637   struct niter_desc *desc = get_simple_loop_desc (loop);
 638   bool exit_at_end = loop_exit_at_end_p (loop);
 639   struct opt_info *opt_info = NULL;
 640   bool ok;
 641
 642   niter = desc->niter;
 643
 644   /* Should not get here (such loop should be peeled instead).  */
 645   gcc_assert (niter > max_unroll + 1);
 646
 647   exit_mod = niter % (max_unroll + 1);
 648
 649   wont_exit = sbitmap_alloc (max_unroll + 1);
 650   sbitmap_ones (wont_exit);
 651
 652   remove_edges = NULL;
 653   if (flag_split_ivs_in_unroller
 654       || flag_variable_expansion_in_unroller)
 655     opt_info = analyze_insns_in_loop (loop);
 656
 657   if (!exit_at_end)
 658     {
 659       /* The exit is not at the end of the loop; leave exit test
 660          in the first copy, so that the loops that start with test
 661          of exit condition have continuous body after unrolling.  */
 662
 663       if (dump_file)
 664         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 665
 666       /* Peel exit_mod iterations.  */
 667       RESET_BIT (wont_exit, 0);
 668       if (desc->noloop_assumptions)
 669         RESET_BIT (wont_exit, 1);
 670
 671       if (exit_mod)
 672         {
 673           opt_info_start_duplication (opt_info);
 674           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 675                                               exit_mod,
 676                                               wont_exit, desc->out_edge,
 677                                               &remove_edges,
 678                                               DLTHE_FLAG_UPDATE_FREQ
 679                                               | (opt_info && exit_mod > 1
 680                                                  ? DLTHE_RECORD_COPY_NUMBER
 681                                                    : 0));
 682           gcc_assert (ok);
 683
 684           if (opt_info && exit_mod > 1)
 685             apply_opt_in_copies (opt_info, exit_mod, false, false);
 686
 687           desc->noloop_assumptions = NULL_RTX;
 688           desc->niter -= exit_mod;
 689           desc->niter_max -= exit_mod;
 690         }
 691
 692       SET_BIT (wont_exit, 1);
 693     }
 694   else
 695     {
 696       /* Leave exit test in last copy, for the same reason as above if
 697          the loop tests the condition at the end of loop body.  */
 698
 699       if (dump_file)
 700         fprintf (dump_file, ";; Condition on end of loop.\n");
 701
 702       /* We know that niter >= max_unroll + 2; so we do not need to care of
 703          case when we would exit before reaching the loop.  So just peel
 704          exit_mod + 1 iterations.  */
 705       if (exit_mod != max_unroll
 706           || desc->noloop_assumptions)
 707         {
 708           RESET_BIT (wont_exit, 0);
 709           if (desc->noloop_assumptions)
 710             RESET_BIT (wont_exit, 1);
 711
 712           opt_info_start_duplication (opt_info);
 713           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 714                                               exit_mod + 1,
 715                                               wont_exit, desc->out_edge,
 716                                               &remove_edges,
 717                                               DLTHE_FLAG_UPDATE_FREQ
 718                                               | (opt_info && exit_mod > 0
 719                                                  ? DLTHE_RECORD_COPY_NUMBER
 720                                                    : 0));
 721           gcc_assert (ok);
 722
 723           if (opt_info && exit_mod > 0)
 724             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 725
 726           desc->niter -= exit_mod + 1;
 727           desc->niter_max -= exit_mod + 1;
 728           desc->noloop_assumptions = NULL_RTX;
 729
 730           SET_BIT (wont_exit, 0);
 731           SET_BIT (wont_exit, 1);
 732         }
 733
 734       RESET_BIT (wont_exit, max_unroll);
 735     }
 736
 737   /* Now unroll the loop.  */
 738
 739   opt_info_start_duplication (opt_info);
 740   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 741                                       max_unroll,
 742                                       wont_exit, desc->out_edge,
 743                                       &remove_edges,
 744                                       DLTHE_FLAG_UPDATE_FREQ
 745                                       | (opt_info
 746                                          ? DLTHE_RECORD_COPY_NUMBER
 747                                            : 0));
 748   gcc_assert (ok);
 749
 750   if (opt_info)
 751     {
 752       apply_opt_in_copies (opt_info, max_unroll, true, true);
 753       free_opt_info (opt_info);
 754     }
 755
 756   free (wont_exit);
 757
 758   if (exit_at_end)
 759     {
 760       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 761       /* Find a new in and out edge; they are in the last copy we have made.  */
 762
 763       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 764         {
 765           desc->out_edge = EDGE_SUCC (exit_block, 0);
 766           desc->in_edge = EDGE_SUCC (exit_block, 1);
 767         }
 768       else
 769         {
 770           desc->out_edge = EDGE_SUCC (exit_block, 1);
 771           desc->in_edge = EDGE_SUCC (exit_block, 0);
 772         }
 773     }
 774
 775   desc->niter /= max_unroll + 1;
 776   desc->niter_max /= max_unroll + 1;
 777   desc->niter_expr = GEN_INT (desc->niter);
 778
 779   /* Remove the edges.  */
 780   for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
 781     remove_path (e);
 782   VEC_free (edge, heap, remove_edges);
 783
 784   if (dump_file)
 785     fprintf (dump_file,
 786              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 787              max_unroll, num_loop_insns (loop));
 788 }
 789
 790 /* Decide whether to unroll LOOP iterating runtime computable number of times
 791    and how much.  */
 792 static void
 793 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 794 {
 795   unsigned nunroll, nunroll_by_av, i;
 796   struct niter_desc *desc;
 797
 798   if (!(flags & UAP_UNROLL))
 799     {
 800       /* We were not asked to, just return back silently.  */
 801       return;
 802     }
 803
 804   if (dump_file)
 805     fprintf (dump_file,
 806              "\n;; Considering unrolling loop with runtime "
 807              "computable number of iterations\n");
 808
 809   /* nunroll = total number of copies of the original loop body in
 810      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 811   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 812   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 813   if (nunroll > nunroll_by_av)
 814     nunroll = nunroll_by_av;
 815   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 816     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 817
 818   /* Skip big loops.  */
 819   if (nunroll <= 1)
 820     {
 821       if (dump_file)
 822         fprintf (dump_file, ";; Not considering loop, is too big\n");
 823       return;
 824     }
 825
 826   /* Check for simple loops.  */
 827   desc = get_simple_loop_desc (loop);
 828
 829   /* Check simpleness.  */
 830   if (!desc->simple_p || desc->assumptions)
 831     {
 832       if (dump_file)
 833         fprintf (dump_file,
 834                  ";; Unable to prove that the number of iterations "
 835                  "can be counted in runtime\n");
 836       return;
 837     }
 838
 839   if (desc->const_iter)
 840     {
 841       if (dump_file)
 842         fprintf (dump_file, ";; Loop iterates constant times\n");
 843       return;
 844     }
 845
 846   /* If we have profile feedback, check whether the loop rolls.  */
 847   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 848     {
 849       if (dump_file)
 850         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 851       return;
 852     }
 853
 854   /* Success; now force nunroll to be power of 2, as we are unable to
 855      cope with overflows in computation of number of iterations.  */
 856   for (i = 1; 2 * i <= nunroll; i *= 2)
 857     continue;
 858
 859   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 860   loop->lpt_decision.times = i - 1;
 861
 862   if (dump_file)
 863     fprintf (dump_file,
 864              ";; Decided to unroll the runtime computable "
 865              "times rolling loop, %d times.\n",
 866              loop->lpt_decision.times);
 867 }
 868
 869 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 870    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 871    and NULL is returned instead.  */
 872
 873 basic_block
 874 split_edge_and_insert (edge e, rtx insns)
 875 {
 876   basic_block bb;
 877
 878   if (!insns)
 879     return NULL;
 880   bb = split_edge (e);
 881   emit_insn_after (insns, BB_END (bb));
 882
 883   /* ??? We used to assume that INSNS can contain control flow insns, and
 884      that we had to try to find sub basic blocks in BB to maintain a valid
 885      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 886      and call break_superblocks when going out of cfglayout mode.  But it
 887      turns out that this never happens; and that if it does ever happen,
 888      the verify_flow_info call in loop_optimizer_finalize would fail.
 889
 890      There are two reasons why we expected we could have control flow insns
 891      in INSNS.  The first is when a comparison has to be done in parts, and
 892      the second is when the number of iterations is computed for loops with
 893      the number of iterations known at runtime.  In both cases, test cases
 894      to get control flow in INSNS appear to be impossible to construct:
 895
 896       * If do_compare_rtx_and_jump needs several branches to do comparison
 897         in a mode that needs comparison by parts, we cannot analyze the
 898         number of iterations of the loop, and we never get to unrolling it.
 899
 900       * The code in expand_divmod that was suspected to cause creation of
 901         branching code seems to be only accessed for signed division.  The
 902         divisions used by # of iterations analysis are always unsigned.
 903         Problems might arise on architectures that emits branching code
 904         for some operations that may appear in the unroller (especially
 905         for division), but we have no such architectures.
 906
 907      Considering all this, it was decided that we should for now assume
 908      that INSNS can in theory contain control flow insns, but in practice
 909      it never does.  So we don't handle the theoretical case, and should
 910      a real failure ever show up, we have a pretty good clue for how to
 911      fix it.  */
 912
 913   return bb;
 914 }
 915
 916 /* Unroll LOOP for that we are able to count number of iterations in runtime
 917    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 918    extra care for case n < 0):
 919
 920    for (i = 0; i < n; i++)
 921      body;
 922
 923    ==>
 924
 925    i = 0;
 926    mod = n % 4;
 927
 928    switch (mod)
 929      {
 930        case 3:
 931          body; i++;
 932        case 2:
 933          body; i++;
 934        case 1:
 935          body; i++;
 936        case 0: ;
 937      }
 938
 939    while (i < n)
 940      {
 941        body; i++;
 942        body; i++;
 943        body; i++;
 944        body; i++;
 945      }
 946    */
 947 static void
 948 unroll_loop_runtime_iterations (struct loop *loop)
 949 {
 950   rtx old_niter, niter, init_code, branch_code, tmp;
 951   unsigned i, j, p;
 952   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 953   unsigned n_dom_bbs;
 954   sbitmap wont_exit;
 955   int may_exit_copy;
 956   unsigned n_peel;
 957   VEC (edge, heap) *remove_edges;
 958   edge e;
 959   bool extra_zero_check, last_may_exit;
 960   unsigned max_unroll = loop->lpt_decision.times;
 961   struct niter_desc *desc = get_simple_loop_desc (loop);
 962   bool exit_at_end = loop_exit_at_end_p (loop);
 963   struct opt_info *opt_info = NULL;
 964   bool ok;
 965
 966   if (flag_split_ivs_in_unroller
 967       || flag_variable_expansion_in_unroller)
 968     opt_info = analyze_insns_in_loop (loop);
 969
 970   /* Remember blocks whose dominators will have to be updated.  */
 971   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 972   n_dom_bbs = 0;
 973
 974   body = get_loop_body (loop);
 975   for (i = 0; i < loop->num_nodes; i++)
 976     {
 977       unsigned nldom;
 978       basic_block *ldom;
 979
 980       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 981       for (j = 0; j < nldom; j++)
 982         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 983           dom_bbs[n_dom_bbs++] = ldom[j];
 984
 985       free (ldom);
 986     }
 987   free (body);
 988
 989   if (!exit_at_end)
 990     {
 991       /* Leave exit in first copy (for explanation why see comment in
 992          unroll_loop_constant_iterations).  */
 993       may_exit_copy = 0;
 994       n_peel = max_unroll - 1;
 995       extra_zero_check = true;
 996       last_may_exit = false;
 997     }
 998   else
 999     {
1000       /* Leave exit in last copy (for explanation why see comment in
1001          unroll_loop_constant_iterations).  */
1002       may_exit_copy = max_unroll;
1003       n_peel = max_unroll;
1004       extra_zero_check = false;
1005       last_may_exit = true;
1006     }
1007
1008   /* Get expression for number of iterations.  */
1009   start_sequence ();
1010   old_niter = niter = gen_reg_rtx (desc->mode);
1011   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1012   if (tmp != niter)
1013     emit_move_insn (niter, tmp);
1014
1015   /* Count modulo by ANDing it with max_unroll; we use the fact that
1016      the number of unrollings is a power of two, and thus this is correct
1017      even if there is overflow in the computation.  */
1018   niter = expand_simple_binop (desc->mode, AND,
1019                                niter,
1020                                GEN_INT (max_unroll),
1021                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1022
1023   init_code = get_insns ();
1024   end_sequence ();
1025
1026   /* Precondition the loop.  */
1027   split_edge_and_insert (loop_preheader_edge (loop), init_code);
1028
1029   remove_edges = NULL;
1030
1031   wont_exit = sbitmap_alloc (max_unroll + 2);
1032
1033   /* Peel the first copy of loop body (almost always we must leave exit test
1034      here; the only exception is when we have extra zero check and the number
1035      of iterations is reliable.  Also record the place of (possible) extra
1036      zero check.  */
1037   sbitmap_zero (wont_exit);
1038   if (extra_zero_check
1039       && !desc->noloop_assumptions)
1040     SET_BIT (wont_exit, 1);
1041   ezc_swtch = loop_preheader_edge (loop)->src;
1042   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1043                                       1, wont_exit, desc->out_edge,
1044                                       &remove_edges,
1045                                       DLTHE_FLAG_UPDATE_FREQ);
1046   gcc_assert (ok);
1047
1048   /* Record the place where switch will be built for preconditioning.  */
1049   swtch = split_edge (loop_preheader_edge (loop));
1050
1051   for (i = 0; i < n_peel; i++)
1052     {
1053       /* Peel the copy.  */
1054       sbitmap_zero (wont_exit);
1055       if (i != n_peel - 1 || !last_may_exit)
1056         SET_BIT (wont_exit, 1);
1057       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1058                                           1, wont_exit, desc->out_edge,
1059                                           &remove_edges,
1060                                           DLTHE_FLAG_UPDATE_FREQ);
1061       gcc_assert (ok);
1062
1063       /* Create item for switch.  */
1064       j = n_peel - i - (extra_zero_check ? 0 : 1);
1065       p = REG_BR_PROB_BASE / (i + 2);
1066
1067       preheader = split_edge (loop_preheader_edge (loop));
1068       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1069                                           block_label (preheader), p,
1070                                           NULL_RTX);
1071
1072       /* We rely on the fact that the compare and jump cannot be optimized out,
1073          and hence the cfg we create is correct.  */
1074       gcc_assert (branch_code != NULL_RTX);
1075
1076       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1077       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1078       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1079       e = make_edge (swtch, preheader,
1080                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1081       e->probability = p;
1082     }
1083
1084   if (extra_zero_check)
1085     {
1086       /* Add branch for zero iterations.  */
1087       p = REG_BR_PROB_BASE / (max_unroll + 1);
1088       swtch = ezc_swtch;
1089       preheader = split_edge (loop_preheader_edge (loop));
1090       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1091                                           block_label (preheader), p,
1092                                           NULL_RTX);
1093       gcc_assert (branch_code != NULL_RTX);
1094
1095       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1096       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1097       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1098       e = make_edge (swtch, preheader,
1099                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1100       e->probability = p;
1101     }
1102
1103   /* Recount dominators for outer blocks.  */
1104   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1105
1106   /* And unroll loop.  */
1107
1108   sbitmap_ones (wont_exit);
1109   RESET_BIT (wont_exit, may_exit_copy);
1110   opt_info_start_duplication (opt_info);
1111
1112   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1113                                       max_unroll,
1114                                       wont_exit, desc->out_edge,
1115                                       &remove_edges,
1116                                       DLTHE_FLAG_UPDATE_FREQ
1117                                       | (opt_info
1118                                          ? DLTHE_RECORD_COPY_NUMBER
1119                                            : 0));
1120   gcc_assert (ok);
1121
1122   if (opt_info)
1123     {
1124       apply_opt_in_copies (opt_info, max_unroll, true, true);
1125       free_opt_info (opt_info);
1126     }
1127
1128   free (wont_exit);
1129
1130   if (exit_at_end)
1131     {
1132       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1133       /* Find a new in and out edge; they are in the last copy we have
1134          made.  */
1135
1136       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1137         {
1138           desc->out_edge = EDGE_SUCC (exit_block, 0);
1139           desc->in_edge = EDGE_SUCC (exit_block, 1);
1140         }
1141       else
1142         {
1143           desc->out_edge = EDGE_SUCC (exit_block, 1);
1144           desc->in_edge = EDGE_SUCC (exit_block, 0);
1145         }
1146     }
1147
1148   /* Remove the edges.  */
1149   for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
1150     remove_path (e);
1151   VEC_free (edge, heap, remove_edges);
1152
1153   /* We must be careful when updating the number of iterations due to
1154      preconditioning and the fact that the value must be valid at entry
1155      of the loop.  After passing through the above code, we see that
1156      the correct new number of iterations is this:  */
1157   gcc_assert (!desc->const_iter);
1158   desc->niter_expr =
1159     simplify_gen_binary (UDIV, desc->mode, old_niter,
1160                          GEN_INT (max_unroll + 1));
1161   desc->niter_max /= max_unroll + 1;
1162   if (exit_at_end)
1163     {
1164       desc->niter_expr =
1165         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1166       desc->noloop_assumptions = NULL_RTX;
1167       desc->niter_max--;
1168     }
1169
1170   if (dump_file)
1171     fprintf (dump_file,
1172              ";; Unrolled loop %d times, counting # of iterations "
1173              "in runtime, %i insns\n",
1174              max_unroll, num_loop_insns (loop));
1175
1176   if (dom_bbs)
1177     free (dom_bbs);
1178 }
1179
1180 /* Decide whether to simply peel LOOP and how much.  */
1181 static void
1182 decide_peel_simple (struct loop *loop, int flags)
1183 {
1184   unsigned npeel;
1185   struct niter_desc *desc;
1186
1187   if (!(flags & UAP_PEEL))
1188     {
1189       /* We were not asked to, just return back silently.  */
1190       return;
1191     }
1192
1193   if (dump_file)
1194     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1195
1196   /* npeel = number of iterations to peel.  */
1197   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1198   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1199     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1200
1201   /* Skip big loops.  */
1202   if (!npeel)
1203     {
1204       if (dump_file)
1205         fprintf (dump_file, ";; Not considering loop, is too big\n");
1206       return;
1207     }
1208
1209   /* Check for simple loops.  */
1210   desc = get_simple_loop_desc (loop);
1211
1212   /* Check number of iterations.  */
1213   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1214     {
1215       if (dump_file)
1216         fprintf (dump_file, ";; Loop iterates constant times\n");
1217       return;
1218     }
1219
1220   /* Do not simply peel loops with branches inside -- it increases number
1221      of mispredicts.  */
1222   if (num_loop_branches (loop) > 1)
1223     {
1224       if (dump_file)
1225         fprintf (dump_file, ";; Not peeling, contains branches\n");
1226       return;
1227     }
1228
1229   if (loop->header->count)
1230     {
1231       unsigned niter = expected_loop_iterations (loop);
1232       if (niter + 1 > npeel)
1233         {
1234           if (dump_file)
1235             {
1236               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1237               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1238                        (HOST_WIDEST_INT) (niter + 1));
1239               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1240                        npeel);
1241             }
1242           return;
1243         }
1244       npeel = niter + 1;
1245     }
1246   else
1247     {
1248       /* For now we have no good heuristics to decide whether loop peeling
1249          will be effective, so disable it.  */
1250       if (dump_file)
1251         fprintf (dump_file,
1252                  ";; Not peeling loop, no evidence it will be profitable\n");
1253       return;
1254     }
1255
1256   /* Success.  */
1257   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1258   loop->lpt_decision.times = npeel;
1259
1260   if (dump_file)
1261     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1262              loop->lpt_decision.times);
1263 }
1264
1265 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1266    while (cond)
1267      body;
1268
1269    ==>
1270
1271    if (!cond) goto end;
1272    body;
1273    if (!cond) goto end;
1274    body;
1275    while (cond)
1276      body;
1277    end: ;
1278    */
1279 static void
1280 peel_loop_simple (struct loop *loop)
1281 {
1282   sbitmap wont_exit;
1283   unsigned npeel = loop->lpt_decision.times;
1284   struct niter_desc *desc = get_simple_loop_desc (loop);
1285   struct opt_info *opt_info = NULL;
1286   bool ok;
1287
1288   if (flag_split_ivs_in_unroller && npeel > 1)
1289     opt_info = analyze_insns_in_loop (loop);
1290
1291   wont_exit = sbitmap_alloc (npeel + 1);
1292   sbitmap_zero (wont_exit);
1293
1294   opt_info_start_duplication (opt_info);
1295
1296   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1297                                       npeel, wont_exit, NULL,
1298                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1299                                       | (opt_info
1300                                          ? DLTHE_RECORD_COPY_NUMBER
1301                                            : 0));
1302   gcc_assert (ok);
1303
1304   free (wont_exit);
1305
1306   if (opt_info)
1307     {
1308       apply_opt_in_copies (opt_info, npeel, false, false);
1309       free_opt_info (opt_info);
1310     }
1311
1312   if (desc->simple_p)
1313     {
1314       if (desc->const_iter)
1315         {
1316           desc->niter -= npeel;
1317           desc->niter_expr = GEN_INT (desc->niter);
1318           desc->noloop_assumptions = NULL_RTX;
1319         }
1320       else
1321         {
1322           /* We cannot just update niter_expr, as its value might be clobbered
1323              inside loop.  We could handle this by counting the number into
1324              temporary just like we do in runtime unrolling, but it does not
1325              seem worthwhile.  */
1326           free_simple_loop_desc (loop);
1327         }
1328     }
1329   if (dump_file)
1330     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1331 }
1332
1333 /* Decide whether to unroll LOOP stupidly and how much.  */
1334 static void
1335 decide_unroll_stupid (struct loop *loop, int flags)
1336 {
1337   unsigned nunroll, nunroll_by_av, i;
1338   struct niter_desc *desc;
1339
1340   if (!(flags & UAP_UNROLL_ALL))
1341     {
1342       /* We were not asked to, just return back silently.  */
1343       return;
1344     }
1345
1346   if (dump_file)
1347     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1348
1349   /* nunroll = total number of copies of the original loop body in
1350      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1351   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1352   nunroll_by_av
1353     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1354   if (nunroll > nunroll_by_av)
1355     nunroll = nunroll_by_av;
1356   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1357     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1358
1359   /* Skip big loops.  */
1360   if (nunroll <= 1)
1361     {
1362       if (dump_file)
1363         fprintf (dump_file, ";; Not considering loop, is too big\n");
1364       return;
1365     }
1366
1367   /* Check for simple loops.  */
1368   desc = get_simple_loop_desc (loop);
1369
1370   /* Check simpleness.  */
1371   if (desc->simple_p && !desc->assumptions)
1372     {
1373       if (dump_file)
1374         fprintf (dump_file, ";; The loop is simple\n");
1375       return;
1376     }
1377
1378   /* Do not unroll loops with branches inside -- it increases number
1379      of mispredicts.  */
1380   if (num_loop_branches (loop) > 1)
1381     {
1382       if (dump_file)
1383         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1384       return;
1385     }
1386
1387   /* If we have profile feedback, check whether the loop rolls.  */
1388   if (loop->header->count
1389       && expected_loop_iterations (loop) < 2 * nunroll)
1390     {
1391       if (dump_file)
1392         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1393       return;
1394     }
1395
1396   /* Success.  Now force nunroll to be power of 2, as it seems that this
1397      improves results (partially because of better alignments, partially
1398      because of some dark magic).  */
1399   for (i = 1; 2 * i <= nunroll; i *= 2)
1400     continue;
1401
1402   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1403   loop->lpt_decision.times = i - 1;
1404
1405   if (dump_file)
1406     fprintf (dump_file,
1407              ";; Decided to unroll the loop stupidly, %d times.\n",
1408              loop->lpt_decision.times);
1409 }
1410
1411 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1412    while (cond)
1413      body;
1414
1415    ==>
1416
1417    while (cond)
1418      {
1419        body;
1420        if (!cond) break;
1421        body;
1422        if (!cond) break;
1423        body;
1424        if (!cond) break;
1425        body;
1426      }
1427    */
1428 static void
1429 unroll_loop_stupid (struct loop *loop)
1430 {
1431   sbitmap wont_exit;
1432   unsigned nunroll = loop->lpt_decision.times;
1433   struct niter_desc *desc = get_simple_loop_desc (loop);
1434   struct opt_info *opt_info = NULL;
1435   bool ok;
1436
1437   if (flag_split_ivs_in_unroller
1438       || flag_variable_expansion_in_unroller)
1439     opt_info = analyze_insns_in_loop (loop);
1440
1441
1442   wont_exit = sbitmap_alloc (nunroll + 1);
1443   sbitmap_zero (wont_exit);
1444   opt_info_start_duplication (opt_info);
1445
1446   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1447                                       nunroll, wont_exit,
1448                                       NULL, NULL,
1449                                       DLTHE_FLAG_UPDATE_FREQ
1450                                       | (opt_info
1451                                          ? DLTHE_RECORD_COPY_NUMBER
1452                                            : 0));
1453   gcc_assert (ok);
1454
1455   if (opt_info)
1456     {
1457       apply_opt_in_copies (opt_info, nunroll, true, true);
1458       free_opt_info (opt_info);
1459     }
1460
1461   free (wont_exit);
1462
1463   if (desc->simple_p)
1464     {
1465       /* We indeed may get here provided that there are nontrivial assumptions
1466          for a loop to be really simple.  We could update the counts, but the
1467          problem is that we are unable to decide which exit will be taken
1468          (not really true in case the number of iterations is constant,
1469          but noone will do anything with this information, so we do not
1470          worry about it).  */
1471       desc->simple_p = false;
1472     }
1473
1474   if (dump_file)
1475     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1476              nunroll, num_loop_insns (loop));
1477 }
1478
1479 /* A hash function for information about insns to split.  */
1480
1481 static hashval_t
1482 si_info_hash (const void *ivts)
1483 {
1484   return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1485 }
1486
1487 /* An equality functions for information about insns to split.  */
1488
1489 static int
1490 si_info_eq (const void *ivts1, const void *ivts2)
1491 {
1492   const struct iv_to_split *i1 = ivts1;
1493   const struct iv_to_split *i2 = ivts2;
1494
1495   return i1->insn == i2->insn;
1496 }
1497
1498 /* Return a hash for VES, which is really a "var_to_expand *".  */
1499
1500 static hashval_t
1501 ve_info_hash (const void *ves)
1502 {
1503   return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1504 }
1505
1506 /* Return true if IVTS1 and IVTS2 (which are really both of type
1507    "var_to_expand *") refer to the same instruction.  */
1508
1509 static int
1510 ve_info_eq (const void *ivts1, const void *ivts2)
1511 {
1512   const struct var_to_expand *i1 = ivts1;
1513   const struct var_to_expand *i2 = ivts2;
1514
1515   return i1->insn == i2->insn;
1516 }
1517
1518 /* Returns true if REG is referenced in one insn in LOOP.  */
1519
1520 bool
1521 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1522 {
1523   basic_block *body, bb;
1524   unsigned i;
1525   int count_ref = 0;
1526   rtx insn;
1527
1528   body = get_loop_body (loop);
1529   for (i = 0; i < loop->num_nodes; i++)
1530     {
1531       bb = body[i];
1532
1533       FOR_BB_INSNS (bb, insn)
1534       {
1535         if (rtx_referenced_p (reg, insn))
1536           count_ref++;
1537       }
1538     }
1539   return (count_ref  == 1);
1540 }
1541
1542 /* Determine whether INSN contains an accumulator
1543    which can be expanded into separate copies,
1544    one for each copy of the LOOP body.
1545
1546    for (i = 0 ; i < n; i++)
1547      sum += a[i];
1548
1549    ==>
1550
1551    sum += a[i]
1552    ....
1553    i = i+1;
1554    sum1 += a[i]
1555    ....
1556    i = i+1
1557    sum2 += a[i];
1558    ....
1559
1560    Return NULL if INSN contains no opportunity for expansion of accumulator.
1561    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1562    information and return a pointer to it.
1563 */
1564
1565 static struct var_to_expand *
1566 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1567 {
1568   rtx set, dest, src, op1;
1569   struct var_to_expand *ves;
1570   enum machine_mode mode1, mode2;
1571
1572   set = single_set (insn);
1573   if (!set)
1574     return NULL;
1575
1576   dest = SET_DEST (set);
1577   src = SET_SRC (set);
1578
1579   if (GET_CODE (src) != PLUS
1580       && GET_CODE (src) != MINUS
1581       && GET_CODE (src) != MULT)
1582     return NULL;
1583
1584   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1585      in MD.  But if there is no optab to generate the insn, we can not
1586      perform the variable expansion.  This can happen if an MD provides
1587      an insn but not a named pattern to generate it, for example to avoid
1588      producing code that needs additional mode switches like for x87/mmx.
1589
1590      So we check have_insn_for which looks for an optab for the operation
1591      in SRC.  If it doesn't exist, we can't perform the expansion even
1592      though INSN is valid.  */
1593   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1594     return NULL;
1595
1596   if (!XEXP (src, 0))
1597     return NULL;
1598
1599   op1 = XEXP (src, 0);
1600
1601   if (!REG_P (dest)
1602       && !(GET_CODE (dest) == SUBREG
1603            && REG_P (SUBREG_REG (dest))))
1604     return NULL;
1605
1606   if (!rtx_equal_p (dest, op1))
1607     return NULL;
1608
1609   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1610     return NULL;
1611
1612   if (rtx_referenced_p (dest, XEXP (src, 1)))
1613     return NULL;
1614
1615   mode1 = GET_MODE (dest);
1616   mode2 = GET_MODE (XEXP (src, 1));
1617   if ((FLOAT_MODE_P (mode1)
1618        || FLOAT_MODE_P (mode2))
1619       && !flag_unsafe_math_optimizations)
1620     return NULL;
1621
1622   if (dump_file)
1623   {
1624     fprintf (dump_file,
1625     "\n;; Expanding Accumulator ");
1626     print_rtl (dump_file, dest);
1627     fprintf (dump_file, "\n");
1628   }
1629
1630   /* Record the accumulator to expand.  */
1631   ves = XNEW (struct var_to_expand);
1632   ves->insn = insn;
1633   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1634   ves->reg = copy_rtx (dest);
1635   ves->op = GET_CODE (src);
1636   ves->expansion_count = 0;
1637   ves->reuse_expansion = 0;
1638   return ves;
1639 }
1640
1641 /* Determine whether there is an induction variable in INSN that
1642    we would like to split during unrolling.
1643
1644    I.e. replace
1645
1646    i = i + 1;
1647    ...
1648    i = i + 1;
1649    ...
1650    i = i + 1;
1651    ...
1652
1653    type chains by
1654
1655    i0 = i + 1
1656    ...
1657    i = i0 + 1
1658    ...
1659    i = i0 + 2
1660    ...
1661
1662    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1663    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1664    pointer to it.  */
1665
1666 static struct iv_to_split *
1667 analyze_iv_to_split_insn (rtx insn)
1668 {
1669   rtx set, dest;
1670   struct rtx_iv iv;
1671   struct iv_to_split *ivts;
1672   bool ok;
1673
1674   /* For now we just split the basic induction variables.  Later this may be
1675      extended for example by selecting also addresses of memory references.  */
1676   set = single_set (insn);
1677   if (!set)
1678     return NULL;
1679
1680   dest = SET_DEST (set);
1681   if (!REG_P (dest))
1682     return NULL;
1683
1684   if (!biv_p (insn, dest))
1685     return NULL;
1686
1687   ok = iv_analyze_result (insn, dest, &iv);
1688
1689   /* This used to be an assert under the assumption that if biv_p returns
1690      true that iv_analyze_result must also return true.  However, that
1691      assumption is not strictly correct as evidenced by pr25569.
1692
1693      Returning NULL when iv_analyze_result returns false is safe and
1694      avoids the problems in pr25569 until the iv_analyze_* routines
1695      can be fixed, which is apparently hard and time consuming
1696      according to their author.  */
1697   if (! ok)
1698     return NULL;
1699
1700   if (iv.step == const0_rtx
1701       || iv.mode != iv.extend_mode)
1702     return NULL;
1703
1704   /* Record the insn to split.  */
1705   ivts = XNEW (struct iv_to_split);
1706   ivts->insn = insn;
1707   ivts->base_var = NULL_RTX;
1708   ivts->step = iv.step;
1709   ivts->n_loc = 1;
1710   ivts->loc[0] = 1;
1711
1712   return ivts;
1713 }
1714
1715 /* Determines which of insns in LOOP can be optimized.
1716    Return a OPT_INFO struct with the relevant hash tables filled
1717    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1718    is undefined for the return value.  */
1719
1720 static struct opt_info *
1721 analyze_insns_in_loop (struct loop *loop)
1722 {
1723   basic_block *body, bb;
1724   unsigned i;
1725   struct opt_info *opt_info = XCNEW (struct opt_info);
1726   rtx insn;
1727   struct iv_to_split *ivts = NULL;
1728   struct var_to_expand *ves = NULL;
1729   PTR *slot1;
1730   PTR *slot2;
1731   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1732   edge exit;
1733   bool can_apply = false;
1734
1735   iv_analysis_loop_init (loop);
1736
1737   body = get_loop_body (loop);
1738
1739   if (flag_split_ivs_in_unroller)
1740     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1741                                             si_info_hash, si_info_eq, free);
1742
1743   /* Record the loop exit bb and loop preheader before the unrolling.  */
1744   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1745
1746   if (VEC_length (edge, edges) == 1)
1747     {
1748       exit = VEC_index (edge, edges, 0);
1749       if (!(exit->flags & EDGE_COMPLEX))
1750         {
1751           opt_info->loop_exit = split_edge (exit);
1752           can_apply = true;
1753         }
1754     }
1755
1756   if (flag_variable_expansion_in_unroller
1757       && can_apply)
1758     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1759                                                       ve_info_hash, ve_info_eq, free);
1760
1761   for (i = 0; i < loop->num_nodes; i++)
1762     {
1763       bb = body[i];
1764       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1765         continue;
1766
1767       FOR_BB_INSNS (bb, insn)
1768       {
1769         if (!INSN_P (insn))
1770           continue;
1771
1772         if (opt_info->insns_to_split)
1773           ivts = analyze_iv_to_split_insn (insn);
1774
1775         if (ivts)
1776           {
1777             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1778             *slot1 = ivts;
1779             continue;
1780           }
1781
1782         if (opt_info->insns_with_var_to_expand)
1783           ves = analyze_insn_to_expand_var (loop, insn);
1784
1785         if (ves)
1786           {
1787             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1788             *slot2 = ves;
1789           }
1790       }
1791     }
1792
1793   VEC_free (edge, heap, edges);
1794   free (body);
1795   return opt_info;
1796 }
1797
1798 /* Called just before loop duplication.  Records start of duplicated area
1799    to OPT_INFO.  */
1800
1801 static void
1802 opt_info_start_duplication (struct opt_info *opt_info)
1803 {
1804   if (opt_info)
1805     opt_info->first_new_block = last_basic_block;
1806 }
1807
1808 /* Determine the number of iterations between initialization of the base
1809    variable and the current copy (N_COPY).  N_COPIES is the total number
1810    of newly created copies.  UNROLLING is true if we are unrolling
1811    (not peeling) the loop.  */
1812
1813 static unsigned
1814 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1815 {
1816   if (unrolling)
1817     {
1818       /* If we are unrolling, initialization is done in the original loop
1819          body (number 0).  */
1820       return n_copy;
1821     }
1822   else
1823     {
1824       /* If we are peeling, the copy in that the initialization occurs has
1825          number 1.  The original loop (number 0) is the last.  */
1826       if (n_copy)
1827         return n_copy - 1;
1828       else
1829         return n_copies;
1830     }
1831 }
1832
1833 /* Locate in EXPR the expression corresponding to the location recorded
1834    in IVTS, and return a pointer to the RTX for this location.  */
1835
1836 static rtx *
1837 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1838 {
1839   unsigned i;
1840   rtx *ret = &expr;
1841
1842   for (i = 0; i < ivts->n_loc; i++)
1843     ret = &XEXP (*ret, ivts->loc[i]);
1844
1845   return ret;
1846 }
1847
1848 /* Allocate basic variable for the induction variable chain.  Callback for
1849    htab_traverse.  */
1850
1851 static int
1852 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1853 {
1854   struct iv_to_split *ivts = *slot;
1855   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1856
1857   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1858
1859   return 1;
1860 }
1861
1862 /* Insert initialization of basic variable of IVTS before INSN, taking
1863    the initial value from INSN.  */
1864
1865 static void
1866 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1867 {
1868   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1869   rtx seq;
1870
1871   start_sequence ();
1872   expr = force_operand (expr, ivts->base_var);
1873   if (expr != ivts->base_var)
1874     emit_move_insn (ivts->base_var, expr);
1875   seq = get_insns ();
1876   end_sequence ();
1877
1878   emit_insn_before (seq, insn);
1879 }
1880
1881 /* Replace the use of induction variable described in IVTS in INSN
1882    by base variable + DELTA * step.  */
1883
1884 static void
1885 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1886 {
1887   rtx expr, *loc, seq, incr, var;
1888   enum machine_mode mode = GET_MODE (ivts->base_var);
1889   rtx src, dest, set;
1890
1891   /* Construct base + DELTA * step.  */
1892   if (!delta)
1893     expr = ivts->base_var;
1894   else
1895     {
1896       incr = simplify_gen_binary (MULT, mode,
1897                                   ivts->step, gen_int_mode (delta, mode));
1898       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1899                                   ivts->base_var, incr);
1900     }
1901
1902   /* Figure out where to do the replacement.  */
1903   loc = get_ivts_expr (single_set (insn), ivts);
1904
1905   /* If we can make the replacement right away, we're done.  */
1906   if (validate_change (insn, loc, expr, 0))
1907     return;
1908
1909   /* Otherwise, force EXPR into a register and try again.  */
1910   start_sequence ();
1911   var = gen_reg_rtx (mode);
1912   expr = force_operand (expr, var);
1913   if (expr != var)
1914     emit_move_insn (var, expr);
1915   seq = get_insns ();
1916   end_sequence ();
1917   emit_insn_before (seq, insn);
1918
1919   if (validate_change (insn, loc, var, 0))
1920     return;
1921
1922   /* The last chance.  Try recreating the assignment in insn
1923      completely from scratch.  */
1924   set = single_set (insn);
1925   gcc_assert (set);
1926
1927   start_sequence ();
1928   *loc = var;
1929   src = copy_rtx (SET_SRC (set));
1930   dest = copy_rtx (SET_DEST (set));
1931   src = force_operand (src, dest);
1932   if (src != dest)
1933     emit_move_insn (dest, src);
1934   seq = get_insns ();
1935   end_sequence ();
1936
1937   emit_insn_before (seq, insn);
1938   delete_insn (insn);
1939 }
1940
1941
1942 /* Return one expansion of the accumulator recorded in struct VE.  */
1943
1944 static rtx
1945 get_expansion (struct var_to_expand *ve)
1946 {
1947   rtx reg;
1948
1949   if (ve->reuse_expansion == 0)
1950     reg = ve->reg;
1951   else
1952     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1953
1954   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1955     ve->reuse_expansion = 0;
1956   else
1957     ve->reuse_expansion++;
1958
1959   return reg;
1960 }
1961
1962
1963 /* Given INSN replace the uses of the accumulator recorded in VE
1964    with a new register.  */
1965
1966 static void
1967 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1968 {
1969   rtx new_reg, set;
1970   bool really_new_expansion = false;
1971
1972   set = single_set (insn);
1973   gcc_assert (set);
1974
1975   /* Generate a new register only if the expansion limit has not been
1976      reached.  Else reuse an already existing expansion.  */
1977   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1978     {
1979       really_new_expansion = true;
1980       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1981     }
1982   else
1983     new_reg = get_expansion (ve);
1984
1985   validate_change (insn, &SET_DEST (set), new_reg, 1);
1986   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1987
1988   if (apply_change_group ())
1989     if (really_new_expansion)
1990       {
1991         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1992         ve->expansion_count++;
1993       }
1994 }
1995
1996 /* Initialize the variable expansions in loop preheader.
1997    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1998    basic block where the initialization of the expansions
1999    should take place.  */
2000
2001 static int
2002 insert_var_expansion_initialization (void **slot, void *place_p)
2003 {
2004   struct var_to_expand *ve = *slot;
2005   basic_block place = (basic_block)place_p;
2006   rtx seq, var, zero_init, insn;
2007   unsigned i;
2008
2009   if (VEC_length (rtx, ve->var_expansions) == 0)
2010     return 1;
2011
2012   start_sequence ();
2013   if (ve->op == PLUS || ve->op == MINUS)
2014     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2015       {
2016         zero_init =  CONST0_RTX (GET_MODE (var));
2017         emit_move_insn (var, zero_init);
2018       }
2019   else if (ve->op == MULT)
2020     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2021       {
2022         zero_init =  CONST1_RTX (GET_MODE (var));
2023         emit_move_insn (var, zero_init);
2024       }
2025
2026   seq = get_insns ();
2027   end_sequence ();
2028
2029   insn = BB_HEAD (place);
2030   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2031     insn = NEXT_INSN (insn);
2032
2033   emit_insn_after (seq, insn);
2034   /* Continue traversing the hash table.  */
2035   return 1;
2036 }
2037
2038 /*  Combine the variable expansions at the loop exit.
2039     Callbacks for htab_traverse.  PLACE_P is the loop exit
2040     basic block where the summation of the expansions should
2041     take place.  */
2042
2043 static int
2044 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2045 {
2046   struct var_to_expand *ve = *slot;
2047   basic_block place = (basic_block)place_p;
2048   rtx sum = ve->reg;
2049   rtx expr, seq, var, insn;
2050   unsigned i;
2051
2052   if (VEC_length (rtx, ve->var_expansions) == 0)
2053     return 1;
2054
2055   start_sequence ();
2056   if (ve->op == PLUS || ve->op == MINUS)
2057     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2058       {
2059         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2060                                    var, sum);
2061       }
2062   else if (ve->op == MULT)
2063     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2064       {
2065         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2066                                    var, sum);
2067       }
2068
2069   expr = force_operand (sum, ve->reg);
2070   if (expr != ve->reg)
2071     emit_move_insn (ve->reg, expr);
2072   seq = get_insns ();
2073   end_sequence ();
2074
2075   insn = BB_HEAD (place);
2076   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2077     insn = NEXT_INSN (insn);
2078
2079   emit_insn_after (seq, insn);
2080
2081   /* Continue traversing the hash table.  */
2082   return 1;
2083 }
2084
2085 /* Apply loop optimizations in loop copies using the
2086    data which gathered during the unrolling.  Structure
2087    OPT_INFO record that data.
2088
2089    UNROLLING is true if we unrolled (not peeled) the loop.
2090    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2091    the loop (as it should happen in complete unrolling, but not in ordinary
2092    peeling of the loop).  */
2093
2094 static void
2095 apply_opt_in_copies (struct opt_info *opt_info,
2096                      unsigned n_copies, bool unrolling,
2097                      bool rewrite_original_loop)
2098 {
2099   unsigned i, delta;
2100   basic_block bb, orig_bb;
2101   rtx insn, orig_insn, next;
2102   struct iv_to_split ivts_templ, *ivts;
2103   struct var_to_expand ve_templ, *ves;
2104
2105   /* Sanity check -- we need to put initialization in the original loop
2106      body.  */
2107   gcc_assert (!unrolling || rewrite_original_loop);
2108
2109   /* Allocate the basic variables (i0).  */
2110   if (opt_info->insns_to_split)
2111     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2112
2113   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2114     {
2115       bb = BASIC_BLOCK (i);
2116       orig_bb = get_bb_original (bb);
2117
2118       /* bb->aux holds position in copy sequence initialized by
2119          duplicate_loop_to_header_edge.  */
2120       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2121                                         unrolling);
2122       bb->aux = 0;
2123       orig_insn = BB_HEAD (orig_bb);
2124       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2125         {
2126           next = NEXT_INSN (insn);
2127           if (!INSN_P (insn))
2128             continue;
2129
2130           while (!INSN_P (orig_insn))
2131             orig_insn = NEXT_INSN (orig_insn);
2132
2133           ivts_templ.insn = orig_insn;
2134           ve_templ.insn = orig_insn;
2135
2136           /* Apply splitting iv optimization.  */
2137           if (opt_info->insns_to_split)
2138             {
2139               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2140
2141               if (ivts)
2142                 {
2143                   gcc_assert (GET_CODE (PATTERN (insn))
2144                               == GET_CODE (PATTERN (orig_insn)));
2145
2146                   if (!delta)
2147                     insert_base_initialization (ivts, insn);
2148                   split_iv (ivts, insn, delta);
2149                 }
2150             }
2151           /* Apply variable expansion optimization.  */
2152           if (unrolling && opt_info->insns_with_var_to_expand)
2153             {
2154               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2155               if (ves)
2156                 {
2157                   gcc_assert (GET_CODE (PATTERN (insn))
2158                               == GET_CODE (PATTERN (orig_insn)));
2159                   expand_var_during_unrolling (ves, insn);
2160                 }
2161             }
2162           orig_insn = NEXT_INSN (orig_insn);
2163         }
2164     }
2165
2166   if (!rewrite_original_loop)
2167     return;
2168
2169   /* Initialize the variable expansions in the loop preheader
2170      and take care of combining them at the loop exit.  */
2171   if (opt_info->insns_with_var_to_expand)
2172     {
2173       htab_traverse (opt_info->insns_with_var_to_expand,
2174                      insert_var_expansion_initialization,
2175                      opt_info->loop_preheader);
2176       htab_traverse (opt_info->insns_with_var_to_expand,
2177                      combine_var_copies_in_loop_exit,
2178                      opt_info->loop_exit);
2179     }
2180
2181   /* Rewrite also the original loop body.  Find them as originals of the blocks
2182      in the last copied iteration, i.e. those that have
2183      get_bb_copy (get_bb_original (bb)) == bb.  */
2184   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2185     {
2186       bb = BASIC_BLOCK (i);
2187       orig_bb = get_bb_original (bb);
2188       if (get_bb_copy (orig_bb) != bb)
2189         continue;
2190
2191       delta = determine_split_iv_delta (0, n_copies, unrolling);
2192       for (orig_insn = BB_HEAD (orig_bb);
2193            orig_insn != NEXT_INSN (BB_END (bb));
2194            orig_insn = next)
2195         {
2196           next = NEXT_INSN (orig_insn);
2197
2198           if (!INSN_P (orig_insn))
2199             continue;
2200
2201           ivts_templ.insn = orig_insn;
2202           if (opt_info->insns_to_split)
2203             {
2204               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2205               if (ivts)
2206                 {
2207                   if (!delta)
2208                     insert_base_initialization (ivts, orig_insn);
2209                   split_iv (ivts, orig_insn, delta);
2210                   continue;
2211                 }
2212             }
2213
2214         }
2215     }
2216 }
2217
2218 /*  Release the data structures used for the variable expansion
2219     optimization.  Callbacks for htab_traverse.  */
2220
2221 static int
2222 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2223 {
2224   struct var_to_expand *ve = *slot;
2225
2226   VEC_free (rtx, heap, ve->var_expansions);
2227
2228   /* Continue traversing the hash table.  */
2229   return 1;
2230 }
2231
2232 /* Release OPT_INFO.  */
2233
2234 static void
2235 free_opt_info (struct opt_info *opt_info)
2236 {
2237   if (opt_info->insns_to_split)
2238     htab_delete (opt_info->insns_to_split);
2239   if (opt_info->insns_with_var_to_expand)
2240     {
2241       htab_traverse (opt_info->insns_with_var_to_expand,
2242                      release_var_copies, NULL);
2243       htab_delete (opt_info->insns_with_var_to_expand);
2244     }
2245   free (opt_info);
2246 }