gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 /* Information about accumulators to expand.  */
  89
  90 struct var_to_expand
  91 {
  92   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  93   rtx reg;                         /* The accumulator which is expanded.  */
  94   varray_type var_expansions;      /* The copies of the accumulator which is expanded.  */
  95   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  96                                       or multiplication.  */
  97   int expansion_count;             /* Count the number of expansions generated so far.  */
  98   int reuse_expansion;             /* The expansion we intend to reuse to expand
  99                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 100                                       the original accumulator.  Else use
 101                                       var_expansions[REUSE_EXPANSION - 1].  */
 102 };
 103
 104 /* Information about optimization applied in
 105    the unrolled loop.  */
 106
 107 struct opt_info
 108 {
 109   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 110   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 111                                       to expand.  */
 112   unsigned first_new_block;        /* The first basic block that was
 113                                       duplicated.  */
 114   basic_block loop_exit;           /* The loop exit basic block.  */
 115   basic_block loop_preheader;      /* The loop preheader basic block.  */
 116 };
 117
 118 static void decide_unrolling_and_peeling (struct loops *, int);
 119 static void peel_loops_completely (struct loops *, int);
 120 static void decide_peel_simple (struct loop *, int);
 121 static void decide_peel_once_rolling (struct loop *, int);
 122 static void decide_peel_completely (struct loop *, int);
 123 static void decide_unroll_stupid (struct loop *, int);
 124 static void decide_unroll_constant_iterations (struct loop *, int);
 125 static void decide_unroll_runtime_iterations (struct loop *, int);
 126 static void peel_loop_simple (struct loops *, struct loop *);
 127 static void peel_loop_completely (struct loops *, struct loop *);
 128 static void unroll_loop_stupid (struct loops *, struct loop *);
 129 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 130 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 131 static struct opt_info *analyze_insns_in_loop (struct loop *);
 132 static void opt_info_start_duplication (struct opt_info *);
 133 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 134 static void free_opt_info (struct opt_info *);
 135 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 136 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 137 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 138 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 139 static int insert_var_expansion_initialization (void **, void *);
 140 static int combine_var_copies_in_loop_exit (void **, void *);
 141 static int release_var_copies (void **, void *);
 142 static rtx get_expansion (struct var_to_expand *);
 143
 144 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 145 void
 146 unroll_and_peel_loops (struct loops *loops, int flags)
 147 {
 148   struct loop *loop, *next;
 149   bool check;
 150
 151   /* First perform complete loop peeling (it is almost surely a win,
 152      and affects parameters for further decision a lot).  */
 153   peel_loops_completely (loops, flags);
 154
 155   /* Now decide rest of unrolling and peeling.  */
 156   decide_unrolling_and_peeling (loops, flags);
 157
 158   loop = loops->tree_root;
 159   while (loop->inner)
 160     loop = loop->inner;
 161
 162   /* Scan the loops, inner ones first.  */
 163   while (loop != loops->tree_root)
 164     {
 165       if (loop->next)
 166         {
 167           next = loop->next;
 168           while (next->inner)
 169             next = next->inner;
 170         }
 171       else
 172         next = loop->outer;
 173
 174       check = true;
 175       /* And perform the appropriate transformations.  */
 176       switch (loop->lpt_decision.decision)
 177         {
 178         case LPT_PEEL_COMPLETELY:
 179           /* Already done.  */
 180           gcc_unreachable ();
 181         case LPT_PEEL_SIMPLE:
 182           peel_loop_simple (loops, loop);
 183           break;
 184         case LPT_UNROLL_CONSTANT:
 185           unroll_loop_constant_iterations (loops, loop);
 186           break;
 187         case LPT_UNROLL_RUNTIME:
 188           unroll_loop_runtime_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_STUPID:
 191           unroll_loop_stupid (loops, loop);
 192           break;
 193         case LPT_NONE:
 194           check = false;
 195           break;
 196         default:
 197           gcc_unreachable ();
 198         }
 199       if (check)
 200         {
 201 #ifdef ENABLE_CHECKING
 202           verify_dominators (CDI_DOMINATORS);
 203           verify_loop_structure (loops);
 204 #endif
 205         }
 206       loop = next;
 207     }
 208
 209   iv_analysis_done ();
 210 }
 211
 212 /* Check whether exit of the LOOP is at the end of loop body.  */
 213
 214 static bool
 215 loop_exit_at_end_p (struct loop *loop)
 216 {
 217   struct niter_desc *desc = get_simple_loop_desc (loop);
 218   rtx insn;
 219
 220   if (desc->in_edge->dest != loop->latch)
 221     return false;
 222
 223   /* Check that the latch is empty.  */
 224   FOR_BB_INSNS (loop->latch, insn)
 225     {
 226       if (INSN_P (insn))
 227         return false;
 228     }
 229
 230   return true;
 231 }
 232
 233 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 234 static void
 235 peel_loops_completely (struct loops *loops, int flags)
 236 {
 237   struct loop *loop, *next;
 238
 239   loop = loops->tree_root;
 240   while (loop->inner)
 241     loop = loop->inner;
 242
 243   while (loop != loops->tree_root)
 244     {
 245       if (loop->next)
 246         {
 247           next = loop->next;
 248           while (next->inner)
 249             next = next->inner;
 250         }
 251       else
 252         next = loop->outer;
 253
 254       loop->lpt_decision.decision = LPT_NONE;
 255
 256       if (dump_file)
 257         fprintf (dump_file,
 258                  "\n;; *** Considering loop %d for complete peeling ***\n",
 259                  loop->num);
 260
 261       loop->ninsns = num_loop_insns (loop);
 262
 263       decide_peel_once_rolling (loop, flags);
 264       if (loop->lpt_decision.decision == LPT_NONE)
 265         decide_peel_completely (loop, flags);
 266
 267       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 268         {
 269           peel_loop_completely (loops, loop);
 270 #ifdef ENABLE_CHECKING
 271           verify_dominators (CDI_DOMINATORS);
 272           verify_loop_structure (loops);
 273 #endif
 274         }
 275       loop = next;
 276     }
 277 }
 278
 279 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 280 static void
 281 decide_unrolling_and_peeling (struct loops *loops, int flags)
 282 {
 283   struct loop *loop = loops->tree_root, *next;
 284
 285   while (loop->inner)
 286     loop = loop->inner;
 287
 288   /* Scan the loops, inner ones first.  */
 289   while (loop != loops->tree_root)
 290     {
 291       if (loop->next)
 292         {
 293           next = loop->next;
 294           while (next->inner)
 295             next = next->inner;
 296         }
 297       else
 298         next = loop->outer;
 299
 300       loop->lpt_decision.decision = LPT_NONE;
 301
 302       if (dump_file)
 303         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 304
 305       /* Do not peel cold areas.  */
 306       if (!maybe_hot_bb_p (loop->header))
 307         {
 308           if (dump_file)
 309             fprintf (dump_file, ";; Not considering loop, cold area\n");
 310           loop = next;
 311           continue;
 312         }
 313
 314       /* Can the loop be manipulated?  */
 315       if (!can_duplicate_loop_p (loop))
 316         {
 317           if (dump_file)
 318             fprintf (dump_file,
 319                      ";; Not considering loop, cannot duplicate\n");
 320           loop = next;
 321           continue;
 322         }
 323
 324       /* Skip non-innermost loops.  */
 325       if (loop->inner)
 326         {
 327           if (dump_file)
 328             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 329           loop = next;
 330           continue;
 331         }
 332
 333       loop->ninsns = num_loop_insns (loop);
 334       loop->av_ninsns = average_num_loop_insns (loop);
 335
 336       /* Try transformations one by one in decreasing order of
 337          priority.  */
 338
 339       decide_unroll_constant_iterations (loop, flags);
 340       if (loop->lpt_decision.decision == LPT_NONE)
 341         decide_unroll_runtime_iterations (loop, flags);
 342       if (loop->lpt_decision.decision == LPT_NONE)
 343         decide_unroll_stupid (loop, flags);
 344       if (loop->lpt_decision.decision == LPT_NONE)
 345         decide_peel_simple (loop, flags);
 346
 347       loop = next;
 348     }
 349 }
 350
 351 /* Decide whether the LOOP is once rolling and suitable for complete
 352    peeling.  */
 353 static void
 354 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 355 {
 356   struct niter_desc *desc;
 357
 358   if (dump_file)
 359     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 360
 361   /* Is the loop small enough?  */
 362   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 363     {
 364       if (dump_file)
 365         fprintf (dump_file, ";; Not considering loop, is too big\n");
 366       return;
 367     }
 368
 369   /* Check for simple loops.  */
 370   desc = get_simple_loop_desc (loop);
 371
 372   /* Check number of iterations.  */
 373   if (!desc->simple_p
 374       || desc->assumptions
 375       || !desc->const_iter
 376       || desc->niter != 0)
 377     {
 378       if (dump_file)
 379         fprintf (dump_file,
 380                  ";; Unable to prove that the loop rolls exactly once\n");
 381       return;
 382     }
 383
 384   /* Success.  */
 385   if (dump_file)
 386     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 387   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 388 }
 389
 390 /* Decide whether the LOOP is suitable for complete peeling.  */
 391 static void
 392 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 393 {
 394   unsigned npeel;
 395   struct niter_desc *desc;
 396
 397   if (dump_file)
 398     fprintf (dump_file, "\n;; Considering peeling completely\n");
 399
 400   /* Skip non-innermost loops.  */
 401   if (loop->inner)
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 405       return;
 406     }
 407
 408   /* Do not peel cold areas.  */
 409   if (!maybe_hot_bb_p (loop->header))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file, ";; Not considering loop, cold area\n");
 413       return;
 414     }
 415
 416   /* Can the loop be manipulated?  */
 417   if (!can_duplicate_loop_p (loop))
 418     {
 419       if (dump_file)
 420         fprintf (dump_file,
 421                  ";; Not considering loop, cannot duplicate\n");
 422       return;
 423     }
 424
 425   /* npeel = number of iterations to peel.  */
 426   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 427   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 428     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 429
 430   /* Is the loop small enough?  */
 431   if (!npeel)
 432     {
 433       if (dump_file)
 434         fprintf (dump_file, ";; Not considering loop, is too big\n");
 435       return;
 436     }
 437
 438   /* Check for simple loops.  */
 439   desc = get_simple_loop_desc (loop);
 440
 441   /* Check number of iterations.  */
 442   if (!desc->simple_p
 443       || desc->assumptions
 444       || !desc->const_iter)
 445     {
 446       if (dump_file)
 447         fprintf (dump_file,
 448                  ";; Unable to prove that the loop iterates constant times\n");
 449       return;
 450     }
 451
 452   if (desc->niter > npeel - 1)
 453     {
 454       if (dump_file)
 455         {
 456           fprintf (dump_file,
 457                    ";; Not peeling loop completely, rolls too much (");
 458           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 459           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 460         }
 461       return;
 462     }
 463
 464   /* Success.  */
 465   if (dump_file)
 466     fprintf (dump_file, ";; Decided to peel loop completely\n");
 467   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 468 }
 469
 470 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 471    completely.  The transformation done:
 472
 473    for (i = 0; i < 4; i++)
 474      body;
 475
 476    ==>
 477
 478    i = 0;
 479    body; i++;
 480    body; i++;
 481    body; i++;
 482    body; i++;
 483    */
 484 static void
 485 peel_loop_completely (struct loops *loops, struct loop *loop)
 486 {
 487   sbitmap wont_exit;
 488   unsigned HOST_WIDE_INT npeel;
 489   unsigned n_remove_edges, i;
 490   edge *remove_edges, ein;
 491   struct niter_desc *desc = get_simple_loop_desc (loop);
 492   struct opt_info *opt_info = NULL;
 493
 494   npeel = desc->niter;
 495
 496   if (npeel)
 497     {
 498       bool ok;
 499
 500       wont_exit = sbitmap_alloc (npeel + 1);
 501       sbitmap_ones (wont_exit);
 502       RESET_BIT (wont_exit, 0);
 503       if (desc->noloop_assumptions)
 504         RESET_BIT (wont_exit, 1);
 505
 506       remove_edges = xcalloc (npeel, sizeof (edge));
 507       n_remove_edges = 0;
 508
 509       if (flag_split_ivs_in_unroller)
 510         opt_info = analyze_insns_in_loop (loop);
 511
 512       opt_info_start_duplication (opt_info);
 513       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 514                                           loops, npeel,
 515                                           wont_exit, desc->out_edge,
 516                                           remove_edges, &n_remove_edges,
 517                                           DLTHE_FLAG_UPDATE_FREQ);
 518       gcc_assert (ok);
 519
 520       free (wont_exit);
 521
 522       if (opt_info)
 523         {
 524           apply_opt_in_copies (opt_info, npeel, false, true);
 525           free_opt_info (opt_info);
 526         }
 527
 528       /* Remove the exit edges.  */
 529       for (i = 0; i < n_remove_edges; i++)
 530         remove_path (loops, remove_edges[i]);
 531       free (remove_edges);
 532     }
 533
 534   ein = desc->in_edge;
 535   free_simple_loop_desc (loop);
 536
 537   /* Now remove the unreachable part of the last iteration and cancel
 538      the loop.  */
 539   remove_path (loops, ein);
 540
 541   if (dump_file)
 542     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 543 }
 544
 545 /* Decide whether to unroll LOOP iterating constant number of times
 546    and how much.  */
 547
 548 static void
 549 decide_unroll_constant_iterations (struct loop *loop, int flags)
 550 {
 551   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 552   struct niter_desc *desc;
 553
 554   if (!(flags & UAP_UNROLL))
 555     {
 556       /* We were not asked to, just return back silently.  */
 557       return;
 558     }
 559
 560   if (dump_file)
 561     fprintf (dump_file,
 562              "\n;; Considering unrolling loop with constant "
 563              "number of iterations\n");
 564
 565   /* nunroll = total number of copies of the original loop body in
 566      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 567   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 568   nunroll_by_av
 569     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 570   if (nunroll > nunroll_by_av)
 571     nunroll = nunroll_by_av;
 572   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 573     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 574
 575   /* Skip big loops.  */
 576   if (nunroll <= 1)
 577     {
 578       if (dump_file)
 579         fprintf (dump_file, ";; Not considering loop, is too big\n");
 580       return;
 581     }
 582
 583   /* Check for simple loops.  */
 584   desc = get_simple_loop_desc (loop);
 585
 586   /* Check number of iterations.  */
 587   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 588     {
 589       if (dump_file)
 590         fprintf (dump_file,
 591                  ";; Unable to prove that the loop iterates constant times\n");
 592       return;
 593     }
 594
 595   /* Check whether the loop rolls enough to consider.  */
 596   if (desc->niter < 2 * nunroll)
 597     {
 598       if (dump_file)
 599         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 600       return;
 601     }
 602
 603   /* Success; now compute number of iterations to unroll.  We alter
 604      nunroll so that as few as possible copies of loop body are
 605      necessary, while still not decreasing the number of unrollings
 606      too much (at most by 1).  */
 607   best_copies = 2 * nunroll + 10;
 608
 609   i = 2 * nunroll + 2;
 610   if (i - 1 >= desc->niter)
 611     i = desc->niter - 2;
 612
 613   for (; i >= nunroll - 1; i--)
 614     {
 615       unsigned exit_mod = desc->niter % (i + 1);
 616
 617       if (!loop_exit_at_end_p (loop))
 618         n_copies = exit_mod + i + 1;
 619       else if (exit_mod != (unsigned) i
 620                || desc->noloop_assumptions != NULL_RTX)
 621         n_copies = exit_mod + i + 2;
 622       else
 623         n_copies = i + 1;
 624
 625       if (n_copies < best_copies)
 626         {
 627           best_copies = n_copies;
 628           best_unroll = i;
 629         }
 630     }
 631
 632   if (dump_file)
 633     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 634              best_unroll + 1, best_copies, nunroll);
 635
 636   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 637   loop->lpt_decision.times = best_unroll;
 638
 639   if (dump_file)
 640     fprintf (dump_file,
 641              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 642              loop->lpt_decision.times);
 643 }
 644
 645 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 646    times.  The transformation does this:
 647
 648    for (i = 0; i < 102; i++)
 649      body;
 650
 651    ==>
 652
 653    i = 0;
 654    body; i++;
 655    body; i++;
 656    while (i < 102)
 657      {
 658        body; i++;
 659        body; i++;
 660        body; i++;
 661        body; i++;
 662      }
 663   */
 664 static void
 665 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 666 {
 667   unsigned HOST_WIDE_INT niter;
 668   unsigned exit_mod;
 669   sbitmap wont_exit;
 670   unsigned n_remove_edges, i;
 671   edge *remove_edges;
 672   unsigned max_unroll = loop->lpt_decision.times;
 673   struct niter_desc *desc = get_simple_loop_desc (loop);
 674   bool exit_at_end = loop_exit_at_end_p (loop);
 675   struct opt_info *opt_info = NULL;
 676   bool ok;
 677
 678   niter = desc->niter;
 679
 680   /* Should not get here (such loop should be peeled instead).  */
 681   gcc_assert (niter > max_unroll + 1);
 682
 683   exit_mod = niter % (max_unroll + 1);
 684
 685   wont_exit = sbitmap_alloc (max_unroll + 1);
 686   sbitmap_ones (wont_exit);
 687
 688   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 689   n_remove_edges = 0;
 690   if (flag_split_ivs_in_unroller
 691       || flag_variable_expansion_in_unroller)
 692     opt_info = analyze_insns_in_loop (loop);
 693
 694   if (!exit_at_end)
 695     {
 696       /* The exit is not at the end of the loop; leave exit test
 697          in the first copy, so that the loops that start with test
 698          of exit condition have continuous body after unrolling.  */
 699
 700       if (dump_file)
 701         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 702
 703       /* Peel exit_mod iterations.  */
 704       RESET_BIT (wont_exit, 0);
 705       if (desc->noloop_assumptions)
 706         RESET_BIT (wont_exit, 1);
 707
 708       if (exit_mod)
 709         {
 710           opt_info_start_duplication (opt_info);
 711           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 712                                               loops, exit_mod,
 713                                               wont_exit, desc->out_edge,
 714                                               remove_edges, &n_remove_edges,
 715                                               DLTHE_FLAG_UPDATE_FREQ);
 716           gcc_assert (ok);
 717
 718           if (opt_info && exit_mod > 1)
 719             apply_opt_in_copies (opt_info, exit_mod, false, false);
 720
 721           desc->noloop_assumptions = NULL_RTX;
 722           desc->niter -= exit_mod;
 723           desc->niter_max -= exit_mod;
 724         }
 725
 726       SET_BIT (wont_exit, 1);
 727     }
 728   else
 729     {
 730       /* Leave exit test in last copy, for the same reason as above if
 731          the loop tests the condition at the end of loop body.  */
 732
 733       if (dump_file)
 734         fprintf (dump_file, ";; Condition on end of loop.\n");
 735
 736       /* We know that niter >= max_unroll + 2; so we do not need to care of
 737          case when we would exit before reaching the loop.  So just peel
 738          exit_mod + 1 iterations.  */
 739       if (exit_mod != max_unroll
 740           || desc->noloop_assumptions)
 741         {
 742           RESET_BIT (wont_exit, 0);
 743           if (desc->noloop_assumptions)
 744             RESET_BIT (wont_exit, 1);
 745
 746           opt_info_start_duplication (opt_info);
 747           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 748                                               loops, exit_mod + 1,
 749                                               wont_exit, desc->out_edge,
 750                                               remove_edges, &n_remove_edges,
 751                                               DLTHE_FLAG_UPDATE_FREQ);
 752           gcc_assert (ok);
 753
 754           if (opt_info && exit_mod > 0)
 755             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 756
 757           desc->niter -= exit_mod + 1;
 758           desc->niter_max -= exit_mod + 1;
 759           desc->noloop_assumptions = NULL_RTX;
 760
 761           SET_BIT (wont_exit, 0);
 762           SET_BIT (wont_exit, 1);
 763         }
 764
 765       RESET_BIT (wont_exit, max_unroll);
 766     }
 767
 768   /* Now unroll the loop.  */
 769
 770   opt_info_start_duplication (opt_info);
 771   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 772                                       loops, max_unroll,
 773                                       wont_exit, desc->out_edge,
 774                                       remove_edges, &n_remove_edges,
 775                                       DLTHE_FLAG_UPDATE_FREQ);
 776   gcc_assert (ok);
 777
 778   if (opt_info)
 779     {
 780       apply_opt_in_copies (opt_info, max_unroll, true, true);
 781       free_opt_info (opt_info);
 782     }
 783
 784   free (wont_exit);
 785
 786   if (exit_at_end)
 787     {
 788       basic_block exit_block = desc->in_edge->src->rbi->copy;
 789       /* Find a new in and out edge; they are in the last copy we have made.  */
 790
 791       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 792         {
 793           desc->out_edge = EDGE_SUCC (exit_block, 0);
 794           desc->in_edge = EDGE_SUCC (exit_block, 1);
 795         }
 796       else
 797         {
 798           desc->out_edge = EDGE_SUCC (exit_block, 1);
 799           desc->in_edge = EDGE_SUCC (exit_block, 0);
 800         }
 801     }
 802
 803   desc->niter /= max_unroll + 1;
 804   desc->niter_max /= max_unroll + 1;
 805   desc->niter_expr = GEN_INT (desc->niter);
 806
 807   /* Remove the edges.  */
 808   for (i = 0; i < n_remove_edges; i++)
 809     remove_path (loops, remove_edges[i]);
 810   free (remove_edges);
 811
 812   if (dump_file)
 813     fprintf (dump_file,
 814              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 815              max_unroll, num_loop_insns (loop));
 816 }
 817
 818 /* Decide whether to unroll LOOP iterating runtime computable number of times
 819    and how much.  */
 820 static void
 821 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 822 {
 823   unsigned nunroll, nunroll_by_av, i;
 824   struct niter_desc *desc;
 825
 826   if (!(flags & UAP_UNROLL))
 827     {
 828       /* We were not asked to, just return back silently.  */
 829       return;
 830     }
 831
 832   if (dump_file)
 833     fprintf (dump_file,
 834              "\n;; Considering unrolling loop with runtime "
 835              "computable number of iterations\n");
 836
 837   /* nunroll = total number of copies of the original loop body in
 838      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 839   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 840   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 841   if (nunroll > nunroll_by_av)
 842     nunroll = nunroll_by_av;
 843   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 844     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 845
 846   /* Skip big loops.  */
 847   if (nunroll <= 1)
 848     {
 849       if (dump_file)
 850         fprintf (dump_file, ";; Not considering loop, is too big\n");
 851       return;
 852     }
 853
 854   /* Check for simple loops.  */
 855   desc = get_simple_loop_desc (loop);
 856
 857   /* Check simpleness.  */
 858   if (!desc->simple_p || desc->assumptions)
 859     {
 860       if (dump_file)
 861         fprintf (dump_file,
 862                  ";; Unable to prove that the number of iterations "
 863                  "can be counted in runtime\n");
 864       return;
 865     }
 866
 867   if (desc->const_iter)
 868     {
 869       if (dump_file)
 870         fprintf (dump_file, ";; Loop iterates constant times\n");
 871       return;
 872     }
 873
 874   /* If we have profile feedback, check whether the loop rolls.  */
 875   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 876     {
 877       if (dump_file)
 878         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 879       return;
 880     }
 881
 882   /* Success; now force nunroll to be power of 2, as we are unable to
 883      cope with overflows in computation of number of iterations.  */
 884   for (i = 1; 2 * i <= nunroll; i *= 2)
 885     continue;
 886
 887   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 888   loop->lpt_decision.times = i - 1;
 889
 890   if (dump_file)
 891     fprintf (dump_file,
 892              ";; Decided to unroll the runtime computable "
 893              "times rolling loop, %d times.\n",
 894              loop->lpt_decision.times);
 895 }
 896
 897 /* Unroll LOOP for that we are able to count number of iterations in runtime
 898    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 899    extra care for case n < 0):
 900
 901    for (i = 0; i < n; i++)
 902      body;
 903
 904    ==>
 905
 906    i = 0;
 907    mod = n % 4;
 908
 909    switch (mod)
 910      {
 911        case 3:
 912          body; i++;
 913        case 2:
 914          body; i++;
 915        case 1:
 916          body; i++;
 917        case 0: ;
 918      }
 919
 920    while (i < n)
 921      {
 922        body; i++;
 923        body; i++;
 924        body; i++;
 925        body; i++;
 926      }
 927    */
 928 static void
 929 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 930 {
 931   rtx old_niter, niter, init_code, branch_code, tmp;
 932   unsigned i, j, p;
 933   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 934   unsigned n_dom_bbs;
 935   sbitmap wont_exit;
 936   int may_exit_copy;
 937   unsigned n_peel, n_remove_edges;
 938   edge *remove_edges, e;
 939   bool extra_zero_check, last_may_exit;
 940   unsigned max_unroll = loop->lpt_decision.times;
 941   struct niter_desc *desc = get_simple_loop_desc (loop);
 942   bool exit_at_end = loop_exit_at_end_p (loop);
 943   struct opt_info *opt_info = NULL;
 944   bool ok;
 945
 946   if (flag_split_ivs_in_unroller
 947       || flag_variable_expansion_in_unroller)
 948     opt_info = analyze_insns_in_loop (loop);
 949
 950   /* Remember blocks whose dominators will have to be updated.  */
 951   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 952   n_dom_bbs = 0;
 953
 954   body = get_loop_body (loop);
 955   for (i = 0; i < loop->num_nodes; i++)
 956     {
 957       unsigned nldom;
 958       basic_block *ldom;
 959
 960       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 961       for (j = 0; j < nldom; j++)
 962         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 963           dom_bbs[n_dom_bbs++] = ldom[j];
 964
 965       free (ldom);
 966     }
 967   free (body);
 968
 969   if (!exit_at_end)
 970     {
 971       /* Leave exit in first copy (for explanation why see comment in
 972          unroll_loop_constant_iterations).  */
 973       may_exit_copy = 0;
 974       n_peel = max_unroll - 1;
 975       extra_zero_check = true;
 976       last_may_exit = false;
 977     }
 978   else
 979     {
 980       /* Leave exit in last copy (for explanation why see comment in
 981          unroll_loop_constant_iterations).  */
 982       may_exit_copy = max_unroll;
 983       n_peel = max_unroll;
 984       extra_zero_check = false;
 985       last_may_exit = true;
 986     }
 987
 988   /* Get expression for number of iterations.  */
 989   start_sequence ();
 990   old_niter = niter = gen_reg_rtx (desc->mode);
 991   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 992   if (tmp != niter)
 993     emit_move_insn (niter, tmp);
 994
 995   /* Count modulo by ANDing it with max_unroll; we use the fact that
 996      the number of unrollings is a power of two, and thus this is correct
 997      even if there is overflow in the computation.  */
 998   niter = expand_simple_binop (desc->mode, AND,
 999                                niter,
1000                                GEN_INT (max_unroll),
1001                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1002
1003   init_code = get_insns ();
1004   end_sequence ();
1005
1006   /* Precondition the loop.  */
1007   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1008
1009   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1010   n_remove_edges = 0;
1011
1012   wont_exit = sbitmap_alloc (max_unroll + 2);
1013
1014   /* Peel the first copy of loop body (almost always we must leave exit test
1015      here; the only exception is when we have extra zero check and the number
1016      of iterations is reliable.  Also record the place of (possible) extra
1017      zero check.  */
1018   sbitmap_zero (wont_exit);
1019   if (extra_zero_check
1020       && !desc->noloop_assumptions)
1021     SET_BIT (wont_exit, 1);
1022   ezc_swtch = loop_preheader_edge (loop)->src;
1023   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1024                                       loops, 1,
1025                                       wont_exit, desc->out_edge,
1026                                       remove_edges, &n_remove_edges,
1027                                       DLTHE_FLAG_UPDATE_FREQ);
1028   gcc_assert (ok);
1029
1030   /* Record the place where switch will be built for preconditioning.  */
1031   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1032                                 NULL_RTX);
1033
1034   for (i = 0; i < n_peel; i++)
1035     {
1036       /* Peel the copy.  */
1037       sbitmap_zero (wont_exit);
1038       if (i != n_peel - 1 || !last_may_exit)
1039         SET_BIT (wont_exit, 1);
1040       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1041                                           loops, 1,
1042                                           wont_exit, desc->out_edge,
1043                                           remove_edges, &n_remove_edges,
1044                                           DLTHE_FLAG_UPDATE_FREQ);
1045       gcc_assert (ok);
1046
1047       /* Create item for switch.  */
1048       j = n_peel - i - (extra_zero_check ? 0 : 1);
1049       p = REG_BR_PROB_BASE / (i + 2);
1050
1051       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1052       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1053                                           block_label (preheader), p,
1054                                           NULL_RTX);
1055
1056       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1057       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1058       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1059       e = make_edge (swtch, preheader,
1060                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1061       e->probability = p;
1062     }
1063
1064   if (extra_zero_check)
1065     {
1066       /* Add branch for zero iterations.  */
1067       p = REG_BR_PROB_BASE / (max_unroll + 1);
1068       swtch = ezc_swtch;
1069       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1070       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1071                                           block_label (preheader), p,
1072                                           NULL_RTX);
1073
1074       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1075       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1076       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1077       e = make_edge (swtch, preheader,
1078                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1079       e->probability = p;
1080     }
1081
1082   /* Recount dominators for outer blocks.  */
1083   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1084
1085   /* And unroll loop.  */
1086
1087   sbitmap_ones (wont_exit);
1088   RESET_BIT (wont_exit, may_exit_copy);
1089   opt_info_start_duplication (opt_info);
1090
1091   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1092                                       loops, max_unroll,
1093                                       wont_exit, desc->out_edge,
1094                                       remove_edges, &n_remove_edges,
1095                                       DLTHE_FLAG_UPDATE_FREQ);
1096   gcc_assert (ok);
1097
1098   if (opt_info)
1099     {
1100       apply_opt_in_copies (opt_info, max_unroll, true, true);
1101       free_opt_info (opt_info);
1102     }
1103
1104   free (wont_exit);
1105
1106   if (exit_at_end)
1107     {
1108       basic_block exit_block = desc->in_edge->src->rbi->copy;
1109       /* Find a new in and out edge; they are in the last copy we have
1110          made.  */
1111
1112       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1113         {
1114           desc->out_edge = EDGE_SUCC (exit_block, 0);
1115           desc->in_edge = EDGE_SUCC (exit_block, 1);
1116         }
1117       else
1118         {
1119           desc->out_edge = EDGE_SUCC (exit_block, 1);
1120           desc->in_edge = EDGE_SUCC (exit_block, 0);
1121         }
1122     }
1123
1124   /* Remove the edges.  */
1125   for (i = 0; i < n_remove_edges; i++)
1126     remove_path (loops, remove_edges[i]);
1127   free (remove_edges);
1128
1129   /* We must be careful when updating the number of iterations due to
1130      preconditioning and the fact that the value must be valid at entry
1131      of the loop.  After passing through the above code, we see that
1132      the correct new number of iterations is this:  */
1133   gcc_assert (!desc->const_iter);
1134   desc->niter_expr =
1135     simplify_gen_binary (UDIV, desc->mode, old_niter,
1136                          GEN_INT (max_unroll + 1));
1137   desc->niter_max /= max_unroll + 1;
1138   if (exit_at_end)
1139     {
1140       desc->niter_expr =
1141         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1142       desc->noloop_assumptions = NULL_RTX;
1143       desc->niter_max--;
1144     }
1145
1146   if (dump_file)
1147     fprintf (dump_file,
1148              ";; Unrolled loop %d times, counting # of iterations "
1149              "in runtime, %i insns\n",
1150              max_unroll, num_loop_insns (loop));
1151 }
1152
1153 /* Decide whether to simply peel LOOP and how much.  */
1154 static void
1155 decide_peel_simple (struct loop *loop, int flags)
1156 {
1157   unsigned npeel;
1158   struct niter_desc *desc;
1159
1160   if (!(flags & UAP_PEEL))
1161     {
1162       /* We were not asked to, just return back silently.  */
1163       return;
1164     }
1165
1166   if (dump_file)
1167     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1168
1169   /* npeel = number of iterations to peel.  */
1170   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1171   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1172     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1173
1174   /* Skip big loops.  */
1175   if (!npeel)
1176     {
1177       if (dump_file)
1178         fprintf (dump_file, ";; Not considering loop, is too big\n");
1179       return;
1180     }
1181
1182   /* Check for simple loops.  */
1183   desc = get_simple_loop_desc (loop);
1184
1185   /* Check number of iterations.  */
1186   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1187     {
1188       if (dump_file)
1189         fprintf (dump_file, ";; Loop iterates constant times\n");
1190       return;
1191     }
1192
1193   /* Do not simply peel loops with branches inside -- it increases number
1194      of mispredicts.  */
1195   if (num_loop_branches (loop) > 1)
1196     {
1197       if (dump_file)
1198         fprintf (dump_file, ";; Not peeling, contains branches\n");
1199       return;
1200     }
1201
1202   if (loop->header->count)
1203     {
1204       unsigned niter = expected_loop_iterations (loop);
1205       if (niter + 1 > npeel)
1206         {
1207           if (dump_file)
1208             {
1209               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1210               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1211                        (HOST_WIDEST_INT) (niter + 1));
1212               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1213                        npeel);
1214             }
1215           return;
1216         }
1217       npeel = niter + 1;
1218     }
1219   else
1220     {
1221       /* For now we have no good heuristics to decide whether loop peeling
1222          will be effective, so disable it.  */
1223       if (dump_file)
1224         fprintf (dump_file,
1225                  ";; Not peeling loop, no evidence it will be profitable\n");
1226       return;
1227     }
1228
1229   /* Success.  */
1230   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1231   loop->lpt_decision.times = npeel;
1232
1233   if (dump_file)
1234     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1235              loop->lpt_decision.times);
1236 }
1237
1238 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1239    while (cond)
1240      body;
1241
1242    ==>
1243
1244    if (!cond) goto end;
1245    body;
1246    if (!cond) goto end;
1247    body;
1248    while (cond)
1249      body;
1250    end: ;
1251    */
1252 static void
1253 peel_loop_simple (struct loops *loops, struct loop *loop)
1254 {
1255   sbitmap wont_exit;
1256   unsigned npeel = loop->lpt_decision.times;
1257   struct niter_desc *desc = get_simple_loop_desc (loop);
1258   struct opt_info *opt_info = NULL;
1259   bool ok;
1260
1261   if (flag_split_ivs_in_unroller && npeel > 1)
1262     opt_info = analyze_insns_in_loop (loop);
1263
1264   wont_exit = sbitmap_alloc (npeel + 1);
1265   sbitmap_zero (wont_exit);
1266
1267   opt_info_start_duplication (opt_info);
1268
1269   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1270                                       loops, npeel, wont_exit,
1271                                       NULL, NULL,
1272                                       NULL, DLTHE_FLAG_UPDATE_FREQ);
1273   gcc_assert (ok);
1274
1275   free (wont_exit);
1276
1277   if (opt_info)
1278     {
1279       apply_opt_in_copies (opt_info, npeel, false, false);
1280       free_opt_info (opt_info);
1281     }
1282
1283   if (desc->simple_p)
1284     {
1285       if (desc->const_iter)
1286         {
1287           desc->niter -= npeel;
1288           desc->niter_expr = GEN_INT (desc->niter);
1289           desc->noloop_assumptions = NULL_RTX;
1290         }
1291       else
1292         {
1293           /* We cannot just update niter_expr, as its value might be clobbered
1294              inside loop.  We could handle this by counting the number into
1295              temporary just like we do in runtime unrolling, but it does not
1296              seem worthwhile.  */
1297           free_simple_loop_desc (loop);
1298         }
1299     }
1300   if (dump_file)
1301     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1302 }
1303
1304 /* Decide whether to unroll LOOP stupidly and how much.  */
1305 static void
1306 decide_unroll_stupid (struct loop *loop, int flags)
1307 {
1308   unsigned nunroll, nunroll_by_av, i;
1309   struct niter_desc *desc;
1310
1311   if (!(flags & UAP_UNROLL_ALL))
1312     {
1313       /* We were not asked to, just return back silently.  */
1314       return;
1315     }
1316
1317   if (dump_file)
1318     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1319
1320   /* nunroll = total number of copies of the original loop body in
1321      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1322   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1323   nunroll_by_av
1324     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1325   if (nunroll > nunroll_by_av)
1326     nunroll = nunroll_by_av;
1327   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1328     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1329
1330   /* Skip big loops.  */
1331   if (nunroll <= 1)
1332     {
1333       if (dump_file)
1334         fprintf (dump_file, ";; Not considering loop, is too big\n");
1335       return;
1336     }
1337
1338   /* Check for simple loops.  */
1339   desc = get_simple_loop_desc (loop);
1340
1341   /* Check simpleness.  */
1342   if (desc->simple_p && !desc->assumptions)
1343     {
1344       if (dump_file)
1345         fprintf (dump_file, ";; The loop is simple\n");
1346       return;
1347     }
1348
1349   /* Do not unroll loops with branches inside -- it increases number
1350      of mispredicts.  */
1351   if (num_loop_branches (loop) > 1)
1352     {
1353       if (dump_file)
1354         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1355       return;
1356     }
1357
1358   /* If we have profile feedback, check whether the loop rolls.  */
1359   if (loop->header->count
1360       && expected_loop_iterations (loop) < 2 * nunroll)
1361     {
1362       if (dump_file)
1363         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1364       return;
1365     }
1366
1367   /* Success.  Now force nunroll to be power of 2, as it seems that this
1368      improves results (partially because of better alignments, partially
1369      because of some dark magic).  */
1370   for (i = 1; 2 * i <= nunroll; i *= 2)
1371     continue;
1372
1373   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1374   loop->lpt_decision.times = i - 1;
1375
1376   if (dump_file)
1377     fprintf (dump_file,
1378              ";; Decided to unroll the loop stupidly, %d times.\n",
1379              loop->lpt_decision.times);
1380 }
1381
1382 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1383    while (cond)
1384      body;
1385
1386    ==>
1387
1388    while (cond)
1389      {
1390        body;
1391        if (!cond) break;
1392        body;
1393        if (!cond) break;
1394        body;
1395        if (!cond) break;
1396        body;
1397      }
1398    */
1399 static void
1400 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1401 {
1402   sbitmap wont_exit;
1403   unsigned nunroll = loop->lpt_decision.times;
1404   struct niter_desc *desc = get_simple_loop_desc (loop);
1405   struct opt_info *opt_info = NULL;
1406   bool ok;
1407
1408   if (flag_split_ivs_in_unroller
1409       || flag_variable_expansion_in_unroller)
1410     opt_info = analyze_insns_in_loop (loop);
1411
1412
1413   wont_exit = sbitmap_alloc (nunroll + 1);
1414   sbitmap_zero (wont_exit);
1415   opt_info_start_duplication (opt_info);
1416
1417   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1418                                       loops, nunroll, wont_exit,
1419                                       NULL, NULL, NULL,
1420                                       DLTHE_FLAG_UPDATE_FREQ);
1421   gcc_assert (ok);
1422
1423   if (opt_info)
1424     {
1425       apply_opt_in_copies (opt_info, nunroll, true, true);
1426       free_opt_info (opt_info);
1427     }
1428
1429   free (wont_exit);
1430
1431   if (desc->simple_p)
1432     {
1433       /* We indeed may get here provided that there are nontrivial assumptions
1434          for a loop to be really simple.  We could update the counts, but the
1435          problem is that we are unable to decide which exit will be taken
1436          (not really true in case the number of iterations is constant,
1437          but noone will do anything with this information, so we do not
1438          worry about it).  */
1439       desc->simple_p = false;
1440     }
1441
1442   if (dump_file)
1443     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1444              nunroll, num_loop_insns (loop));
1445 }
1446
1447 /* A hash function for information about insns to split.  */
1448
1449 static hashval_t
1450 si_info_hash (const void *ivts)
1451 {
1452   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1453 }
1454
1455 /* An equality functions for information about insns to split.  */
1456
1457 static int
1458 si_info_eq (const void *ivts1, const void *ivts2)
1459 {
1460   const struct iv_to_split *i1 = ivts1;
1461   const struct iv_to_split *i2 = ivts2;
1462
1463   return i1->insn == i2->insn;
1464 }
1465
1466 /* Return a hash for VES, which is really a "var_to_expand *".  */
1467
1468 static hashval_t
1469 ve_info_hash (const void *ves)
1470 {
1471   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1472 }
1473
1474 /* Return true if IVTS1 and IVTS2 (which are really both of type
1475    "var_to_expand *") refer to the same instruction.  */
1476
1477 static int
1478 ve_info_eq (const void *ivts1, const void *ivts2)
1479 {
1480   const struct var_to_expand *i1 = ivts1;
1481   const struct var_to_expand *i2 = ivts2;
1482
1483   return i1->insn == i2->insn;
1484 }
1485
1486 /* Returns true if REG is referenced in one insn in LOOP.  */
1487
1488 bool
1489 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1490 {
1491   basic_block *body, bb;
1492   unsigned i;
1493   int count_ref = 0;
1494   rtx insn;
1495
1496   body = get_loop_body (loop);
1497   for (i = 0; i < loop->num_nodes; i++)
1498     {
1499       bb = body[i];
1500
1501       FOR_BB_INSNS (bb, insn)
1502       {
1503         if (rtx_referenced_p (reg, insn))
1504           count_ref++;
1505       }
1506     }
1507   return (count_ref  == 1);
1508 }
1509
1510 /* Determine whether INSN contains an accumulator
1511    which can be expanded into separate copies,
1512    one for each copy of the LOOP body.
1513
1514    for (i = 0 ; i < n; i++)
1515      sum += a[i];
1516
1517    ==>
1518
1519    sum += a[i]
1520    ....
1521    i = i+1;
1522    sum1 += a[i]
1523    ....
1524    i = i+1
1525    sum2 += a[i];
1526    ....
1527
1528    Return NULL if INSN contains no opportunity for expansion of accumulator.
1529    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1530    information and return a pointer to it.
1531 */
1532
1533 static struct var_to_expand *
1534 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1535 {
1536   rtx set, dest, src, op1;
1537   struct var_to_expand *ves;
1538   enum machine_mode mode1, mode2;
1539
1540   set = single_set (insn);
1541   if (!set)
1542     return NULL;
1543
1544   dest = SET_DEST (set);
1545   src = SET_SRC (set);
1546
1547   if (GET_CODE (src) != PLUS
1548       && GET_CODE (src) != MINUS
1549       && GET_CODE (src) != MULT)
1550     return NULL;
1551
1552   if (!XEXP (src, 0))
1553     return NULL;
1554
1555   op1 = XEXP (src, 0);
1556
1557   if (!REG_P (dest)
1558       && !(GET_CODE (dest) == SUBREG
1559            && REG_P (SUBREG_REG (dest))))
1560     return NULL;
1561
1562   if (!rtx_equal_p (dest, op1))
1563     return NULL;
1564
1565   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1566     return NULL;
1567
1568   if (rtx_referenced_p (dest, XEXP (src, 1)))
1569     return NULL;
1570
1571   mode1 = GET_MODE (dest);
1572   mode2 = GET_MODE (XEXP (src, 1));
1573   if ((FLOAT_MODE_P (mode1)
1574        || FLOAT_MODE_P (mode2))
1575       && !flag_unsafe_math_optimizations)
1576     return NULL;
1577
1578   /* Record the accumulator to expand.  */
1579   ves = xmalloc (sizeof (struct var_to_expand));
1580   ves->insn = insn;
1581   VARRAY_RTX_INIT (ves->var_expansions, 1, "var_expansions");
1582   ves->reg = copy_rtx (dest);
1583   ves->op = GET_CODE (src);
1584   ves->expansion_count = 0;
1585   ves->reuse_expansion = 0;
1586   return ves;
1587 }
1588
1589 /* Determine whether there is an induction variable in INSN that
1590    we would like to split during unrolling.
1591
1592    I.e. replace
1593
1594    i = i + 1;
1595    ...
1596    i = i + 1;
1597    ...
1598    i = i + 1;
1599    ...
1600
1601    type chains by
1602
1603    i0 = i + 1
1604    ...
1605    i = i0 + 1
1606    ...
1607    i = i0 + 2
1608    ...
1609
1610    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1611    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1612    pointer to it.  */
1613
1614 static struct iv_to_split *
1615 analyze_iv_to_split_insn (rtx insn)
1616 {
1617   rtx set, dest;
1618   struct rtx_iv iv;
1619   struct iv_to_split *ivts;
1620   bool ok;
1621
1622   /* For now we just split the basic induction variables.  Later this may be
1623      extended for example by selecting also addresses of memory references.  */
1624   set = single_set (insn);
1625   if (!set)
1626     return NULL;
1627
1628   dest = SET_DEST (set);
1629   if (!REG_P (dest))
1630     return NULL;
1631
1632   if (!biv_p (insn, dest))
1633     return NULL;
1634
1635   ok = iv_analyze (insn, dest, &iv);
1636   gcc_assert (ok);
1637
1638   if (iv.step == const0_rtx
1639       || iv.mode != iv.extend_mode)
1640     return NULL;
1641
1642   /* Record the insn to split.  */
1643   ivts = xmalloc (sizeof (struct iv_to_split));
1644   ivts->insn = insn;
1645   ivts->base_var = NULL_RTX;
1646   ivts->step = iv.step;
1647   ivts->n_loc = 1;
1648   ivts->loc[0] = 1;
1649
1650   return ivts;
1651 }
1652
1653 /* Determines which of insns in LOOP can be optimized.
1654    Return a OPT_INFO struct with the relevant hash tables filled
1655    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1656    is undefined for the return value.  */
1657
1658 static struct opt_info *
1659 analyze_insns_in_loop (struct loop *loop)
1660 {
1661   basic_block *body, bb;
1662   unsigned i, num_edges = 0;
1663   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1664   rtx insn;
1665   struct iv_to_split *ivts = NULL;
1666   struct var_to_expand *ves = NULL;
1667   PTR *slot1;
1668   PTR *slot2;
1669   edge *edges = get_loop_exit_edges (loop, &num_edges);
1670   bool can_apply = false;
1671
1672   iv_analysis_loop_init (loop);
1673
1674   body = get_loop_body (loop);
1675
1676   if (flag_split_ivs_in_unroller)
1677     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1678                                             si_info_hash, si_info_eq, free);
1679
1680   /* Record the loop exit bb and loop preheader before the unrolling.  */
1681   if (!loop_preheader_edge (loop)->src)
1682     {
1683       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1684       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1685     }
1686   else
1687     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1688
1689   if (num_edges == 1
1690       && !(edges[0]->flags & EDGE_COMPLEX))
1691     {
1692       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1693       can_apply = true;
1694     }
1695
1696   if (flag_variable_expansion_in_unroller
1697       && can_apply)
1698     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1699                                                       ve_info_hash, ve_info_eq, free);
1700
1701   for (i = 0; i < loop->num_nodes; i++)
1702     {
1703       bb = body[i];
1704       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1705         continue;
1706
1707       FOR_BB_INSNS (bb, insn)
1708       {
1709         if (!INSN_P (insn))
1710           continue;
1711
1712         if (opt_info->insns_to_split)
1713           ivts = analyze_iv_to_split_insn (insn);
1714
1715         if (ivts)
1716           {
1717             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1718             *slot1 = ivts;
1719             continue;
1720           }
1721
1722         if (opt_info->insns_with_var_to_expand)
1723           ves = analyze_insn_to_expand_var (loop, insn);
1724
1725         if (ves)
1726           {
1727             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1728             *slot2 = ves;
1729           }
1730       }
1731     }
1732
1733   free (edges);
1734   free (body);
1735   return opt_info;
1736 }
1737
1738 /* Called just before loop duplication.  Records start of duplicated area
1739    to OPT_INFO.  */
1740
1741 static void
1742 opt_info_start_duplication (struct opt_info *opt_info)
1743 {
1744   if (opt_info)
1745     opt_info->first_new_block = last_basic_block;
1746 }
1747
1748 /* Determine the number of iterations between initialization of the base
1749    variable and the current copy (N_COPY).  N_COPIES is the total number
1750    of newly created copies.  UNROLLING is true if we are unrolling
1751    (not peeling) the loop.  */
1752
1753 static unsigned
1754 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1755 {
1756   if (unrolling)
1757     {
1758       /* If we are unrolling, initialization is done in the original loop
1759          body (number 0).  */
1760       return n_copy;
1761     }
1762   else
1763     {
1764       /* If we are peeling, the copy in that the initialization occurs has
1765          number 1.  The original loop (number 0) is the last.  */
1766       if (n_copy)
1767         return n_copy - 1;
1768       else
1769         return n_copies;
1770     }
1771 }
1772
1773 /* Locate in EXPR the expression corresponding to the location recorded
1774    in IVTS, and return a pointer to the RTX for this location.  */
1775
1776 static rtx *
1777 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1778 {
1779   unsigned i;
1780   rtx *ret = &expr;
1781
1782   for (i = 0; i < ivts->n_loc; i++)
1783     ret = &XEXP (*ret, ivts->loc[i]);
1784
1785   return ret;
1786 }
1787
1788 /* Allocate basic variable for the induction variable chain.  Callback for
1789    htab_traverse.  */
1790
1791 static int
1792 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1793 {
1794   struct iv_to_split *ivts = *slot;
1795   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1796
1797   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1798
1799   return 1;
1800 }
1801
1802 /* Insert initialization of basic variable of IVTS before INSN, taking
1803    the initial value from INSN.  */
1804
1805 static void
1806 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1807 {
1808   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1809   rtx seq;
1810
1811   start_sequence ();
1812   expr = force_operand (expr, ivts->base_var);
1813   if (expr != ivts->base_var)
1814     emit_move_insn (ivts->base_var, expr);
1815   seq = get_insns ();
1816   end_sequence ();
1817
1818   emit_insn_before (seq, insn);
1819 }
1820
1821 /* Replace the use of induction variable described in IVTS in INSN
1822    by base variable + DELTA * step.  */
1823
1824 static void
1825 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1826 {
1827   rtx expr, *loc, seq, incr, var;
1828   enum machine_mode mode = GET_MODE (ivts->base_var);
1829   rtx src, dest, set;
1830
1831   /* Construct base + DELTA * step.  */
1832   if (!delta)
1833     expr = ivts->base_var;
1834   else
1835     {
1836       incr = simplify_gen_binary (MULT, mode,
1837                                   ivts->step, gen_int_mode (delta, mode));
1838       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1839                                   ivts->base_var, incr);
1840     }
1841
1842   /* Figure out where to do the replacement.  */
1843   loc = get_ivts_expr (single_set (insn), ivts);
1844
1845   /* If we can make the replacement right away, we're done.  */
1846   if (validate_change (insn, loc, expr, 0))
1847     return;
1848
1849   /* Otherwise, force EXPR into a register and try again.  */
1850   start_sequence ();
1851   var = gen_reg_rtx (mode);
1852   expr = force_operand (expr, var);
1853   if (expr != var)
1854     emit_move_insn (var, expr);
1855   seq = get_insns ();
1856   end_sequence ();
1857   emit_insn_before (seq, insn);
1858
1859   if (validate_change (insn, loc, var, 0))
1860     return;
1861
1862   /* The last chance.  Try recreating the assignment in insn
1863      completely from scratch.  */
1864   set = single_set (insn);
1865   gcc_assert (set);
1866
1867   start_sequence ();
1868   *loc = var;
1869   src = copy_rtx (SET_SRC (set));
1870   dest = copy_rtx (SET_DEST (set));
1871   src = force_operand (src, dest);
1872   if (src != dest)
1873     emit_move_insn (dest, src);
1874   seq = get_insns ();
1875   end_sequence ();
1876
1877   emit_insn_before (seq, insn);
1878   delete_insn (insn);
1879 }
1880
1881
1882 /* Return one expansion of the accumulator recorded in struct VE.  */
1883
1884 static rtx
1885 get_expansion (struct var_to_expand *ve)
1886 {
1887   rtx reg;
1888
1889   if (ve->reuse_expansion == 0)
1890     reg = ve->reg;
1891   else
1892     reg = VARRAY_RTX (ve->var_expansions,  ve->reuse_expansion - 1);
1893
1894   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == (unsigned) ve->reuse_expansion)
1895     ve->reuse_expansion = 0;
1896   else
1897     ve->reuse_expansion++;
1898
1899   return reg;
1900 }
1901
1902
1903 /* Given INSN replace the uses of the accumulator recorded in VE
1904    with a new register.  */
1905
1906 static void
1907 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1908 {
1909   rtx new_reg, set;
1910   bool really_new_expansion = false;
1911
1912   set = single_set (insn);
1913   gcc_assert (set);
1914
1915   /* Generate a new register only if the expansion limit has not been
1916      reached.  Else reuse an already existing expansion.  */
1917   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1918     {
1919       really_new_expansion = true;
1920       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1921     }
1922   else
1923     new_reg = get_expansion (ve);
1924
1925   validate_change (insn, &SET_DEST (set), new_reg, 1);
1926   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1927
1928   if (apply_change_group ())
1929     if (really_new_expansion)
1930       {
1931         VARRAY_PUSH_RTX (ve->var_expansions, new_reg);
1932         ve->expansion_count++;
1933       }
1934 }
1935
1936 /* Initialize the variable expansions in loop preheader.
1937    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1938    basic block where the initialization of the expansions
1939    should take place.  */
1940
1941 static int
1942 insert_var_expansion_initialization (void **slot, void *place_p)
1943 {
1944   struct var_to_expand *ve = *slot;
1945   basic_block place = (basic_block)place_p;
1946   rtx seq, var, zero_init, insn;
1947   unsigned i;
1948
1949   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1950     return 1;
1951
1952   start_sequence ();
1953   if (ve->op == PLUS || ve->op == MINUS)
1954     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1955       {
1956         var = VARRAY_RTX (ve->var_expansions, i);
1957         zero_init =  CONST0_RTX (GET_MODE (var));
1958         emit_move_insn (var, zero_init);
1959       }
1960   else if (ve->op == MULT)
1961     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1962       {
1963         var = VARRAY_RTX (ve->var_expansions, i);
1964         zero_init =  CONST1_RTX (GET_MODE (var));
1965         emit_move_insn (var, zero_init);
1966       }
1967
1968   seq = get_insns ();
1969   end_sequence ();
1970
1971   insn = BB_HEAD (place);
1972   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1973     insn = NEXT_INSN (insn);
1974
1975   emit_insn_after (seq, insn);
1976   /* Continue traversing the hash table.  */
1977   return 1;
1978 }
1979
1980 /*  Combine the variable expansions at the loop exit.
1981     Callbacks for htab_traverse.  PLACE_P is the loop exit
1982     basic block where the summation of the expansions should
1983     take place.  */
1984
1985 static int
1986 combine_var_copies_in_loop_exit (void **slot, void *place_p)
1987 {
1988   struct var_to_expand *ve = *slot;
1989   basic_block place = (basic_block)place_p;
1990   rtx sum = ve->reg;
1991   rtx expr, seq, var, insn;
1992   unsigned i;
1993
1994   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1995     return 1;
1996
1997   start_sequence ();
1998   if (ve->op == PLUS || ve->op == MINUS)
1999     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
2000       {
2001         var = VARRAY_RTX (ve->var_expansions, i);
2002         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2003                                    var, sum);
2004       }
2005   else if (ve->op == MULT)
2006     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
2007       {
2008         var = VARRAY_RTX (ve->var_expansions, i);
2009         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2010                                    var, sum);
2011       }
2012
2013   expr = force_operand (sum, ve->reg);
2014   if (expr != ve->reg)
2015     emit_move_insn (ve->reg, expr);
2016   seq = get_insns ();
2017   end_sequence ();
2018
2019   insn = BB_HEAD (place);
2020   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2021     insn = NEXT_INSN (insn);
2022
2023   emit_insn_after (seq, insn);
2024
2025   /* Continue traversing the hash table.  */
2026   return 1;
2027 }
2028
2029 /* Apply loop optimizations in loop copies using the
2030    data which gathered during the unrolling.  Structure
2031    OPT_INFO record that data.
2032
2033    UNROLLING is true if we unrolled (not peeled) the loop.
2034    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2035    the loop (as it should happen in complete unrolling, but not in ordinary
2036    peeling of the loop).  */
2037
2038 static void
2039 apply_opt_in_copies (struct opt_info *opt_info,
2040                      unsigned n_copies, bool unrolling,
2041                      bool rewrite_original_loop)
2042 {
2043   unsigned i, delta;
2044   basic_block bb, orig_bb;
2045   rtx insn, orig_insn, next;
2046   struct iv_to_split ivts_templ, *ivts;
2047   struct var_to_expand ve_templ, *ves;
2048
2049   /* Sanity check -- we need to put initialization in the original loop
2050      body.  */
2051   gcc_assert (!unrolling || rewrite_original_loop);
2052
2053   /* Allocate the basic variables (i0).  */
2054   if (opt_info->insns_to_split)
2055     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2056
2057   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2058     {
2059       bb = BASIC_BLOCK (i);
2060       orig_bb = bb->rbi->original;
2061
2062       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
2063                                         unrolling);
2064       orig_insn = BB_HEAD (orig_bb);
2065       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2066         {
2067           next = NEXT_INSN (insn);
2068           if (!INSN_P (insn))
2069             continue;
2070
2071           while (!INSN_P (orig_insn))
2072             orig_insn = NEXT_INSN (orig_insn);
2073
2074           ivts_templ.insn = orig_insn;
2075           ve_templ.insn = orig_insn;
2076
2077           /* Apply splitting iv optimization.  */
2078           if (opt_info->insns_to_split)
2079             {
2080               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2081
2082               if (ivts)
2083                 {
2084 #ifdef ENABLE_CHECKING
2085                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2086 #endif
2087
2088                   if (!delta)
2089                     insert_base_initialization (ivts, insn);
2090                   split_iv (ivts, insn, delta);
2091                 }
2092             }
2093           /* Apply variable expansion optimization.  */
2094           if (unrolling && opt_info->insns_with_var_to_expand)
2095             {
2096               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2097               if (ves)
2098                 {
2099 #ifdef ENABLE_CHECKING
2100                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2101 #endif
2102                   expand_var_during_unrolling (ves, insn);
2103                 }
2104             }
2105           orig_insn = NEXT_INSN (orig_insn);
2106         }
2107     }
2108
2109   if (!rewrite_original_loop)
2110     return;
2111
2112   /* Initialize the variable expansions in the loop preheader
2113      and take care of combining them at the loop exit.  */
2114   if (opt_info->insns_with_var_to_expand)
2115     {
2116       htab_traverse (opt_info->insns_with_var_to_expand,
2117                      insert_var_expansion_initialization,
2118                      opt_info->loop_preheader);
2119       htab_traverse (opt_info->insns_with_var_to_expand,
2120                      combine_var_copies_in_loop_exit,
2121                      opt_info->loop_exit);
2122     }
2123
2124   /* Rewrite also the original loop body.  Find them as originals of the blocks
2125      in the last copied iteration, i.e. those that have
2126      bb->rbi->original->copy == bb.  */
2127   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2128     {
2129       bb = BASIC_BLOCK (i);
2130       orig_bb = bb->rbi->original;
2131       if (orig_bb->rbi->copy != bb)
2132         continue;
2133
2134       delta = determine_split_iv_delta (0, n_copies, unrolling);
2135       for (orig_insn = BB_HEAD (orig_bb);
2136            orig_insn != NEXT_INSN (BB_END (bb));
2137            orig_insn = next)
2138         {
2139           next = NEXT_INSN (orig_insn);
2140
2141           if (!INSN_P (orig_insn))
2142             continue;
2143
2144           ivts_templ.insn = orig_insn;
2145           if (opt_info->insns_to_split)
2146             {
2147               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2148               if (ivts)
2149                 {
2150                   if (!delta)
2151                     insert_base_initialization (ivts, orig_insn);
2152                   split_iv (ivts, orig_insn, delta);
2153                   continue;
2154                 }
2155             }
2156
2157         }
2158     }
2159 }
2160
2161 /*  Release the data structures used for the variable expansion
2162     optimization.  Callbacks for htab_traverse.  */
2163
2164 static int
2165 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2166 {
2167   struct var_to_expand *ve = *slot;
2168
2169   VARRAY_CLEAR (ve->var_expansions);
2170
2171   /* Continue traversing the hash table.  */
2172   return 1;
2173 }
2174
2175 /* Release OPT_INFO.  */
2176
2177 static void
2178 free_opt_info (struct opt_info *opt_info)
2179 {
2180   if (opt_info->insns_to_split)
2181     htab_delete (opt_info->insns_to_split);
2182   if (opt_info->insns_with_var_to_expand)
2183     {
2184       htab_traverse (opt_info->insns_with_var_to_expand,
2185                      release_var_copies, NULL);
2186       htab_delete (opt_info->insns_with_var_to_expand);
2187     }
2188   free (opt_info);
2189 }