gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "basic-block.h"
  28 #include "cfgloop.h"
  29 #include "cfglayout.h"
  30 #include "params.h"
  31 #include "output.h"
  32 #include "expr.h"
  33 #include "hashtab.h"
  34 #include "recog.h"
  35 #include "varray.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   varray_type var_expansions;      /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (struct loops *, int);
 118 static void peel_loops_completely (struct loops *, int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loops *, struct loop *);
 126 static void peel_loop_completely (struct loops *, struct loop *);
 127 static void unroll_loop_stupid (struct loops *, struct loop *);
 128 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (struct loops *loops, int flags)
 146 {
 147   struct loop *loop, *next;
 148   bool check;
 149
 150   /* First perform complete loop peeling (it is almost surely a win,
 151      and affects parameters for further decision a lot).  */
 152   peel_loops_completely (loops, flags);
 153
 154   /* Now decide rest of unrolling and peeling.  */
 155   decide_unrolling_and_peeling (loops, flags);
 156
 157   loop = loops->tree_root;
 158   while (loop->inner)
 159     loop = loop->inner;
 160
 161   /* Scan the loops, inner ones first.  */
 162   while (loop != loops->tree_root)
 163     {
 164       if (loop->next)
 165         {
 166           next = loop->next;
 167           while (next->inner)
 168             next = next->inner;
 169         }
 170       else
 171         next = loop->outer;
 172
 173       check = true;
 174       /* And perform the appropriate transformations.  */
 175       switch (loop->lpt_decision.decision)
 176         {
 177         case LPT_PEEL_COMPLETELY:
 178           /* Already done.  */
 179           gcc_unreachable ();
 180         case LPT_PEEL_SIMPLE:
 181           peel_loop_simple (loops, loop);
 182           break;
 183         case LPT_UNROLL_CONSTANT:
 184           unroll_loop_constant_iterations (loops, loop);
 185           break;
 186         case LPT_UNROLL_RUNTIME:
 187           unroll_loop_runtime_iterations (loops, loop);
 188           break;
 189         case LPT_UNROLL_STUPID:
 190           unroll_loop_stupid (loops, loop);
 191           break;
 192         case LPT_NONE:
 193           check = false;
 194           break;
 195         default:
 196           gcc_unreachable ();
 197         }
 198       if (check)
 199         {
 200 #ifdef ENABLE_CHECKING
 201           verify_dominators (CDI_DOMINATORS);
 202           verify_loop_structure (loops);
 203 #endif
 204         }
 205       loop = next;
 206     }
 207
 208   iv_analysis_done ();
 209 }
 210
 211 /* Check whether exit of the LOOP is at the end of loop body.  */
 212
 213 static bool
 214 loop_exit_at_end_p (struct loop *loop)
 215 {
 216   struct niter_desc *desc = get_simple_loop_desc (loop);
 217   rtx insn;
 218
 219   if (desc->in_edge->dest != loop->latch)
 220     return false;
 221
 222   /* Check that the latch is empty.  */
 223   FOR_BB_INSNS (loop->latch, insn)
 224     {
 225       if (INSN_P (insn))
 226         return false;
 227     }
 228
 229   return true;
 230 }
 231
 232 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 233 static void
 234 peel_loops_completely (struct loops *loops, int flags)
 235 {
 236   struct loop *loop, *next;
 237
 238   loop = loops->tree_root;
 239   while (loop->inner)
 240     loop = loop->inner;
 241
 242   while (loop != loops->tree_root)
 243     {
 244       if (loop->next)
 245         {
 246           next = loop->next;
 247           while (next->inner)
 248             next = next->inner;
 249         }
 250       else
 251         next = loop->outer;
 252
 253       loop->lpt_decision.decision = LPT_NONE;
 254
 255       if (dump_file)
 256         fprintf (dump_file,
 257                  "\n;; *** Considering loop %d for complete peeling ***\n",
 258                  loop->num);
 259
 260       loop->ninsns = num_loop_insns (loop);
 261
 262       decide_peel_once_rolling (loop, flags);
 263       if (loop->lpt_decision.decision == LPT_NONE)
 264         decide_peel_completely (loop, flags);
 265
 266       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 267         {
 268           peel_loop_completely (loops, loop);
 269 #ifdef ENABLE_CHECKING
 270           verify_dominators (CDI_DOMINATORS);
 271           verify_loop_structure (loops);
 272 #endif
 273         }
 274       loop = next;
 275     }
 276 }
 277
 278 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 279 static void
 280 decide_unrolling_and_peeling (struct loops *loops, int flags)
 281 {
 282   struct loop *loop = loops->tree_root, *next;
 283
 284   while (loop->inner)
 285     loop = loop->inner;
 286
 287   /* Scan the loops, inner ones first.  */
 288   while (loop != loops->tree_root)
 289     {
 290       if (loop->next)
 291         {
 292           next = loop->next;
 293           while (next->inner)
 294             next = next->inner;
 295         }
 296       else
 297         next = loop->outer;
 298
 299       loop->lpt_decision.decision = LPT_NONE;
 300
 301       if (dump_file)
 302         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 303
 304       /* Do not peel cold areas.  */
 305       if (!maybe_hot_bb_p (loop->header))
 306         {
 307           if (dump_file)
 308             fprintf (dump_file, ";; Not considering loop, cold area\n");
 309           loop = next;
 310           continue;
 311         }
 312
 313       /* Can the loop be manipulated?  */
 314       if (!can_duplicate_loop_p (loop))
 315         {
 316           if (dump_file)
 317             fprintf (dump_file,
 318                      ";; Not considering loop, cannot duplicate\n");
 319           loop = next;
 320           continue;
 321         }
 322
 323       /* Skip non-innermost loops.  */
 324       if (loop->inner)
 325         {
 326           if (dump_file)
 327             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 328           loop = next;
 329           continue;
 330         }
 331
 332       loop->ninsns = num_loop_insns (loop);
 333       loop->av_ninsns = average_num_loop_insns (loop);
 334
 335       /* Try transformations one by one in decreasing order of
 336          priority.  */
 337
 338       decide_unroll_constant_iterations (loop, flags);
 339       if (loop->lpt_decision.decision == LPT_NONE)
 340         decide_unroll_runtime_iterations (loop, flags);
 341       if (loop->lpt_decision.decision == LPT_NONE)
 342         decide_unroll_stupid (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_peel_simple (loop, flags);
 345
 346       loop = next;
 347     }
 348 }
 349
 350 /* Decide whether the LOOP is once rolling and suitable for complete
 351    peeling.  */
 352 static void
 353 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 354 {
 355   struct niter_desc *desc;
 356
 357   if (dump_file)
 358     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 359
 360   /* Is the loop small enough?  */
 361   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 362     {
 363       if (dump_file)
 364         fprintf (dump_file, ";; Not considering loop, is too big\n");
 365       return;
 366     }
 367
 368   /* Check for simple loops.  */
 369   desc = get_simple_loop_desc (loop);
 370
 371   /* Check number of iterations.  */
 372   if (!desc->simple_p
 373       || desc->assumptions
 374       || !desc->const_iter
 375       || desc->niter != 0)
 376     {
 377       if (dump_file)
 378         fprintf (dump_file,
 379                  ";; Unable to prove that the loop rolls exactly once\n");
 380       return;
 381     }
 382
 383   /* Success.  */
 384   if (dump_file)
 385     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 386   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 387 }
 388
 389 /* Decide whether the LOOP is suitable for complete peeling.  */
 390 static void
 391 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 392 {
 393   unsigned npeel;
 394   struct niter_desc *desc;
 395
 396   if (dump_file)
 397     fprintf (dump_file, "\n;; Considering peeling completely\n");
 398
 399   /* Skip non-innermost loops.  */
 400   if (loop->inner)
 401     {
 402       if (dump_file)
 403         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 404       return;
 405     }
 406
 407   /* Do not peel cold areas.  */
 408   if (!maybe_hot_bb_p (loop->header))
 409     {
 410       if (dump_file)
 411         fprintf (dump_file, ";; Not considering loop, cold area\n");
 412       return;
 413     }
 414
 415   /* Can the loop be manipulated?  */
 416   if (!can_duplicate_loop_p (loop))
 417     {
 418       if (dump_file)
 419         fprintf (dump_file,
 420                  ";; Not considering loop, cannot duplicate\n");
 421       return;
 422     }
 423
 424   /* npeel = number of iterations to peel.  */
 425   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 426   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 427     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 428
 429   /* Is the loop small enough?  */
 430   if (!npeel)
 431     {
 432       if (dump_file)
 433         fprintf (dump_file, ";; Not considering loop, is too big\n");
 434       return;
 435     }
 436
 437   /* Check for simple loops.  */
 438   desc = get_simple_loop_desc (loop);
 439
 440   /* Check number of iterations.  */
 441   if (!desc->simple_p
 442       || desc->assumptions
 443       || !desc->const_iter)
 444     {
 445       if (dump_file)
 446         fprintf (dump_file,
 447                  ";; Unable to prove that the loop iterates constant times\n");
 448       return;
 449     }
 450
 451   if (desc->niter > npeel - 1)
 452     {
 453       if (dump_file)
 454         {
 455           fprintf (dump_file,
 456                    ";; Not peeling loop completely, rolls too much (");
 457           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 458           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 459         }
 460       return;
 461     }
 462
 463   /* Success.  */
 464   if (dump_file)
 465     fprintf (dump_file, ";; Decided to peel loop completely\n");
 466   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 467 }
 468
 469 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 470    completely.  The transformation done:
 471
 472    for (i = 0; i < 4; i++)
 473      body;
 474
 475    ==>
 476
 477    i = 0;
 478    body; i++;
 479    body; i++;
 480    body; i++;
 481    body; i++;
 482    */
 483 static void
 484 peel_loop_completely (struct loops *loops, struct loop *loop)
 485 {
 486   sbitmap wont_exit;
 487   unsigned HOST_WIDE_INT npeel;
 488   unsigned n_remove_edges, i;
 489   edge *remove_edges, ein;
 490   struct niter_desc *desc = get_simple_loop_desc (loop);
 491   struct opt_info *opt_info = NULL;
 492
 493   npeel = desc->niter;
 494
 495   if (npeel)
 496     {
 497       wont_exit = sbitmap_alloc (npeel + 1);
 498       sbitmap_ones (wont_exit);
 499       RESET_BIT (wont_exit, 0);
 500       if (desc->noloop_assumptions)
 501         RESET_BIT (wont_exit, 1);
 502
 503       remove_edges = xcalloc (npeel, sizeof (edge));
 504       n_remove_edges = 0;
 505
 506       if (flag_split_ivs_in_unroller)
 507         opt_info = analyze_insns_in_loop (loop);
 508
 509       opt_info_start_duplication (opt_info);
 510       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 511                 loops, npeel,
 512                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 513                 DLTHE_FLAG_UPDATE_FREQ))
 514         abort ();
 515
 516       free (wont_exit);
 517
 518       if (opt_info)
 519         {
 520           apply_opt_in_copies (opt_info, npeel, false, true);
 521           free_opt_info (opt_info);
 522         }
 523
 524       /* Remove the exit edges.  */
 525       for (i = 0; i < n_remove_edges; i++)
 526         remove_path (loops, remove_edges[i]);
 527       free (remove_edges);
 528     }
 529
 530   ein = desc->in_edge;
 531   free_simple_loop_desc (loop);
 532
 533   /* Now remove the unreachable part of the last iteration and cancel
 534      the loop.  */
 535   remove_path (loops, ein);
 536
 537   if (dump_file)
 538     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 539 }
 540
 541 /* Decide whether to unroll LOOP iterating constant number of times
 542    and how much.  */
 543
 544 static void
 545 decide_unroll_constant_iterations (struct loop *loop, int flags)
 546 {
 547   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 548   struct niter_desc *desc;
 549
 550   if (!(flags & UAP_UNROLL))
 551     {
 552       /* We were not asked to, just return back silently.  */
 553       return;
 554     }
 555
 556   if (dump_file)
 557     fprintf (dump_file,
 558              "\n;; Considering unrolling loop with constant "
 559              "number of iterations\n");
 560
 561   /* nunroll = total number of copies of the original loop body in
 562      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 563   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 564   nunroll_by_av
 565     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 566   if (nunroll > nunroll_by_av)
 567     nunroll = nunroll_by_av;
 568   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 569     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 570
 571   /* Skip big loops.  */
 572   if (nunroll <= 1)
 573     {
 574       if (dump_file)
 575         fprintf (dump_file, ";; Not considering loop, is too big\n");
 576       return;
 577     }
 578
 579   /* Check for simple loops.  */
 580   desc = get_simple_loop_desc (loop);
 581
 582   /* Check number of iterations.  */
 583   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 584     {
 585       if (dump_file)
 586         fprintf (dump_file,
 587                  ";; Unable to prove that the loop iterates constant times\n");
 588       return;
 589     }
 590
 591   /* Check whether the loop rolls enough to consider.  */
 592   if (desc->niter < 2 * nunroll)
 593     {
 594       if (dump_file)
 595         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 596       return;
 597     }
 598
 599   /* Success; now compute number of iterations to unroll.  We alter
 600      nunroll so that as few as possible copies of loop body are
 601      necessary, while still not decreasing the number of unrollings
 602      too much (at most by 1).  */
 603   best_copies = 2 * nunroll + 10;
 604
 605   i = 2 * nunroll + 2;
 606   if (i - 1 >= desc->niter)
 607     i = desc->niter - 2;
 608
 609   for (; i >= nunroll - 1; i--)
 610     {
 611       unsigned exit_mod = desc->niter % (i + 1);
 612
 613       if (!loop_exit_at_end_p (loop))
 614         n_copies = exit_mod + i + 1;
 615       else if (exit_mod != (unsigned) i
 616                || desc->noloop_assumptions != NULL_RTX)
 617         n_copies = exit_mod + i + 2;
 618       else
 619         n_copies = i + 1;
 620
 621       if (n_copies < best_copies)
 622         {
 623           best_copies = n_copies;
 624           best_unroll = i;
 625         }
 626     }
 627
 628   if (dump_file)
 629     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 630              best_unroll + 1, best_copies, nunroll);
 631
 632   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 633   loop->lpt_decision.times = best_unroll;
 634
 635   if (dump_file)
 636     fprintf (dump_file,
 637              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 638              loop->lpt_decision.times);
 639 }
 640
 641 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 642    times.  The transformation does this:
 643
 644    for (i = 0; i < 102; i++)
 645      body;
 646
 647    ==>
 648
 649    i = 0;
 650    body; i++;
 651    body; i++;
 652    while (i < 102)
 653      {
 654        body; i++;
 655        body; i++;
 656        body; i++;
 657        body; i++;
 658      }
 659   */
 660 static void
 661 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 662 {
 663   unsigned HOST_WIDE_INT niter;
 664   unsigned exit_mod;
 665   sbitmap wont_exit;
 666   unsigned n_remove_edges, i;
 667   edge *remove_edges;
 668   unsigned max_unroll = loop->lpt_decision.times;
 669   struct niter_desc *desc = get_simple_loop_desc (loop);
 670   bool exit_at_end = loop_exit_at_end_p (loop);
 671   struct opt_info *opt_info = NULL;
 672
 673   niter = desc->niter;
 674
 675   /* Should not get here (such loop should be peeled instead).  */
 676   gcc_assert (niter > max_unroll + 1);
 677
 678   exit_mod = niter % (max_unroll + 1);
 679
 680   wont_exit = sbitmap_alloc (max_unroll + 1);
 681   sbitmap_ones (wont_exit);
 682
 683   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 684   n_remove_edges = 0;
 685   if (flag_split_ivs_in_unroller
 686       || flag_variable_expansion_in_unroller)
 687     opt_info = analyze_insns_in_loop (loop);
 688
 689   if (!exit_at_end)
 690     {
 691       /* The exit is not at the end of the loop; leave exit test
 692          in the first copy, so that the loops that start with test
 693          of exit condition have continuous body after unrolling.  */
 694
 695       if (dump_file)
 696         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 697
 698       /* Peel exit_mod iterations.  */
 699       RESET_BIT (wont_exit, 0);
 700       if (desc->noloop_assumptions)
 701         RESET_BIT (wont_exit, 1);
 702
 703       if (exit_mod)
 704         {
 705           opt_info_start_duplication (opt_info);
 706           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 707                                               loops, exit_mod,
 708                                               wont_exit, desc->out_edge,
 709                                               remove_edges, &n_remove_edges,
 710                                               DLTHE_FLAG_UPDATE_FREQ))
 711             abort ();
 712
 713           if (opt_info && exit_mod > 1)
 714             apply_opt_in_copies (opt_info, exit_mod, false, false);
 715
 716           desc->noloop_assumptions = NULL_RTX;
 717           desc->niter -= exit_mod;
 718           desc->niter_max -= exit_mod;
 719         }
 720
 721       SET_BIT (wont_exit, 1);
 722     }
 723   else
 724     {
 725       /* Leave exit test in last copy, for the same reason as above if
 726          the loop tests the condition at the end of loop body.  */
 727
 728       if (dump_file)
 729         fprintf (dump_file, ";; Condition on end of loop.\n");
 730
 731       /* We know that niter >= max_unroll + 2; so we do not need to care of
 732          case when we would exit before reaching the loop.  So just peel
 733          exit_mod + 1 iterations.  */
 734       if (exit_mod != max_unroll
 735           || desc->noloop_assumptions)
 736         {
 737           RESET_BIT (wont_exit, 0);
 738           if (desc->noloop_assumptions)
 739             RESET_BIT (wont_exit, 1);
 740
 741           opt_info_start_duplication (opt_info);
 742           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 743                 loops, exit_mod + 1,
 744                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 745                 DLTHE_FLAG_UPDATE_FREQ))
 746             abort ();
 747
 748           if (opt_info && exit_mod > 0)
 749             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 750
 751           desc->niter -= exit_mod + 1;
 752           desc->niter_max -= exit_mod + 1;
 753           desc->noloop_assumptions = NULL_RTX;
 754
 755           SET_BIT (wont_exit, 0);
 756           SET_BIT (wont_exit, 1);
 757         }
 758
 759       RESET_BIT (wont_exit, max_unroll);
 760     }
 761
 762   /* Now unroll the loop.  */
 763
 764   opt_info_start_duplication (opt_info);
 765   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 766                 loops, max_unroll,
 767                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 768                 DLTHE_FLAG_UPDATE_FREQ))
 769     abort ();
 770
 771   if (opt_info)
 772     {
 773       apply_opt_in_copies (opt_info, max_unroll, true, true);
 774       free_opt_info (opt_info);
 775     }
 776
 777   free (wont_exit);
 778
 779   if (exit_at_end)
 780     {
 781       basic_block exit_block = desc->in_edge->src->rbi->copy;
 782       /* Find a new in and out edge; they are in the last copy we have made.  */
 783
 784       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 785         {
 786           desc->out_edge = EDGE_SUCC (exit_block, 0);
 787           desc->in_edge = EDGE_SUCC (exit_block, 1);
 788         }
 789       else
 790         {
 791           desc->out_edge = EDGE_SUCC (exit_block, 1);
 792           desc->in_edge = EDGE_SUCC (exit_block, 0);
 793         }
 794     }
 795
 796   desc->niter /= max_unroll + 1;
 797   desc->niter_max /= max_unroll + 1;
 798   desc->niter_expr = GEN_INT (desc->niter);
 799
 800   /* Remove the edges.  */
 801   for (i = 0; i < n_remove_edges; i++)
 802     remove_path (loops, remove_edges[i]);
 803   free (remove_edges);
 804
 805   if (dump_file)
 806     fprintf (dump_file,
 807              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 808              max_unroll, num_loop_insns (loop));
 809 }
 810
 811 /* Decide whether to unroll LOOP iterating runtime computable number of times
 812    and how much.  */
 813 static void
 814 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 815 {
 816   unsigned nunroll, nunroll_by_av, i;
 817   struct niter_desc *desc;
 818
 819   if (!(flags & UAP_UNROLL))
 820     {
 821       /* We were not asked to, just return back silently.  */
 822       return;
 823     }
 824
 825   if (dump_file)
 826     fprintf (dump_file,
 827              "\n;; Considering unrolling loop with runtime "
 828              "computable number of iterations\n");
 829
 830   /* nunroll = total number of copies of the original loop body in
 831      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 832   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 833   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 834   if (nunroll > nunroll_by_av)
 835     nunroll = nunroll_by_av;
 836   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 837     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 838
 839   /* Skip big loops.  */
 840   if (nunroll <= 1)
 841     {
 842       if (dump_file)
 843         fprintf (dump_file, ";; Not considering loop, is too big\n");
 844       return;
 845     }
 846
 847   /* Check for simple loops.  */
 848   desc = get_simple_loop_desc (loop);
 849
 850   /* Check simpleness.  */
 851   if (!desc->simple_p || desc->assumptions)
 852     {
 853       if (dump_file)
 854         fprintf (dump_file,
 855                  ";; Unable to prove that the number of iterations "
 856                  "can be counted in runtime\n");
 857       return;
 858     }
 859
 860   if (desc->const_iter)
 861     {
 862       if (dump_file)
 863         fprintf (dump_file, ";; Loop iterates constant times\n");
 864       return;
 865     }
 866
 867   /* If we have profile feedback, check whether the loop rolls.  */
 868   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 869     {
 870       if (dump_file)
 871         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 872       return;
 873     }
 874
 875   /* Success; now force nunroll to be power of 2, as we are unable to
 876      cope with overflows in computation of number of iterations.  */
 877   for (i = 1; 2 * i <= nunroll; i *= 2)
 878     continue;
 879
 880   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 881   loop->lpt_decision.times = i - 1;
 882
 883   if (dump_file)
 884     fprintf (dump_file,
 885              ";; Decided to unroll the runtime computable "
 886              "times rolling loop, %d times.\n",
 887              loop->lpt_decision.times);
 888 }
 889
 890 /* Unroll LOOP for that we are able to count number of iterations in runtime
 891    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 892    extra care for case n < 0):
 893
 894    for (i = 0; i < n; i++)
 895      body;
 896
 897    ==>
 898
 899    i = 0;
 900    mod = n % 4;
 901
 902    switch (mod)
 903      {
 904        case 3:
 905          body; i++;
 906        case 2:
 907          body; i++;
 908        case 1:
 909          body; i++;
 910        case 0: ;
 911      }
 912
 913    while (i < n)
 914      {
 915        body; i++;
 916        body; i++;
 917        body; i++;
 918        body; i++;
 919      }
 920    */
 921 static void
 922 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 923 {
 924   rtx old_niter, niter, init_code, branch_code, tmp;
 925   unsigned i, j, p;
 926   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 927   unsigned n_dom_bbs;
 928   sbitmap wont_exit;
 929   int may_exit_copy;
 930   unsigned n_peel, n_remove_edges;
 931   edge *remove_edges, e;
 932   bool extra_zero_check, last_may_exit;
 933   unsigned max_unroll = loop->lpt_decision.times;
 934   struct niter_desc *desc = get_simple_loop_desc (loop);
 935   bool exit_at_end = loop_exit_at_end_p (loop);
 936   struct opt_info *opt_info = NULL;
 937
 938   if (flag_split_ivs_in_unroller
 939       || flag_variable_expansion_in_unroller)
 940     opt_info = analyze_insns_in_loop (loop);
 941
 942   /* Remember blocks whose dominators will have to be updated.  */
 943   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 944   n_dom_bbs = 0;
 945
 946   body = get_loop_body (loop);
 947   for (i = 0; i < loop->num_nodes; i++)
 948     {
 949       unsigned nldom;
 950       basic_block *ldom;
 951
 952       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 953       for (j = 0; j < nldom; j++)
 954         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 955           dom_bbs[n_dom_bbs++] = ldom[j];
 956
 957       free (ldom);
 958     }
 959   free (body);
 960
 961   if (!exit_at_end)
 962     {
 963       /* Leave exit in first copy (for explanation why see comment in
 964          unroll_loop_constant_iterations).  */
 965       may_exit_copy = 0;
 966       n_peel = max_unroll - 1;
 967       extra_zero_check = true;
 968       last_may_exit = false;
 969     }
 970   else
 971     {
 972       /* Leave exit in last copy (for explanation why see comment in
 973          unroll_loop_constant_iterations).  */
 974       may_exit_copy = max_unroll;
 975       n_peel = max_unroll;
 976       extra_zero_check = false;
 977       last_may_exit = true;
 978     }
 979
 980   /* Get expression for number of iterations.  */
 981   start_sequence ();
 982   old_niter = niter = gen_reg_rtx (desc->mode);
 983   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 984   if (tmp != niter)
 985     emit_move_insn (niter, tmp);
 986
 987   /* Count modulo by ANDing it with max_unroll; we use the fact that
 988      the number of unrollings is a power of two, and thus this is correct
 989      even if there is overflow in the computation.  */
 990   niter = expand_simple_binop (desc->mode, AND,
 991                                niter,
 992                                GEN_INT (max_unroll),
 993                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 994
 995   init_code = get_insns ();
 996   end_sequence ();
 997
 998   /* Precondition the loop.  */
 999   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1000
1001   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1002   n_remove_edges = 0;
1003
1004   wont_exit = sbitmap_alloc (max_unroll + 2);
1005
1006   /* Peel the first copy of loop body (almost always we must leave exit test
1007      here; the only exception is when we have extra zero check and the number
1008      of iterations is reliable.  Also record the place of (possible) extra
1009      zero check.  */
1010   sbitmap_zero (wont_exit);
1011   if (extra_zero_check
1012       && !desc->noloop_assumptions)
1013     SET_BIT (wont_exit, 1);
1014   ezc_swtch = loop_preheader_edge (loop)->src;
1015   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1016                 loops, 1,
1017                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1018                 DLTHE_FLAG_UPDATE_FREQ))
1019     abort ();
1020
1021   /* Record the place where switch will be built for preconditioning.  */
1022   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1023                                 NULL_RTX);
1024
1025   for (i = 0; i < n_peel; i++)
1026     {
1027       /* Peel the copy.  */
1028       sbitmap_zero (wont_exit);
1029       if (i != n_peel - 1 || !last_may_exit)
1030         SET_BIT (wont_exit, 1);
1031       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1032                 loops, 1,
1033                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1034                 DLTHE_FLAG_UPDATE_FREQ))
1035         abort ();
1036
1037       /* Create item for switch.  */
1038       j = n_peel - i - (extra_zero_check ? 0 : 1);
1039       p = REG_BR_PROB_BASE / (i + 2);
1040
1041       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1042       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1043                                           block_label (preheader), p, NULL_RTX);
1044
1045       swtch = loop_split_edge_with (EDGE_PRED (swtch, 0), branch_code);
1046       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1047       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1048       e = make_edge (swtch, preheader,
1049                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1050       e->probability = p;
1051     }
1052
1053   if (extra_zero_check)
1054     {
1055       /* Add branch for zero iterations.  */
1056       p = REG_BR_PROB_BASE / (max_unroll + 1);
1057       swtch = ezc_swtch;
1058       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1059       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1060                                           block_label (preheader), p, NULL_RTX);
1061
1062       swtch = loop_split_edge_with (EDGE_SUCC (swtch, 0), branch_code);
1063       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1064       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1065       e = make_edge (swtch, preheader,
1066                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1067       e->probability = p;
1068     }
1069
1070   /* Recount dominators for outer blocks.  */
1071   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1072
1073   /* And unroll loop.  */
1074
1075   sbitmap_ones (wont_exit);
1076   RESET_BIT (wont_exit, may_exit_copy);
1077   opt_info_start_duplication (opt_info);
1078
1079   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1080                 loops, max_unroll,
1081                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1082                 DLTHE_FLAG_UPDATE_FREQ))
1083     abort ();
1084
1085   if (opt_info)
1086     {
1087       apply_opt_in_copies (opt_info, max_unroll, true, true);
1088       free_opt_info (opt_info);
1089     }
1090
1091   free (wont_exit);
1092
1093   if (exit_at_end)
1094     {
1095       basic_block exit_block = desc->in_edge->src->rbi->copy;
1096       /* Find a new in and out edge; they are in the last copy we have made.  */
1097
1098       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1099         {
1100           desc->out_edge = EDGE_SUCC (exit_block, 0);
1101           desc->in_edge = EDGE_SUCC (exit_block, 1);
1102         }
1103       else
1104         {
1105           desc->out_edge = EDGE_SUCC (exit_block, 1);
1106           desc->in_edge = EDGE_SUCC (exit_block, 0);
1107         }
1108     }
1109
1110   /* Remove the edges.  */
1111   for (i = 0; i < n_remove_edges; i++)
1112     remove_path (loops, remove_edges[i]);
1113   free (remove_edges);
1114
1115   /* We must be careful when updating the number of iterations due to
1116      preconditioning and the fact that the value must be valid at entry
1117      of the loop.  After passing through the above code, we see that
1118      the correct new number of iterations is this:  */
1119   gcc_assert (!desc->const_iter);
1120   desc->niter_expr =
1121     simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1122   desc->niter_max /= max_unroll + 1;
1123   if (exit_at_end)
1124     {
1125       desc->niter_expr =
1126         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1127       desc->noloop_assumptions = NULL_RTX;
1128       desc->niter_max--;
1129     }
1130
1131   if (dump_file)
1132     fprintf (dump_file,
1133              ";; Unrolled loop %d times, counting # of iterations "
1134              "in runtime, %i insns\n",
1135              max_unroll, num_loop_insns (loop));
1136 }
1137
1138 /* Decide whether to simply peel LOOP and how much.  */
1139 static void
1140 decide_peel_simple (struct loop *loop, int flags)
1141 {
1142   unsigned npeel;
1143   struct niter_desc *desc;
1144
1145   if (!(flags & UAP_PEEL))
1146     {
1147       /* We were not asked to, just return back silently.  */
1148       return;
1149     }
1150
1151   if (dump_file)
1152     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1153
1154   /* npeel = number of iterations to peel.  */
1155   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1156   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1157     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1158
1159   /* Skip big loops.  */
1160   if (!npeel)
1161     {
1162       if (dump_file)
1163         fprintf (dump_file, ";; Not considering loop, is too big\n");
1164       return;
1165     }
1166
1167   /* Check for simple loops.  */
1168   desc = get_simple_loop_desc (loop);
1169
1170   /* Check number of iterations.  */
1171   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1172     {
1173       if (dump_file)
1174         fprintf (dump_file, ";; Loop iterates constant times\n");
1175       return;
1176     }
1177
1178   /* Do not simply peel loops with branches inside -- it increases number
1179      of mispredicts.  */
1180   if (num_loop_branches (loop) > 1)
1181     {
1182       if (dump_file)
1183         fprintf (dump_file, ";; Not peeling, contains branches\n");
1184       return;
1185     }
1186
1187   if (loop->header->count)
1188     {
1189       unsigned niter = expected_loop_iterations (loop);
1190       if (niter + 1 > npeel)
1191         {
1192           if (dump_file)
1193             {
1194               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1195               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1196                        (HOST_WIDEST_INT) (niter + 1));
1197               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1198                        npeel);
1199             }
1200           return;
1201         }
1202       npeel = niter + 1;
1203     }
1204   else
1205     {
1206       /* For now we have no good heuristics to decide whether loop peeling
1207          will be effective, so disable it.  */
1208       if (dump_file)
1209         fprintf (dump_file,
1210                  ";; Not peeling loop, no evidence it will be profitable\n");
1211       return;
1212     }
1213
1214   /* Success.  */
1215   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1216   loop->lpt_decision.times = npeel;
1217
1218   if (dump_file)
1219     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1220              loop->lpt_decision.times);
1221 }
1222
1223 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1224    while (cond)
1225      body;
1226
1227    ==>
1228
1229    if (!cond) goto end;
1230    body;
1231    if (!cond) goto end;
1232    body;
1233    while (cond)
1234      body;
1235    end: ;
1236    */
1237 static void
1238 peel_loop_simple (struct loops *loops, struct loop *loop)
1239 {
1240   sbitmap wont_exit;
1241   unsigned npeel = loop->lpt_decision.times;
1242   struct niter_desc *desc = get_simple_loop_desc (loop);
1243   struct opt_info *opt_info = NULL;
1244
1245   if (flag_split_ivs_in_unroller && npeel > 1)
1246     opt_info = analyze_insns_in_loop (loop);
1247
1248   wont_exit = sbitmap_alloc (npeel + 1);
1249   sbitmap_zero (wont_exit);
1250
1251   opt_info_start_duplication (opt_info);
1252
1253   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1254                 loops, npeel, wont_exit, NULL, NULL, NULL,
1255                 DLTHE_FLAG_UPDATE_FREQ))
1256     abort ();
1257
1258   free (wont_exit);
1259
1260   if (opt_info)
1261     {
1262       apply_opt_in_copies (opt_info, npeel, false, false);
1263       free_opt_info (opt_info);
1264     }
1265
1266   if (desc->simple_p)
1267     {
1268       if (desc->const_iter)
1269         {
1270           desc->niter -= npeel;
1271           desc->niter_expr = GEN_INT (desc->niter);
1272           desc->noloop_assumptions = NULL_RTX;
1273         }
1274       else
1275         {
1276           /* We cannot just update niter_expr, as its value might be clobbered
1277              inside loop.  We could handle this by counting the number into
1278              temporary just like we do in runtime unrolling, but it does not
1279              seem worthwhile.  */
1280           free_simple_loop_desc (loop);
1281         }
1282     }
1283   if (dump_file)
1284     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1285 }
1286
1287 /* Decide whether to unroll LOOP stupidly and how much.  */
1288 static void
1289 decide_unroll_stupid (struct loop *loop, int flags)
1290 {
1291   unsigned nunroll, nunroll_by_av, i;
1292   struct niter_desc *desc;
1293
1294   if (!(flags & UAP_UNROLL_ALL))
1295     {
1296       /* We were not asked to, just return back silently.  */
1297       return;
1298     }
1299
1300   if (dump_file)
1301     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1302
1303   /* nunroll = total number of copies of the original loop body in
1304      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1305   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1306   nunroll_by_av
1307     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1308   if (nunroll > nunroll_by_av)
1309     nunroll = nunroll_by_av;
1310   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1311     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1312
1313   /* Skip big loops.  */
1314   if (nunroll <= 1)
1315     {
1316       if (dump_file)
1317         fprintf (dump_file, ";; Not considering loop, is too big\n");
1318       return;
1319     }
1320
1321   /* Check for simple loops.  */
1322   desc = get_simple_loop_desc (loop);
1323
1324   /* Check simpleness.  */
1325   if (desc->simple_p && !desc->assumptions)
1326     {
1327       if (dump_file)
1328         fprintf (dump_file, ";; The loop is simple\n");
1329       return;
1330     }
1331
1332   /* Do not unroll loops with branches inside -- it increases number
1333      of mispredicts.  */
1334   if (num_loop_branches (loop) > 1)
1335     {
1336       if (dump_file)
1337         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1338       return;
1339     }
1340
1341   /* If we have profile feedback, check whether the loop rolls.  */
1342   if (loop->header->count
1343       && expected_loop_iterations (loop) < 2 * nunroll)
1344     {
1345       if (dump_file)
1346         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1347       return;
1348     }
1349
1350   /* Success.  Now force nunroll to be power of 2, as it seems that this
1351      improves results (partially because of better alignments, partially
1352      because of some dark magic).  */
1353   for (i = 1; 2 * i <= nunroll; i *= 2)
1354     continue;
1355
1356   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1357   loop->lpt_decision.times = i - 1;
1358
1359   if (dump_file)
1360     fprintf (dump_file,
1361              ";; Decided to unroll the loop stupidly, %d times.\n",
1362              loop->lpt_decision.times);
1363 }
1364
1365 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1366    while (cond)
1367      body;
1368
1369    ==>
1370
1371    while (cond)
1372      {
1373        body;
1374        if (!cond) break;
1375        body;
1376        if (!cond) break;
1377        body;
1378        if (!cond) break;
1379        body;
1380      }
1381    */
1382 static void
1383 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1384 {
1385   sbitmap wont_exit;
1386   unsigned nunroll = loop->lpt_decision.times;
1387   struct niter_desc *desc = get_simple_loop_desc (loop);
1388   struct opt_info *opt_info = NULL;
1389
1390   if (flag_split_ivs_in_unroller
1391       || flag_variable_expansion_in_unroller)
1392     opt_info = analyze_insns_in_loop (loop);
1393
1394
1395   wont_exit = sbitmap_alloc (nunroll + 1);
1396   sbitmap_zero (wont_exit);
1397   opt_info_start_duplication (opt_info);
1398
1399   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1400                 loops, nunroll, wont_exit, NULL, NULL, NULL,
1401                 DLTHE_FLAG_UPDATE_FREQ))
1402     abort ();
1403
1404   if (opt_info)
1405     {
1406       apply_opt_in_copies (opt_info, nunroll, true, true);
1407       free_opt_info (opt_info);
1408     }
1409
1410   free (wont_exit);
1411
1412   if (desc->simple_p)
1413     {
1414       /* We indeed may get here provided that there are nontrivial assumptions
1415          for a loop to be really simple.  We could update the counts, but the
1416          problem is that we are unable to decide which exit will be taken
1417          (not really true in case the number of iterations is constant,
1418          but noone will do anything with this information, so we do not
1419          worry about it).  */
1420       desc->simple_p = false;
1421     }
1422
1423   if (dump_file)
1424     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1425              nunroll, num_loop_insns (loop));
1426 }
1427
1428 /* A hash function for information about insns to split.  */
1429
1430 static hashval_t
1431 si_info_hash (const void *ivts)
1432 {
1433   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1434 }
1435
1436 /* An equality functions for information about insns to split.  */
1437
1438 static int
1439 si_info_eq (const void *ivts1, const void *ivts2)
1440 {
1441   const struct iv_to_split *i1 = ivts1;
1442   const struct iv_to_split *i2 = ivts2;
1443
1444   return i1->insn == i2->insn;
1445 }
1446
1447 /* Return a hash for VES, which is really a "var_to_expand *".  */
1448
1449 static hashval_t
1450 ve_info_hash (const void *ves)
1451 {
1452   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1453 }
1454
1455 /* Return true if IVTS1 and IVTS2 (which are really both of type
1456    "var_to_expand *") refer to the same instruction.  */
1457
1458 static int
1459 ve_info_eq (const void *ivts1, const void *ivts2)
1460 {
1461   const struct var_to_expand *i1 = ivts1;
1462   const struct var_to_expand *i2 = ivts2;
1463
1464   return i1->insn == i2->insn;
1465 }
1466
1467 /* Returns true if REG is referenced in one insn in LOOP.  */
1468
1469 bool
1470 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1471 {
1472   basic_block *body, bb;
1473   unsigned i;
1474   int count_ref = 0;
1475   rtx insn;
1476
1477   body = get_loop_body (loop);
1478   for (i = 0; i < loop->num_nodes; i++)
1479     {
1480       bb = body[i];
1481
1482       FOR_BB_INSNS (bb, insn)
1483       {
1484         if (rtx_referenced_p (reg, insn))
1485           count_ref++;
1486       }
1487     }
1488   return (count_ref  == 1);
1489 }
1490
1491 /* Determine whether INSN contains an accumulator
1492    which can be expanded into separate copies,
1493    one for each copy of the LOOP body.
1494
1495    for (i = 0 ; i < n; i++)
1496      sum += a[i];
1497
1498    ==>
1499
1500    sum += a[i]
1501    ....
1502    i = i+1;
1503    sum1 += a[i]
1504    ....
1505    i = i+1
1506    sum2 += a[i];
1507    ....
1508
1509    Return NULL if INSN contains no opportunity for expansion of accumulator.
1510    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1511    information and return a pointer to it.
1512 */
1513
1514 static struct var_to_expand *
1515 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1516 {
1517   rtx set, dest, src, op1;
1518   struct var_to_expand *ves;
1519   enum machine_mode mode1, mode2;
1520
1521   set = single_set (insn);
1522   if (!set)
1523     return NULL;
1524
1525   dest = SET_DEST (set);
1526   src = SET_SRC (set);
1527
1528   if (GET_CODE (src) != PLUS
1529       && GET_CODE (src) != MINUS
1530       && GET_CODE (src) != MULT)
1531     return NULL;
1532
1533   if (!XEXP (src, 0))
1534     return NULL;
1535
1536   op1 = XEXP (src, 0);
1537
1538   if (!REG_P (dest)
1539       && !(GET_CODE (dest) == SUBREG
1540            && REG_P (SUBREG_REG (dest))))
1541     return NULL;
1542
1543   if (!rtx_equal_p (dest, op1))
1544     return NULL;
1545
1546   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1547     return NULL;
1548
1549   if (rtx_referenced_p (dest, XEXP (src, 1)))
1550     return NULL;
1551
1552   mode1 = GET_MODE (dest);
1553   mode2 = GET_MODE (XEXP (src, 1));
1554   if ((FLOAT_MODE_P (mode1)
1555        || FLOAT_MODE_P (mode2))
1556       && !flag_unsafe_math_optimizations)
1557     return NULL;
1558
1559   /* Record the accumulator to expand.  */
1560   ves = xmalloc (sizeof (struct var_to_expand));
1561   ves->insn = insn;
1562   VARRAY_RTX_INIT (ves->var_expansions, 1, "var_expansions");
1563   ves->reg = copy_rtx (dest);
1564   ves->op = GET_CODE (src);
1565   ves->expansion_count = 0;
1566   ves->reuse_expansion = 0;
1567   return ves;
1568 }
1569
1570 /* Determine whether there is an induction variable in INSN that
1571    we would like to split during unrolling.
1572
1573    I.e. replace
1574
1575    i = i + 1;
1576    ...
1577    i = i + 1;
1578    ...
1579    i = i + 1;
1580    ...
1581
1582    type chains by
1583
1584    i0 = i + 1
1585    ...
1586    i = i0 + 1
1587    ...
1588    i = i0 + 2
1589    ...
1590
1591    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1592    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1593    pointer to it.  */
1594
1595 static struct iv_to_split *
1596 analyze_iv_to_split_insn (rtx insn)
1597 {
1598   rtx set, dest;
1599   struct rtx_iv iv;
1600   struct iv_to_split *ivts;
1601
1602   /* For now we just split the basic induction variables.  Later this may be
1603      extended for example by selecting also addresses of memory references.  */
1604   set = single_set (insn);
1605   if (!set)
1606     return NULL;
1607
1608   dest = SET_DEST (set);
1609   if (!REG_P (dest))
1610     return NULL;
1611
1612   if (!biv_p (insn, dest))
1613     return NULL;
1614
1615   if (!iv_analyze (insn, dest, &iv))
1616     abort ();
1617
1618   if (iv.step == const0_rtx
1619       || iv.mode != iv.extend_mode)
1620     return NULL;
1621
1622   /* Record the insn to split.  */
1623   ivts = xmalloc (sizeof (struct iv_to_split));
1624   ivts->insn = insn;
1625   ivts->base_var = NULL_RTX;
1626   ivts->step = iv.step;
1627   ivts->n_loc = 1;
1628   ivts->loc[0] = 1;
1629
1630   return ivts;
1631 }
1632
1633 /* Determines which of insns in LOOP can be optimized.
1634    Return a OPT_INFO struct with the relevant hash tables filled
1635    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1636    is undefined for the return value.  */
1637
1638 static struct opt_info *
1639 analyze_insns_in_loop (struct loop *loop)
1640 {
1641   basic_block *body, bb;
1642   unsigned i, n_edges = 0;
1643   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1644   rtx insn;
1645   struct iv_to_split *ivts = NULL;
1646   struct var_to_expand *ves = NULL;
1647   PTR *slot1;
1648   PTR *slot2;
1649   edge *edges = get_loop_exit_edges (loop, &n_edges);
1650   basic_block preheader;
1651   bool can_apply = false;
1652
1653   iv_analysis_loop_init (loop);
1654
1655   body = get_loop_body (loop);
1656
1657   if (flag_split_ivs_in_unroller)
1658     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1659                                             si_info_hash, si_info_eq, free);
1660
1661   /* Record the loop exit bb and loop preheader before the unrolling.  */
1662   if (!loop_preheader_edge (loop)->src)
1663     {
1664       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1665       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1666     }
1667   else
1668     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1669
1670   if (n_edges == 1
1671       && !(edges[0]->flags & EDGE_COMPLEX)
1672       && (edges[0]->flags & EDGE_LOOP_EXIT))
1673     {
1674       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1675       can_apply = true;
1676     }
1677
1678   if (flag_variable_expansion_in_unroller
1679       && can_apply)
1680     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1681                                                       ve_info_hash, ve_info_eq, free);
1682
1683   for (i = 0; i < loop->num_nodes; i++)
1684     {
1685       bb = body[i];
1686       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1687         continue;
1688
1689       FOR_BB_INSNS (bb, insn)
1690       {
1691         if (!INSN_P (insn))
1692           continue;
1693
1694         if (opt_info->insns_to_split)
1695           ivts = analyze_iv_to_split_insn (insn);
1696
1697         if (ivts)
1698           {
1699             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1700             *slot1 = ivts;
1701             continue;
1702           }
1703
1704         if (opt_info->insns_with_var_to_expand)
1705           ves = analyze_insn_to_expand_var (loop, insn);
1706
1707         if (ves)
1708           {
1709             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1710             *slot2 = ves;
1711           }
1712       }
1713     }
1714
1715   free (edges);
1716   free (body);
1717   return opt_info;
1718 }
1719
1720 /* Called just before loop duplication.  Records start of duplicated area
1721    to OPT_INFO.  */
1722
1723 static void
1724 opt_info_start_duplication (struct opt_info *opt_info)
1725 {
1726   if (opt_info)
1727     opt_info->first_new_block = last_basic_block;
1728 }
1729
1730 /* Determine the number of iterations between initialization of the base
1731    variable and the current copy (N_COPY).  N_COPIES is the total number
1732    of newly created copies.  UNROLLING is true if we are unrolling
1733    (not peeling) the loop.  */
1734
1735 static unsigned
1736 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1737 {
1738   if (unrolling)
1739     {
1740       /* If we are unrolling, initialization is done in the original loop
1741          body (number 0).  */
1742       return n_copy;
1743     }
1744   else
1745     {
1746       /* If we are peeling, the copy in that the initialization occurs has
1747          number 1.  The original loop (number 0) is the last.  */
1748       if (n_copy)
1749         return n_copy - 1;
1750       else
1751         return n_copies;
1752     }
1753 }
1754
1755 /* Locate in EXPR the expression corresponding to the location recorded
1756    in IVTS, and return a pointer to the RTX for this location.  */
1757
1758 static rtx *
1759 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1760 {
1761   unsigned i;
1762   rtx *ret = &expr;
1763
1764   for (i = 0; i < ivts->n_loc; i++)
1765     ret = &XEXP (*ret, ivts->loc[i]);
1766
1767   return ret;
1768 }
1769
1770 /* Allocate basic variable for the induction variable chain.  Callback for
1771    htab_traverse.  */
1772
1773 static int
1774 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1775 {
1776   struct iv_to_split *ivts = *slot;
1777   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1778
1779   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1780
1781   return 1;
1782 }
1783
1784 /* Insert initialization of basic variable of IVTS before INSN, taking
1785    the initial value from INSN.  */
1786
1787 static void
1788 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1789 {
1790   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1791   rtx seq;
1792
1793   start_sequence ();
1794   expr = force_operand (expr, ivts->base_var);
1795   if (expr != ivts->base_var)
1796     emit_move_insn (ivts->base_var, expr);
1797   seq = get_insns ();
1798   end_sequence ();
1799
1800   emit_insn_before (seq, insn);
1801 }
1802
1803 /* Replace the use of induction variable described in IVTS in INSN
1804    by base variable + DELTA * step.  */
1805
1806 static void
1807 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1808 {
1809   rtx expr, *loc, seq, incr, var;
1810   enum machine_mode mode = GET_MODE (ivts->base_var);
1811   rtx src, dest, set;
1812
1813   /* Construct base + DELTA * step.  */
1814   if (!delta)
1815     expr = ivts->base_var;
1816   else
1817     {
1818       incr = simplify_gen_binary (MULT, mode,
1819                                   ivts->step, gen_int_mode (delta, mode));
1820       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1821                                   ivts->base_var, incr);
1822     }
1823
1824   /* Figure out where to do the replacement.  */
1825   loc = get_ivts_expr (single_set (insn), ivts);
1826
1827   /* If we can make the replacement right away, we're done.  */
1828   if (validate_change (insn, loc, expr, 0))
1829     return;
1830
1831   /* Otherwise, force EXPR into a register and try again.  */
1832   start_sequence ();
1833   var = gen_reg_rtx (mode);
1834   expr = force_operand (expr, var);
1835   if (expr != var)
1836     emit_move_insn (var, expr);
1837   seq = get_insns ();
1838   end_sequence ();
1839   emit_insn_before (seq, insn);
1840
1841   if (validate_change (insn, loc, var, 0))
1842     return;
1843
1844   /* The last chance.  Try recreating the assignment in insn
1845      completely from scratch.  */
1846   set = single_set (insn);
1847   gcc_assert (set);
1848
1849   start_sequence ();
1850   *loc = var;
1851   src = copy_rtx (SET_SRC (set));
1852   dest = copy_rtx (SET_DEST (set));
1853   src = force_operand (src, dest);
1854   if (src != dest)
1855     emit_move_insn (dest, src);
1856   seq = get_insns ();
1857   end_sequence ();
1858
1859   emit_insn_before (seq, insn);
1860   delete_insn (insn);
1861 }
1862
1863
1864 /* Return one expansion of the accumulator recorded in struct VE.  */
1865
1866 static rtx
1867 get_expansion (struct var_to_expand *ve)
1868 {
1869   rtx reg;
1870
1871   if (ve->reuse_expansion == 0)
1872     reg = ve->reg;
1873   else
1874     reg = VARRAY_RTX (ve->var_expansions,  ve->reuse_expansion - 1);
1875
1876   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == (unsigned) ve->reuse_expansion)
1877     ve->reuse_expansion = 0;
1878   else
1879     ve->reuse_expansion++;
1880
1881   return reg;
1882 }
1883
1884
1885 /* Given INSN replace the uses of the accumulator recorded in VE
1886    with a new register.  */
1887
1888 static void
1889 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1890 {
1891   rtx new_reg, set;
1892   bool really_new_expansion = false;
1893
1894   set = single_set (insn);
1895   if (!set)
1896     abort ();
1897
1898   /* Generate a new register only if the expansion limit has not been
1899      reached.  Else reuse an already existing expansion.  */
1900   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1901     {
1902       really_new_expansion = true;
1903       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1904     }
1905   else
1906     new_reg = get_expansion (ve);
1907
1908   validate_change (insn, &SET_DEST (set), new_reg, 1);
1909   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1910
1911   if (apply_change_group ())
1912     if (really_new_expansion)
1913       {
1914         VARRAY_PUSH_RTX (ve->var_expansions, new_reg);
1915         ve->expansion_count++;
1916       }
1917 }
1918
1919 /* Initialize the variable expansions in loop preheader.
1920    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1921    basic block where the initialization of the expansions
1922    should take place.  */
1923
1924 static int
1925 insert_var_expansion_initialization (void **slot, void *place_p)
1926 {
1927   struct var_to_expand *ve = *slot;
1928   basic_block place = (basic_block)place_p;
1929   rtx seq, var, zero_init, insn;
1930   unsigned i;
1931
1932   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1933     return 1;
1934
1935   start_sequence ();
1936   if (ve->op == PLUS || ve->op == MINUS)
1937     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1938       {
1939         var = VARRAY_RTX (ve->var_expansions, i);
1940         zero_init =  CONST0_RTX (GET_MODE (var));
1941         emit_move_insn (var, zero_init);
1942       }
1943   else if (ve->op == MULT)
1944     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1945       {
1946         var = VARRAY_RTX (ve->var_expansions, i);
1947         zero_init =  CONST1_RTX (GET_MODE (var));
1948         emit_move_insn (var, zero_init);
1949       }
1950
1951   seq = get_insns ();
1952   end_sequence ();
1953
1954   insn = BB_HEAD (place);
1955   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1956     insn = NEXT_INSN (insn);
1957
1958   emit_insn_after (seq, insn);
1959   /* Continue traversing the hash table.  */
1960   return 1;
1961 }
1962
1963 /*  Combine the variable expansions at the loop exit.
1964     Callbacks for htab_traverse.  PLACE_P is the loop exit
1965     basic block where the summation of the expansions should
1966     take place.  */
1967
1968 static int
1969 combine_var_copies_in_loop_exit (void **slot, void *place_p)
1970 {
1971   struct var_to_expand *ve = *slot;
1972   basic_block place = (basic_block)place_p;
1973   rtx sum = ve->reg;
1974   rtx expr, seq, var, insn;
1975   unsigned i;
1976
1977   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1978     return 1;
1979
1980   start_sequence ();
1981   if (ve->op == PLUS || ve->op == MINUS)
1982     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1983       {
1984         var = VARRAY_RTX (ve->var_expansions, i);
1985         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
1986                                    var, sum);
1987       }
1988   else if (ve->op == MULT)
1989     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1990       {
1991         var = VARRAY_RTX (ve->var_expansions, i);
1992         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
1993                                    var, sum);
1994       }
1995
1996   expr = force_operand (sum, ve->reg);
1997   if (expr != ve->reg)
1998     emit_move_insn (ve->reg, expr);
1999   seq = get_insns ();
2000   end_sequence ();
2001
2002   insn = BB_HEAD (place);
2003   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2004     insn = NEXT_INSN (insn);
2005
2006   emit_insn_after (seq, insn);
2007
2008   /* Continue traversing the hash table.  */
2009   return 1;
2010 }
2011
2012 /* Apply loop optimizations in loop copies using the
2013    data which gathered during the unrolling.  Structure
2014    OPT_INFO record that data.
2015
2016    UNROLLING is true if we unrolled (not peeled) the loop.
2017    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2018    the loop (as it should happen in complete unrolling, but not in ordinary
2019    peeling of the loop).  */
2020
2021 static void
2022 apply_opt_in_copies (struct opt_info *opt_info,
2023                      unsigned n_copies, bool unrolling,
2024                      bool rewrite_original_loop)
2025 {
2026   unsigned i, delta;
2027   basic_block bb, orig_bb;
2028   rtx insn, orig_insn, next;
2029   struct iv_to_split ivts_templ, *ivts;
2030   struct var_to_expand ve_templ, *ves;
2031
2032   /* Sanity check -- we need to put initialization in the original loop
2033      body.  */
2034   gcc_assert (!unrolling || rewrite_original_loop);
2035
2036   /* Allocate the basic variables (i0).  */
2037   if (opt_info->insns_to_split)
2038     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2039
2040   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2041     {
2042       bb = BASIC_BLOCK (i);
2043       orig_bb = bb->rbi->original;
2044
2045       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
2046                                         unrolling);
2047       orig_insn = BB_HEAD (orig_bb);
2048       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2049         {
2050           next = NEXT_INSN (insn);
2051           if (!INSN_P (insn))
2052             continue;
2053
2054           while (!INSN_P (orig_insn))
2055             orig_insn = NEXT_INSN (orig_insn);
2056
2057           ivts_templ.insn = orig_insn;
2058           ve_templ.insn = orig_insn;
2059
2060           /* Apply splitting iv optimization.  */
2061           if (opt_info->insns_to_split)
2062             {
2063               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2064
2065               if (ivts)
2066                 {
2067 #ifdef ENABLE_CHECKING
2068                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2069 #endif
2070
2071                   if (!delta)
2072                     insert_base_initialization (ivts, insn);
2073                   split_iv (ivts, insn, delta);
2074                 }
2075             }
2076           /* Apply variable expansion optimization.  */
2077           if (unrolling && opt_info->insns_with_var_to_expand)
2078             {
2079               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2080               if (ves)
2081                 {
2082 #ifdef ENABLE_CHECKING
2083                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2084 #endif
2085                   expand_var_during_unrolling (ves, insn);
2086                 }
2087             }
2088           orig_insn = NEXT_INSN (orig_insn);
2089         }
2090     }
2091
2092   if (!rewrite_original_loop)
2093     return;
2094
2095   /* Initialize the variable expansions in the loop preheader
2096      and take care of combining them at the loop exit.  */
2097   if (opt_info->insns_with_var_to_expand)
2098     {
2099       htab_traverse (opt_info->insns_with_var_to_expand,
2100                      insert_var_expansion_initialization,
2101                      opt_info->loop_preheader);
2102       htab_traverse (opt_info->insns_with_var_to_expand,
2103                      combine_var_copies_in_loop_exit,
2104                      opt_info->loop_exit);
2105     }
2106
2107   /* Rewrite also the original loop body.  Find them as originals of the blocks
2108      in the last copied iteration, i.e. those that have
2109      bb->rbi->original->copy == bb.  */
2110   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2111     {
2112       bb = BASIC_BLOCK (i);
2113       orig_bb = bb->rbi->original;
2114       if (orig_bb->rbi->copy != bb)
2115         continue;
2116
2117       delta = determine_split_iv_delta (0, n_copies, unrolling);
2118       for (orig_insn = BB_HEAD (orig_bb);
2119            orig_insn != NEXT_INSN (BB_END (bb));
2120            orig_insn = next)
2121         {
2122           next = NEXT_INSN (orig_insn);
2123
2124           if (!INSN_P (orig_insn))
2125             continue;
2126
2127           ivts_templ.insn = orig_insn;
2128           if (opt_info->insns_to_split)
2129             {
2130               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2131               if (ivts)
2132                 {
2133                   if (!delta)
2134                     insert_base_initialization (ivts, orig_insn);
2135                   split_iv (ivts, orig_insn, delta);
2136                   continue;
2137                 }
2138             }
2139
2140         }
2141     }
2142 }
2143
2144 /*  Release the data structures used for the variable expansion
2145     optimization.  Callbacks for htab_traverse.  */
2146
2147 static int
2148 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2149 {
2150   struct var_to_expand *ve = *slot;
2151
2152   VARRAY_CLEAR (ve->var_expansions);
2153
2154   /* Continue traversing the hash table.  */
2155   return 1;
2156 }
2157
2158 /* Release OPT_INFO.  */
2159
2160 static void
2161 free_opt_info (struct opt_info *opt_info)
2162 {
2163   if (opt_info->insns_to_split)
2164     htab_delete (opt_info->insns_to_split);
2165   if (opt_info->insns_with_var_to_expand)
2166     {
2167       htab_traverse (opt_info->insns_with_var_to_expand,
2168                      release_var_copies, NULL);
2169       htab_delete (opt_info->insns_with_var_to_expand);
2170     }
2171   free (opt_info);
2172 }