gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 DEF_VEC_P(rtx);
  88 DEF_VEC_ALLOC_P(rtx,heap);
  89
  90 /* Information about accumulators to expand.  */
  91
  92 struct var_to_expand
  93 {
  94   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  95   rtx reg;                         /* The accumulator which is expanded.  */
  96   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  97   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  98                                       or multiplication.  */
  99   int expansion_count;             /* Count the number of expansions generated so far.  */
 100   int reuse_expansion;             /* The expansion we intend to reuse to expand
 101                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 102                                       the original accumulator.  Else use
 103                                       var_expansions[REUSE_EXPANSION - 1].  */
 104 };
 105
 106 /* Information about optimization applied in
 107    the unrolled loop.  */
 108
 109 struct opt_info
 110 {
 111   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 112   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 113                                       to expand.  */
 114   unsigned first_new_block;        /* The first basic block that was
 115                                       duplicated.  */
 116   basic_block loop_exit;           /* The loop exit basic block.  */
 117   basic_block loop_preheader;      /* The loop preheader basic block.  */
 118 };
 119
 120 static void decide_unrolling_and_peeling (struct loops *, int);
 121 static void peel_loops_completely (struct loops *, int);
 122 static void decide_peel_simple (struct loop *, int);
 123 static void decide_peel_once_rolling (struct loop *, int);
 124 static void decide_peel_completely (struct loop *, int);
 125 static void decide_unroll_stupid (struct loop *, int);
 126 static void decide_unroll_constant_iterations (struct loop *, int);
 127 static void decide_unroll_runtime_iterations (struct loop *, int);
 128 static void peel_loop_simple (struct loops *, struct loop *);
 129 static void peel_loop_completely (struct loops *, struct loop *);
 130 static void unroll_loop_stupid (struct loops *, struct loop *);
 131 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 132 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 133 static struct opt_info *analyze_insns_in_loop (struct loop *);
 134 static void opt_info_start_duplication (struct opt_info *);
 135 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 136 static void free_opt_info (struct opt_info *);
 137 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 138 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 139 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 140 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 141 static int insert_var_expansion_initialization (void **, void *);
 142 static int combine_var_copies_in_loop_exit (void **, void *);
 143 static int release_var_copies (void **, void *);
 144 static rtx get_expansion (struct var_to_expand *);
 145
 146 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 147 void
 148 unroll_and_peel_loops (struct loops *loops, int flags)
 149 {
 150   struct loop *loop, *next;
 151   bool check;
 152
 153   /* First perform complete loop peeling (it is almost surely a win,
 154      and affects parameters for further decision a lot).  */
 155   peel_loops_completely (loops, flags);
 156
 157   /* Now decide rest of unrolling and peeling.  */
 158   decide_unrolling_and_peeling (loops, flags);
 159
 160   loop = loops->tree_root;
 161   while (loop->inner)
 162     loop = loop->inner;
 163
 164   /* Scan the loops, inner ones first.  */
 165   while (loop != loops->tree_root)
 166     {
 167       if (loop->next)
 168         {
 169           next = loop->next;
 170           while (next->inner)
 171             next = next->inner;
 172         }
 173       else
 174         next = loop->outer;
 175
 176       check = true;
 177       /* And perform the appropriate transformations.  */
 178       switch (loop->lpt_decision.decision)
 179         {
 180         case LPT_PEEL_COMPLETELY:
 181           /* Already done.  */
 182           gcc_unreachable ();
 183         case LPT_PEEL_SIMPLE:
 184           peel_loop_simple (loops, loop);
 185           break;
 186         case LPT_UNROLL_CONSTANT:
 187           unroll_loop_constant_iterations (loops, loop);
 188           break;
 189         case LPT_UNROLL_RUNTIME:
 190           unroll_loop_runtime_iterations (loops, loop);
 191           break;
 192         case LPT_UNROLL_STUPID:
 193           unroll_loop_stupid (loops, loop);
 194           break;
 195         case LPT_NONE:
 196           check = false;
 197           break;
 198         default:
 199           gcc_unreachable ();
 200         }
 201       if (check)
 202         {
 203 #ifdef ENABLE_CHECKING
 204           verify_dominators (CDI_DOMINATORS);
 205           verify_loop_structure (loops);
 206 #endif
 207         }
 208       loop = next;
 209     }
 210
 211   iv_analysis_done ();
 212 }
 213
 214 /* Check whether exit of the LOOP is at the end of loop body.  */
 215
 216 static bool
 217 loop_exit_at_end_p (struct loop *loop)
 218 {
 219   struct niter_desc *desc = get_simple_loop_desc (loop);
 220   rtx insn;
 221
 222   if (desc->in_edge->dest != loop->latch)
 223     return false;
 224
 225   /* Check that the latch is empty.  */
 226   FOR_BB_INSNS (loop->latch, insn)
 227     {
 228       if (INSN_P (insn))
 229         return false;
 230     }
 231
 232   return true;
 233 }
 234
 235 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 236 static void
 237 peel_loops_completely (struct loops *loops, int flags)
 238 {
 239   struct loop *loop, *next;
 240
 241   loop = loops->tree_root;
 242   while (loop->inner)
 243     loop = loop->inner;
 244
 245   while (loop != loops->tree_root)
 246     {
 247       if (loop->next)
 248         {
 249           next = loop->next;
 250           while (next->inner)
 251             next = next->inner;
 252         }
 253       else
 254         next = loop->outer;
 255
 256       loop->lpt_decision.decision = LPT_NONE;
 257
 258       if (dump_file)
 259         fprintf (dump_file,
 260                  "\n;; *** Considering loop %d for complete peeling ***\n",
 261                  loop->num);
 262
 263       loop->ninsns = num_loop_insns (loop);
 264
 265       decide_peel_once_rolling (loop, flags);
 266       if (loop->lpt_decision.decision == LPT_NONE)
 267         decide_peel_completely (loop, flags);
 268
 269       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 270         {
 271           peel_loop_completely (loops, loop);
 272 #ifdef ENABLE_CHECKING
 273           verify_dominators (CDI_DOMINATORS);
 274           verify_loop_structure (loops);
 275 #endif
 276         }
 277       loop = next;
 278     }
 279 }
 280
 281 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 282 static void
 283 decide_unrolling_and_peeling (struct loops *loops, int flags)
 284 {
 285   struct loop *loop = loops->tree_root, *next;
 286
 287   while (loop->inner)
 288     loop = loop->inner;
 289
 290   /* Scan the loops, inner ones first.  */
 291   while (loop != loops->tree_root)
 292     {
 293       if (loop->next)
 294         {
 295           next = loop->next;
 296           while (next->inner)
 297             next = next->inner;
 298         }
 299       else
 300         next = loop->outer;
 301
 302       loop->lpt_decision.decision = LPT_NONE;
 303
 304       if (dump_file)
 305         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 306
 307       /* Do not peel cold areas.  */
 308       if (!maybe_hot_bb_p (loop->header))
 309         {
 310           if (dump_file)
 311             fprintf (dump_file, ";; Not considering loop, cold area\n");
 312           loop = next;
 313           continue;
 314         }
 315
 316       /* Can the loop be manipulated?  */
 317       if (!can_duplicate_loop_p (loop))
 318         {
 319           if (dump_file)
 320             fprintf (dump_file,
 321                      ";; Not considering loop, cannot duplicate\n");
 322           loop = next;
 323           continue;
 324         }
 325
 326       /* Skip non-innermost loops.  */
 327       if (loop->inner)
 328         {
 329           if (dump_file)
 330             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 331           loop = next;
 332           continue;
 333         }
 334
 335       loop->ninsns = num_loop_insns (loop);
 336       loop->av_ninsns = average_num_loop_insns (loop);
 337
 338       /* Try transformations one by one in decreasing order of
 339          priority.  */
 340
 341       decide_unroll_constant_iterations (loop, flags);
 342       if (loop->lpt_decision.decision == LPT_NONE)
 343         decide_unroll_runtime_iterations (loop, flags);
 344       if (loop->lpt_decision.decision == LPT_NONE)
 345         decide_unroll_stupid (loop, flags);
 346       if (loop->lpt_decision.decision == LPT_NONE)
 347         decide_peel_simple (loop, flags);
 348
 349       loop = next;
 350     }
 351 }
 352
 353 /* Decide whether the LOOP is once rolling and suitable for complete
 354    peeling.  */
 355 static void
 356 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 357 {
 358   struct niter_desc *desc;
 359
 360   if (dump_file)
 361     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 362
 363   /* Is the loop small enough?  */
 364   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 365     {
 366       if (dump_file)
 367         fprintf (dump_file, ";; Not considering loop, is too big\n");
 368       return;
 369     }
 370
 371   /* Check for simple loops.  */
 372   desc = get_simple_loop_desc (loop);
 373
 374   /* Check number of iterations.  */
 375   if (!desc->simple_p
 376       || desc->assumptions
 377       || desc->infinite
 378       || !desc->const_iter
 379       || desc->niter != 0)
 380     {
 381       if (dump_file)
 382         fprintf (dump_file,
 383                  ";; Unable to prove that the loop rolls exactly once\n");
 384       return;
 385     }
 386
 387   /* Success.  */
 388   if (dump_file)
 389     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 390   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 391 }
 392
 393 /* Decide whether the LOOP is suitable for complete peeling.  */
 394 static void
 395 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 396 {
 397   unsigned npeel;
 398   struct niter_desc *desc;
 399
 400   if (dump_file)
 401     fprintf (dump_file, "\n;; Considering peeling completely\n");
 402
 403   /* Skip non-innermost loops.  */
 404   if (loop->inner)
 405     {
 406       if (dump_file)
 407         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 408       return;
 409     }
 410
 411   /* Do not peel cold areas.  */
 412   if (!maybe_hot_bb_p (loop->header))
 413     {
 414       if (dump_file)
 415         fprintf (dump_file, ";; Not considering loop, cold area\n");
 416       return;
 417     }
 418
 419   /* Can the loop be manipulated?  */
 420   if (!can_duplicate_loop_p (loop))
 421     {
 422       if (dump_file)
 423         fprintf (dump_file,
 424                  ";; Not considering loop, cannot duplicate\n");
 425       return;
 426     }
 427
 428   /* npeel = number of iterations to peel.  */
 429   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 430   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 431     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 432
 433   /* Is the loop small enough?  */
 434   if (!npeel)
 435     {
 436       if (dump_file)
 437         fprintf (dump_file, ";; Not considering loop, is too big\n");
 438       return;
 439     }
 440
 441   /* Check for simple loops.  */
 442   desc = get_simple_loop_desc (loop);
 443
 444   /* Check number of iterations.  */
 445   if (!desc->simple_p
 446       || desc->assumptions
 447       || !desc->const_iter
 448       || desc->infinite)
 449     {
 450       if (dump_file)
 451         fprintf (dump_file,
 452                  ";; Unable to prove that the loop iterates constant times\n");
 453       return;
 454     }
 455
 456   if (desc->niter > npeel - 1)
 457     {
 458       if (dump_file)
 459         {
 460           fprintf (dump_file,
 461                    ";; Not peeling loop completely, rolls too much (");
 462           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 463           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 464         }
 465       return;
 466     }
 467
 468   /* Success.  */
 469   if (dump_file)
 470     fprintf (dump_file, ";; Decided to peel loop completely\n");
 471   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 472 }
 473
 474 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 475    completely.  The transformation done:
 476
 477    for (i = 0; i < 4; i++)
 478      body;
 479
 480    ==>
 481
 482    i = 0;
 483    body; i++;
 484    body; i++;
 485    body; i++;
 486    body; i++;
 487    */
 488 static void
 489 peel_loop_completely (struct loops *loops, struct loop *loop)
 490 {
 491   sbitmap wont_exit;
 492   unsigned HOST_WIDE_INT npeel;
 493   unsigned n_remove_edges, i;
 494   edge *remove_edges, ein;
 495   struct niter_desc *desc = get_simple_loop_desc (loop);
 496   struct opt_info *opt_info = NULL;
 497
 498   npeel = desc->niter;
 499
 500   if (npeel)
 501     {
 502       bool ok;
 503
 504       wont_exit = sbitmap_alloc (npeel + 1);
 505       sbitmap_ones (wont_exit);
 506       RESET_BIT (wont_exit, 0);
 507       if (desc->noloop_assumptions)
 508         RESET_BIT (wont_exit, 1);
 509
 510       remove_edges = xcalloc (npeel, sizeof (edge));
 511       n_remove_edges = 0;
 512
 513       if (flag_split_ivs_in_unroller)
 514         opt_info = analyze_insns_in_loop (loop);
 515
 516       opt_info_start_duplication (opt_info);
 517       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 518                                           loops, npeel,
 519                                           wont_exit, desc->out_edge,
 520                                           remove_edges, &n_remove_edges,
 521                                           DLTHE_FLAG_UPDATE_FREQ
 522                                           | DLTHE_FLAG_COMPLETTE_PEEL
 523                                           | (opt_info
 524                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 525       gcc_assert (ok);
 526
 527       free (wont_exit);
 528
 529       if (opt_info)
 530         {
 531           apply_opt_in_copies (opt_info, npeel, false, true);
 532           free_opt_info (opt_info);
 533         }
 534
 535       /* Remove the exit edges.  */
 536       for (i = 0; i < n_remove_edges; i++)
 537         remove_path (loops, remove_edges[i]);
 538       free (remove_edges);
 539     }
 540
 541   ein = desc->in_edge;
 542   free_simple_loop_desc (loop);
 543
 544   /* Now remove the unreachable part of the last iteration and cancel
 545      the loop.  */
 546   remove_path (loops, ein);
 547
 548   if (dump_file)
 549     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 550 }
 551
 552 /* Decide whether to unroll LOOP iterating constant number of times
 553    and how much.  */
 554
 555 static void
 556 decide_unroll_constant_iterations (struct loop *loop, int flags)
 557 {
 558   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 559   struct niter_desc *desc;
 560
 561   if (!(flags & UAP_UNROLL))
 562     {
 563       /* We were not asked to, just return back silently.  */
 564       return;
 565     }
 566
 567   if (dump_file)
 568     fprintf (dump_file,
 569              "\n;; Considering unrolling loop with constant "
 570              "number of iterations\n");
 571
 572   /* nunroll = total number of copies of the original loop body in
 573      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 574   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 575   nunroll_by_av
 576     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 577   if (nunroll > nunroll_by_av)
 578     nunroll = nunroll_by_av;
 579   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 580     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 581
 582   /* Skip big loops.  */
 583   if (nunroll <= 1)
 584     {
 585       if (dump_file)
 586         fprintf (dump_file, ";; Not considering loop, is too big\n");
 587       return;
 588     }
 589
 590   /* Check for simple loops.  */
 591   desc = get_simple_loop_desc (loop);
 592
 593   /* Check number of iterations.  */
 594   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 595     {
 596       if (dump_file)
 597         fprintf (dump_file,
 598                  ";; Unable to prove that the loop iterates constant times\n");
 599       return;
 600     }
 601
 602   /* Check whether the loop rolls enough to consider.  */
 603   if (desc->niter < 2 * nunroll)
 604     {
 605       if (dump_file)
 606         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 607       return;
 608     }
 609
 610   /* Success; now compute number of iterations to unroll.  We alter
 611      nunroll so that as few as possible copies of loop body are
 612      necessary, while still not decreasing the number of unrollings
 613      too much (at most by 1).  */
 614   best_copies = 2 * nunroll + 10;
 615
 616   i = 2 * nunroll + 2;
 617   if (i - 1 >= desc->niter)
 618     i = desc->niter - 2;
 619
 620   for (; i >= nunroll - 1; i--)
 621     {
 622       unsigned exit_mod = desc->niter % (i + 1);
 623
 624       if (!loop_exit_at_end_p (loop))
 625         n_copies = exit_mod + i + 1;
 626       else if (exit_mod != (unsigned) i
 627                || desc->noloop_assumptions != NULL_RTX)
 628         n_copies = exit_mod + i + 2;
 629       else
 630         n_copies = i + 1;
 631
 632       if (n_copies < best_copies)
 633         {
 634           best_copies = n_copies;
 635           best_unroll = i;
 636         }
 637     }
 638
 639   if (dump_file)
 640     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 641              best_unroll + 1, best_copies, nunroll);
 642
 643   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 644   loop->lpt_decision.times = best_unroll;
 645
 646   if (dump_file)
 647     fprintf (dump_file,
 648              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 649              loop->lpt_decision.times);
 650 }
 651
 652 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 653    times.  The transformation does this:
 654
 655    for (i = 0; i < 102; i++)
 656      body;
 657
 658    ==>
 659
 660    i = 0;
 661    body; i++;
 662    body; i++;
 663    while (i < 102)
 664      {
 665        body; i++;
 666        body; i++;
 667        body; i++;
 668        body; i++;
 669      }
 670   */
 671 static void
 672 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 673 {
 674   unsigned HOST_WIDE_INT niter;
 675   unsigned exit_mod;
 676   sbitmap wont_exit;
 677   unsigned n_remove_edges, i;
 678   edge *remove_edges;
 679   unsigned max_unroll = loop->lpt_decision.times;
 680   struct niter_desc *desc = get_simple_loop_desc (loop);
 681   bool exit_at_end = loop_exit_at_end_p (loop);
 682   struct opt_info *opt_info = NULL;
 683   bool ok;
 684
 685   niter = desc->niter;
 686
 687   /* Should not get here (such loop should be peeled instead).  */
 688   gcc_assert (niter > max_unroll + 1);
 689
 690   exit_mod = niter % (max_unroll + 1);
 691
 692   wont_exit = sbitmap_alloc (max_unroll + 1);
 693   sbitmap_ones (wont_exit);
 694
 695   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 696   n_remove_edges = 0;
 697   if (flag_split_ivs_in_unroller
 698       || flag_variable_expansion_in_unroller)
 699     opt_info = analyze_insns_in_loop (loop);
 700
 701   if (!exit_at_end)
 702     {
 703       /* The exit is not at the end of the loop; leave exit test
 704          in the first copy, so that the loops that start with test
 705          of exit condition have continuous body after unrolling.  */
 706
 707       if (dump_file)
 708         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 709
 710       /* Peel exit_mod iterations.  */
 711       RESET_BIT (wont_exit, 0);
 712       if (desc->noloop_assumptions)
 713         RESET_BIT (wont_exit, 1);
 714
 715       if (exit_mod)
 716         {
 717           opt_info_start_duplication (opt_info);
 718           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 719                                               loops, exit_mod,
 720                                               wont_exit, desc->out_edge,
 721                                               remove_edges, &n_remove_edges,
 722                                               DLTHE_FLAG_UPDATE_FREQ
 723                                               | (opt_info && exit_mod > 1
 724                                                  ? DLTHE_RECORD_COPY_NUMBER
 725                                                    : 0));
 726           gcc_assert (ok);
 727
 728           if (opt_info && exit_mod > 1)
 729             apply_opt_in_copies (opt_info, exit_mod, false, false);
 730
 731           desc->noloop_assumptions = NULL_RTX;
 732           desc->niter -= exit_mod;
 733           desc->niter_max -= exit_mod;
 734         }
 735
 736       SET_BIT (wont_exit, 1);
 737     }
 738   else
 739     {
 740       /* Leave exit test in last copy, for the same reason as above if
 741          the loop tests the condition at the end of loop body.  */
 742
 743       if (dump_file)
 744         fprintf (dump_file, ";; Condition on end of loop.\n");
 745
 746       /* We know that niter >= max_unroll + 2; so we do not need to care of
 747          case when we would exit before reaching the loop.  So just peel
 748          exit_mod + 1 iterations.  */
 749       if (exit_mod != max_unroll
 750           || desc->noloop_assumptions)
 751         {
 752           RESET_BIT (wont_exit, 0);
 753           if (desc->noloop_assumptions)
 754             RESET_BIT (wont_exit, 1);
 755
 756           opt_info_start_duplication (opt_info);
 757           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 758                                               loops, exit_mod + 1,
 759                                               wont_exit, desc->out_edge,
 760                                               remove_edges, &n_remove_edges,
 761                                               DLTHE_FLAG_UPDATE_FREQ
 762                                               | (opt_info && exit_mod > 0
 763                                                  ? DLTHE_RECORD_COPY_NUMBER
 764                                                    : 0));
 765           gcc_assert (ok);
 766
 767           if (opt_info && exit_mod > 0)
 768             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 769
 770           desc->niter -= exit_mod + 1;
 771           desc->niter_max -= exit_mod + 1;
 772           desc->noloop_assumptions = NULL_RTX;
 773
 774           SET_BIT (wont_exit, 0);
 775           SET_BIT (wont_exit, 1);
 776         }
 777
 778       RESET_BIT (wont_exit, max_unroll);
 779     }
 780
 781   /* Now unroll the loop.  */
 782
 783   opt_info_start_duplication (opt_info);
 784   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 785                                       loops, max_unroll,
 786                                       wont_exit, desc->out_edge,
 787                                       remove_edges, &n_remove_edges,
 788                                       DLTHE_FLAG_UPDATE_FREQ
 789                                       | (opt_info
 790                                          ? DLTHE_RECORD_COPY_NUMBER
 791                                            : 0));
 792   gcc_assert (ok);
 793
 794   if (opt_info)
 795     {
 796       apply_opt_in_copies (opt_info, max_unroll, true, true);
 797       free_opt_info (opt_info);
 798     }
 799
 800   free (wont_exit);
 801
 802   if (exit_at_end)
 803     {
 804       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 805       /* Find a new in and out edge; they are in the last copy we have made.  */
 806
 807       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 808         {
 809           desc->out_edge = EDGE_SUCC (exit_block, 0);
 810           desc->in_edge = EDGE_SUCC (exit_block, 1);
 811         }
 812       else
 813         {
 814           desc->out_edge = EDGE_SUCC (exit_block, 1);
 815           desc->in_edge = EDGE_SUCC (exit_block, 0);
 816         }
 817     }
 818
 819   desc->niter /= max_unroll + 1;
 820   desc->niter_max /= max_unroll + 1;
 821   desc->niter_expr = GEN_INT (desc->niter);
 822
 823   /* Remove the edges.  */
 824   for (i = 0; i < n_remove_edges; i++)
 825     remove_path (loops, remove_edges[i]);
 826   free (remove_edges);
 827
 828   if (dump_file)
 829     fprintf (dump_file,
 830              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 831              max_unroll, num_loop_insns (loop));
 832 }
 833
 834 /* Decide whether to unroll LOOP iterating runtime computable number of times
 835    and how much.  */
 836 static void
 837 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 838 {
 839   unsigned nunroll, nunroll_by_av, i;
 840   struct niter_desc *desc;
 841
 842   if (!(flags & UAP_UNROLL))
 843     {
 844       /* We were not asked to, just return back silently.  */
 845       return;
 846     }
 847
 848   if (dump_file)
 849     fprintf (dump_file,
 850              "\n;; Considering unrolling loop with runtime "
 851              "computable number of iterations\n");
 852
 853   /* nunroll = total number of copies of the original loop body in
 854      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 855   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 856   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 857   if (nunroll > nunroll_by_av)
 858     nunroll = nunroll_by_av;
 859   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 860     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 861
 862   /* Skip big loops.  */
 863   if (nunroll <= 1)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file, ";; Not considering loop, is too big\n");
 867       return;
 868     }
 869
 870   /* Check for simple loops.  */
 871   desc = get_simple_loop_desc (loop);
 872
 873   /* Check simpleness.  */
 874   if (!desc->simple_p || desc->assumptions)
 875     {
 876       if (dump_file)
 877         fprintf (dump_file,
 878                  ";; Unable to prove that the number of iterations "
 879                  "can be counted in runtime\n");
 880       return;
 881     }
 882
 883   if (desc->const_iter)
 884     {
 885       if (dump_file)
 886         fprintf (dump_file, ";; Loop iterates constant times\n");
 887       return;
 888     }
 889
 890   /* If we have profile feedback, check whether the loop rolls.  */
 891   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 892     {
 893       if (dump_file)
 894         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 895       return;
 896     }
 897
 898   /* Success; now force nunroll to be power of 2, as we are unable to
 899      cope with overflows in computation of number of iterations.  */
 900   for (i = 1; 2 * i <= nunroll; i *= 2)
 901     continue;
 902
 903   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 904   loop->lpt_decision.times = i - 1;
 905
 906   if (dump_file)
 907     fprintf (dump_file,
 908              ";; Decided to unroll the runtime computable "
 909              "times rolling loop, %d times.\n",
 910              loop->lpt_decision.times);
 911 }
 912
 913 /* Unroll LOOP for that we are able to count number of iterations in runtime
 914    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 915    extra care for case n < 0):
 916
 917    for (i = 0; i < n; i++)
 918      body;
 919
 920    ==>
 921
 922    i = 0;
 923    mod = n % 4;
 924
 925    switch (mod)
 926      {
 927        case 3:
 928          body; i++;
 929        case 2:
 930          body; i++;
 931        case 1:
 932          body; i++;
 933        case 0: ;
 934      }
 935
 936    while (i < n)
 937      {
 938        body; i++;
 939        body; i++;
 940        body; i++;
 941        body; i++;
 942      }
 943    */
 944 static void
 945 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 946 {
 947   rtx old_niter, niter, init_code, branch_code, tmp;
 948   unsigned i, j, p;
 949   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 950   unsigned n_dom_bbs;
 951   sbitmap wont_exit;
 952   int may_exit_copy;
 953   unsigned n_peel, n_remove_edges;
 954   edge *remove_edges, e;
 955   bool extra_zero_check, last_may_exit;
 956   unsigned max_unroll = loop->lpt_decision.times;
 957   struct niter_desc *desc = get_simple_loop_desc (loop);
 958   bool exit_at_end = loop_exit_at_end_p (loop);
 959   struct opt_info *opt_info = NULL;
 960   bool ok;
 961
 962   if (flag_split_ivs_in_unroller
 963       || flag_variable_expansion_in_unroller)
 964     opt_info = analyze_insns_in_loop (loop);
 965
 966   /* Remember blocks whose dominators will have to be updated.  */
 967   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 968   n_dom_bbs = 0;
 969
 970   body = get_loop_body (loop);
 971   for (i = 0; i < loop->num_nodes; i++)
 972     {
 973       unsigned nldom;
 974       basic_block *ldom;
 975
 976       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 977       for (j = 0; j < nldom; j++)
 978         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 979           dom_bbs[n_dom_bbs++] = ldom[j];
 980
 981       free (ldom);
 982     }
 983   free (body);
 984
 985   if (!exit_at_end)
 986     {
 987       /* Leave exit in first copy (for explanation why see comment in
 988          unroll_loop_constant_iterations).  */
 989       may_exit_copy = 0;
 990       n_peel = max_unroll - 1;
 991       extra_zero_check = true;
 992       last_may_exit = false;
 993     }
 994   else
 995     {
 996       /* Leave exit in last copy (for explanation why see comment in
 997          unroll_loop_constant_iterations).  */
 998       may_exit_copy = max_unroll;
 999       n_peel = max_unroll;
1000       extra_zero_check = false;
1001       last_may_exit = true;
1002     }
1003
1004   /* Get expression for number of iterations.  */
1005   start_sequence ();
1006   old_niter = niter = gen_reg_rtx (desc->mode);
1007   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1008   if (tmp != niter)
1009     emit_move_insn (niter, tmp);
1010
1011   /* Count modulo by ANDing it with max_unroll; we use the fact that
1012      the number of unrollings is a power of two, and thus this is correct
1013      even if there is overflow in the computation.  */
1014   niter = expand_simple_binop (desc->mode, AND,
1015                                niter,
1016                                GEN_INT (max_unroll),
1017                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1018
1019   init_code = get_insns ();
1020   end_sequence ();
1021
1022   /* Precondition the loop.  */
1023   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1024
1025   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1026   n_remove_edges = 0;
1027
1028   wont_exit = sbitmap_alloc (max_unroll + 2);
1029
1030   /* Peel the first copy of loop body (almost always we must leave exit test
1031      here; the only exception is when we have extra zero check and the number
1032      of iterations is reliable.  Also record the place of (possible) extra
1033      zero check.  */
1034   sbitmap_zero (wont_exit);
1035   if (extra_zero_check
1036       && !desc->noloop_assumptions)
1037     SET_BIT (wont_exit, 1);
1038   ezc_swtch = loop_preheader_edge (loop)->src;
1039   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1040                                       loops, 1,
1041                                       wont_exit, desc->out_edge,
1042                                       remove_edges, &n_remove_edges,
1043                                       DLTHE_FLAG_UPDATE_FREQ);
1044   gcc_assert (ok);
1045
1046   /* Record the place where switch will be built for preconditioning.  */
1047   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1048                                 NULL_RTX);
1049
1050   for (i = 0; i < n_peel; i++)
1051     {
1052       /* Peel the copy.  */
1053       sbitmap_zero (wont_exit);
1054       if (i != n_peel - 1 || !last_may_exit)
1055         SET_BIT (wont_exit, 1);
1056       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1057                                           loops, 1,
1058                                           wont_exit, desc->out_edge,
1059                                           remove_edges, &n_remove_edges,
1060                                           DLTHE_FLAG_UPDATE_FREQ);
1061       gcc_assert (ok);
1062
1063       /* Create item for switch.  */
1064       j = n_peel - i - (extra_zero_check ? 0 : 1);
1065       p = REG_BR_PROB_BASE / (i + 2);
1066
1067       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1068       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1069                                           block_label (preheader), p,
1070                                           NULL_RTX);
1071
1072       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1073       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1074       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1075       e = make_edge (swtch, preheader,
1076                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1077       e->probability = p;
1078     }
1079
1080   if (extra_zero_check)
1081     {
1082       /* Add branch for zero iterations.  */
1083       p = REG_BR_PROB_BASE / (max_unroll + 1);
1084       swtch = ezc_swtch;
1085       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1086       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1087                                           block_label (preheader), p,
1088                                           NULL_RTX);
1089
1090       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1091       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1092       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1093       e = make_edge (swtch, preheader,
1094                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1095       e->probability = p;
1096     }
1097
1098   /* Recount dominators for outer blocks.  */
1099   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1100
1101   /* And unroll loop.  */
1102
1103   sbitmap_ones (wont_exit);
1104   RESET_BIT (wont_exit, may_exit_copy);
1105   opt_info_start_duplication (opt_info);
1106
1107   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1108                                       loops, max_unroll,
1109                                       wont_exit, desc->out_edge,
1110                                       remove_edges, &n_remove_edges,
1111                                       DLTHE_FLAG_UPDATE_FREQ
1112                                       | (opt_info
1113                                          ? DLTHE_RECORD_COPY_NUMBER
1114                                            : 0));
1115   gcc_assert (ok);
1116
1117   if (opt_info)
1118     {
1119       apply_opt_in_copies (opt_info, max_unroll, true, true);
1120       free_opt_info (opt_info);
1121     }
1122
1123   free (wont_exit);
1124
1125   if (exit_at_end)
1126     {
1127       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1128       /* Find a new in and out edge; they are in the last copy we have
1129          made.  */
1130
1131       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1132         {
1133           desc->out_edge = EDGE_SUCC (exit_block, 0);
1134           desc->in_edge = EDGE_SUCC (exit_block, 1);
1135         }
1136       else
1137         {
1138           desc->out_edge = EDGE_SUCC (exit_block, 1);
1139           desc->in_edge = EDGE_SUCC (exit_block, 0);
1140         }
1141     }
1142
1143   /* Remove the edges.  */
1144   for (i = 0; i < n_remove_edges; i++)
1145     remove_path (loops, remove_edges[i]);
1146   free (remove_edges);
1147
1148   /* We must be careful when updating the number of iterations due to
1149      preconditioning and the fact that the value must be valid at entry
1150      of the loop.  After passing through the above code, we see that
1151      the correct new number of iterations is this:  */
1152   gcc_assert (!desc->const_iter);
1153   desc->niter_expr =
1154     simplify_gen_binary (UDIV, desc->mode, old_niter,
1155                          GEN_INT (max_unroll + 1));
1156   desc->niter_max /= max_unroll + 1;
1157   if (exit_at_end)
1158     {
1159       desc->niter_expr =
1160         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1161       desc->noloop_assumptions = NULL_RTX;
1162       desc->niter_max--;
1163     }
1164
1165   if (dump_file)
1166     fprintf (dump_file,
1167              ";; Unrolled loop %d times, counting # of iterations "
1168              "in runtime, %i insns\n",
1169              max_unroll, num_loop_insns (loop));
1170 }
1171
1172 /* Decide whether to simply peel LOOP and how much.  */
1173 static void
1174 decide_peel_simple (struct loop *loop, int flags)
1175 {
1176   unsigned npeel;
1177   struct niter_desc *desc;
1178
1179   if (!(flags & UAP_PEEL))
1180     {
1181       /* We were not asked to, just return back silently.  */
1182       return;
1183     }
1184
1185   if (dump_file)
1186     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1187
1188   /* npeel = number of iterations to peel.  */
1189   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1190   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1191     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1192
1193   /* Skip big loops.  */
1194   if (!npeel)
1195     {
1196       if (dump_file)
1197         fprintf (dump_file, ";; Not considering loop, is too big\n");
1198       return;
1199     }
1200
1201   /* Check for simple loops.  */
1202   desc = get_simple_loop_desc (loop);
1203
1204   /* Check number of iterations.  */
1205   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1206     {
1207       if (dump_file)
1208         fprintf (dump_file, ";; Loop iterates constant times\n");
1209       return;
1210     }
1211
1212   /* Do not simply peel loops with branches inside -- it increases number
1213      of mispredicts.  */
1214   if (num_loop_branches (loop) > 1)
1215     {
1216       if (dump_file)
1217         fprintf (dump_file, ";; Not peeling, contains branches\n");
1218       return;
1219     }
1220
1221   if (loop->header->count)
1222     {
1223       unsigned niter = expected_loop_iterations (loop);
1224       if (niter + 1 > npeel)
1225         {
1226           if (dump_file)
1227             {
1228               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1229               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1230                        (HOST_WIDEST_INT) (niter + 1));
1231               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1232                        npeel);
1233             }
1234           return;
1235         }
1236       npeel = niter + 1;
1237     }
1238   else
1239     {
1240       /* For now we have no good heuristics to decide whether loop peeling
1241          will be effective, so disable it.  */
1242       if (dump_file)
1243         fprintf (dump_file,
1244                  ";; Not peeling loop, no evidence it will be profitable\n");
1245       return;
1246     }
1247
1248   /* Success.  */
1249   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1250   loop->lpt_decision.times = npeel;
1251
1252   if (dump_file)
1253     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1254              loop->lpt_decision.times);
1255 }
1256
1257 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1258    while (cond)
1259      body;
1260
1261    ==>
1262
1263    if (!cond) goto end;
1264    body;
1265    if (!cond) goto end;
1266    body;
1267    while (cond)
1268      body;
1269    end: ;
1270    */
1271 static void
1272 peel_loop_simple (struct loops *loops, struct loop *loop)
1273 {
1274   sbitmap wont_exit;
1275   unsigned npeel = loop->lpt_decision.times;
1276   struct niter_desc *desc = get_simple_loop_desc (loop);
1277   struct opt_info *opt_info = NULL;
1278   bool ok;
1279
1280   if (flag_split_ivs_in_unroller && npeel > 1)
1281     opt_info = analyze_insns_in_loop (loop);
1282
1283   wont_exit = sbitmap_alloc (npeel + 1);
1284   sbitmap_zero (wont_exit);
1285
1286   opt_info_start_duplication (opt_info);
1287
1288   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1289                                       loops, npeel, wont_exit,
1290                                       NULL, NULL,
1291                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1292                                       | (opt_info
1293                                          ? DLTHE_RECORD_COPY_NUMBER
1294                                            : 0));
1295   gcc_assert (ok);
1296
1297   free (wont_exit);
1298
1299   if (opt_info)
1300     {
1301       apply_opt_in_copies (opt_info, npeel, false, false);
1302       free_opt_info (opt_info);
1303     }
1304
1305   if (desc->simple_p)
1306     {
1307       if (desc->const_iter)
1308         {
1309           desc->niter -= npeel;
1310           desc->niter_expr = GEN_INT (desc->niter);
1311           desc->noloop_assumptions = NULL_RTX;
1312         }
1313       else
1314         {
1315           /* We cannot just update niter_expr, as its value might be clobbered
1316              inside loop.  We could handle this by counting the number into
1317              temporary just like we do in runtime unrolling, but it does not
1318              seem worthwhile.  */
1319           free_simple_loop_desc (loop);
1320         }
1321     }
1322   if (dump_file)
1323     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1324 }
1325
1326 /* Decide whether to unroll LOOP stupidly and how much.  */
1327 static void
1328 decide_unroll_stupid (struct loop *loop, int flags)
1329 {
1330   unsigned nunroll, nunroll_by_av, i;
1331   struct niter_desc *desc;
1332
1333   if (!(flags & UAP_UNROLL_ALL))
1334     {
1335       /* We were not asked to, just return back silently.  */
1336       return;
1337     }
1338
1339   if (dump_file)
1340     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1341
1342   /* nunroll = total number of copies of the original loop body in
1343      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1344   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1345   nunroll_by_av
1346     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1347   if (nunroll > nunroll_by_av)
1348     nunroll = nunroll_by_av;
1349   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1350     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1351
1352   /* Skip big loops.  */
1353   if (nunroll <= 1)
1354     {
1355       if (dump_file)
1356         fprintf (dump_file, ";; Not considering loop, is too big\n");
1357       return;
1358     }
1359
1360   /* Check for simple loops.  */
1361   desc = get_simple_loop_desc (loop);
1362
1363   /* Check simpleness.  */
1364   if (desc->simple_p && !desc->assumptions)
1365     {
1366       if (dump_file)
1367         fprintf (dump_file, ";; The loop is simple\n");
1368       return;
1369     }
1370
1371   /* Do not unroll loops with branches inside -- it increases number
1372      of mispredicts.  */
1373   if (num_loop_branches (loop) > 1)
1374     {
1375       if (dump_file)
1376         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1377       return;
1378     }
1379
1380   /* If we have profile feedback, check whether the loop rolls.  */
1381   if (loop->header->count
1382       && expected_loop_iterations (loop) < 2 * nunroll)
1383     {
1384       if (dump_file)
1385         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1386       return;
1387     }
1388
1389   /* Success.  Now force nunroll to be power of 2, as it seems that this
1390      improves results (partially because of better alignments, partially
1391      because of some dark magic).  */
1392   for (i = 1; 2 * i <= nunroll; i *= 2)
1393     continue;
1394
1395   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1396   loop->lpt_decision.times = i - 1;
1397
1398   if (dump_file)
1399     fprintf (dump_file,
1400              ";; Decided to unroll the loop stupidly, %d times.\n",
1401              loop->lpt_decision.times);
1402 }
1403
1404 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1405    while (cond)
1406      body;
1407
1408    ==>
1409
1410    while (cond)
1411      {
1412        body;
1413        if (!cond) break;
1414        body;
1415        if (!cond) break;
1416        body;
1417        if (!cond) break;
1418        body;
1419      }
1420    */
1421 static void
1422 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1423 {
1424   sbitmap wont_exit;
1425   unsigned nunroll = loop->lpt_decision.times;
1426   struct niter_desc *desc = get_simple_loop_desc (loop);
1427   struct opt_info *opt_info = NULL;
1428   bool ok;
1429
1430   if (flag_split_ivs_in_unroller
1431       || flag_variable_expansion_in_unroller)
1432     opt_info = analyze_insns_in_loop (loop);
1433
1434
1435   wont_exit = sbitmap_alloc (nunroll + 1);
1436   sbitmap_zero (wont_exit);
1437   opt_info_start_duplication (opt_info);
1438
1439   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1440                                       loops, nunroll, wont_exit,
1441                                       NULL, NULL, NULL,
1442                                       DLTHE_FLAG_UPDATE_FREQ
1443                                       | (opt_info
1444                                          ? DLTHE_RECORD_COPY_NUMBER
1445                                            : 0));
1446   gcc_assert (ok);
1447
1448   if (opt_info)
1449     {
1450       apply_opt_in_copies (opt_info, nunroll, true, true);
1451       free_opt_info (opt_info);
1452     }
1453
1454   free (wont_exit);
1455
1456   if (desc->simple_p)
1457     {
1458       /* We indeed may get here provided that there are nontrivial assumptions
1459          for a loop to be really simple.  We could update the counts, but the
1460          problem is that we are unable to decide which exit will be taken
1461          (not really true in case the number of iterations is constant,
1462          but noone will do anything with this information, so we do not
1463          worry about it).  */
1464       desc->simple_p = false;
1465     }
1466
1467   if (dump_file)
1468     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1469              nunroll, num_loop_insns (loop));
1470 }
1471
1472 /* A hash function for information about insns to split.  */
1473
1474 static hashval_t
1475 si_info_hash (const void *ivts)
1476 {
1477   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1478 }
1479
1480 /* An equality functions for information about insns to split.  */
1481
1482 static int
1483 si_info_eq (const void *ivts1, const void *ivts2)
1484 {
1485   const struct iv_to_split *i1 = ivts1;
1486   const struct iv_to_split *i2 = ivts2;
1487
1488   return i1->insn == i2->insn;
1489 }
1490
1491 /* Return a hash for VES, which is really a "var_to_expand *".  */
1492
1493 static hashval_t
1494 ve_info_hash (const void *ves)
1495 {
1496   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1497 }
1498
1499 /* Return true if IVTS1 and IVTS2 (which are really both of type
1500    "var_to_expand *") refer to the same instruction.  */
1501
1502 static int
1503 ve_info_eq (const void *ivts1, const void *ivts2)
1504 {
1505   const struct var_to_expand *i1 = ivts1;
1506   const struct var_to_expand *i2 = ivts2;
1507
1508   return i1->insn == i2->insn;
1509 }
1510
1511 /* Returns true if REG is referenced in one insn in LOOP.  */
1512
1513 bool
1514 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1515 {
1516   basic_block *body, bb;
1517   unsigned i;
1518   int count_ref = 0;
1519   rtx insn;
1520
1521   body = get_loop_body (loop);
1522   for (i = 0; i < loop->num_nodes; i++)
1523     {
1524       bb = body[i];
1525
1526       FOR_BB_INSNS (bb, insn)
1527       {
1528         if (rtx_referenced_p (reg, insn))
1529           count_ref++;
1530       }
1531     }
1532   return (count_ref  == 1);
1533 }
1534
1535 /* Determine whether INSN contains an accumulator
1536    which can be expanded into separate copies,
1537    one for each copy of the LOOP body.
1538
1539    for (i = 0 ; i < n; i++)
1540      sum += a[i];
1541
1542    ==>
1543
1544    sum += a[i]
1545    ....
1546    i = i+1;
1547    sum1 += a[i]
1548    ....
1549    i = i+1
1550    sum2 += a[i];
1551    ....
1552
1553    Return NULL if INSN contains no opportunity for expansion of accumulator.
1554    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1555    information and return a pointer to it.
1556 */
1557
1558 static struct var_to_expand *
1559 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1560 {
1561   rtx set, dest, src, op1;
1562   struct var_to_expand *ves;
1563   enum machine_mode mode1, mode2;
1564
1565   set = single_set (insn);
1566   if (!set)
1567     return NULL;
1568
1569   dest = SET_DEST (set);
1570   src = SET_SRC (set);
1571
1572   if (GET_CODE (src) != PLUS
1573       && GET_CODE (src) != MINUS
1574       && GET_CODE (src) != MULT)
1575     return NULL;
1576
1577   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1578      in MD.  But if there is no optab to generate the insn, we can not
1579      perform the variable expansion.  This can happen if an MD provides
1580      an insn but not a named pattern to generate it, for example to avoid
1581      producing code that needs additional mode switches like for x87/mmx.
1582
1583      So we check have_insn_for which looks for an optab for the operation
1584      in SRC.  If it doesn't exist, we can't perform the expansion even
1585      though INSN is valid.  */
1586   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1587     return NULL;
1588
1589   if (!XEXP (src, 0))
1590     return NULL;
1591
1592   op1 = XEXP (src, 0);
1593
1594   if (!REG_P (dest)
1595       && !(GET_CODE (dest) == SUBREG
1596            && REG_P (SUBREG_REG (dest))))
1597     return NULL;
1598
1599   if (!rtx_equal_p (dest, op1))
1600     return NULL;
1601
1602   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1603     return NULL;
1604
1605   if (rtx_referenced_p (dest, XEXP (src, 1)))
1606     return NULL;
1607
1608   mode1 = GET_MODE (dest);
1609   mode2 = GET_MODE (XEXP (src, 1));
1610   if ((FLOAT_MODE_P (mode1)
1611        || FLOAT_MODE_P (mode2))
1612       && !flag_unsafe_math_optimizations)
1613     return NULL;
1614
1615   /* Record the accumulator to expand.  */
1616   ves = xmalloc (sizeof (struct var_to_expand));
1617   ves->insn = insn;
1618   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1619   ves->reg = copy_rtx (dest);
1620   ves->op = GET_CODE (src);
1621   ves->expansion_count = 0;
1622   ves->reuse_expansion = 0;
1623   return ves;
1624 }
1625
1626 /* Determine whether there is an induction variable in INSN that
1627    we would like to split during unrolling.
1628
1629    I.e. replace
1630
1631    i = i + 1;
1632    ...
1633    i = i + 1;
1634    ...
1635    i = i + 1;
1636    ...
1637
1638    type chains by
1639
1640    i0 = i + 1
1641    ...
1642    i = i0 + 1
1643    ...
1644    i = i0 + 2
1645    ...
1646
1647    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1648    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1649    pointer to it.  */
1650
1651 static struct iv_to_split *
1652 analyze_iv_to_split_insn (rtx insn)
1653 {
1654   rtx set, dest;
1655   struct rtx_iv iv;
1656   struct iv_to_split *ivts;
1657   bool ok;
1658
1659   /* For now we just split the basic induction variables.  Later this may be
1660      extended for example by selecting also addresses of memory references.  */
1661   set = single_set (insn);
1662   if (!set)
1663     return NULL;
1664
1665   dest = SET_DEST (set);
1666   if (!REG_P (dest))
1667     return NULL;
1668
1669   if (!biv_p (insn, dest))
1670     return NULL;
1671
1672   ok = iv_analyze_result (insn, dest, &iv);
1673   gcc_assert (ok);
1674
1675   if (iv.step == const0_rtx
1676       || iv.mode != iv.extend_mode)
1677     return NULL;
1678
1679   /* Record the insn to split.  */
1680   ivts = xmalloc (sizeof (struct iv_to_split));
1681   ivts->insn = insn;
1682   ivts->base_var = NULL_RTX;
1683   ivts->step = iv.step;
1684   ivts->n_loc = 1;
1685   ivts->loc[0] = 1;
1686
1687   return ivts;
1688 }
1689
1690 /* Determines which of insns in LOOP can be optimized.
1691    Return a OPT_INFO struct with the relevant hash tables filled
1692    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1693    is undefined for the return value.  */
1694
1695 static struct opt_info *
1696 analyze_insns_in_loop (struct loop *loop)
1697 {
1698   basic_block *body, bb;
1699   unsigned i, num_edges = 0;
1700   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1701   rtx insn;
1702   struct iv_to_split *ivts = NULL;
1703   struct var_to_expand *ves = NULL;
1704   PTR *slot1;
1705   PTR *slot2;
1706   edge *edges = get_loop_exit_edges (loop, &num_edges);
1707   bool can_apply = false;
1708
1709   iv_analysis_loop_init (loop);
1710
1711   body = get_loop_body (loop);
1712
1713   if (flag_split_ivs_in_unroller)
1714     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1715                                             si_info_hash, si_info_eq, free);
1716
1717   /* Record the loop exit bb and loop preheader before the unrolling.  */
1718   if (!loop_preheader_edge (loop)->src)
1719     {
1720       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1721       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1722     }
1723   else
1724     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1725
1726   if (num_edges == 1
1727       && !(edges[0]->flags & EDGE_COMPLEX))
1728     {
1729       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1730       can_apply = true;
1731     }
1732
1733   if (flag_variable_expansion_in_unroller
1734       && can_apply)
1735     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1736                                                       ve_info_hash, ve_info_eq, free);
1737
1738   for (i = 0; i < loop->num_nodes; i++)
1739     {
1740       bb = body[i];
1741       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1742         continue;
1743
1744       FOR_BB_INSNS (bb, insn)
1745       {
1746         if (!INSN_P (insn))
1747           continue;
1748
1749         if (opt_info->insns_to_split)
1750           ivts = analyze_iv_to_split_insn (insn);
1751
1752         if (ivts)
1753           {
1754             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1755             *slot1 = ivts;
1756             continue;
1757           }
1758
1759         if (opt_info->insns_with_var_to_expand)
1760           ves = analyze_insn_to_expand_var (loop, insn);
1761
1762         if (ves)
1763           {
1764             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1765             *slot2 = ves;
1766           }
1767       }
1768     }
1769
1770   free (edges);
1771   free (body);
1772   return opt_info;
1773 }
1774
1775 /* Called just before loop duplication.  Records start of duplicated area
1776    to OPT_INFO.  */
1777
1778 static void
1779 opt_info_start_duplication (struct opt_info *opt_info)
1780 {
1781   if (opt_info)
1782     opt_info->first_new_block = last_basic_block;
1783 }
1784
1785 /* Determine the number of iterations between initialization of the base
1786    variable and the current copy (N_COPY).  N_COPIES is the total number
1787    of newly created copies.  UNROLLING is true if we are unrolling
1788    (not peeling) the loop.  */
1789
1790 static unsigned
1791 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1792 {
1793   if (unrolling)
1794     {
1795       /* If we are unrolling, initialization is done in the original loop
1796          body (number 0).  */
1797       return n_copy;
1798     }
1799   else
1800     {
1801       /* If we are peeling, the copy in that the initialization occurs has
1802          number 1.  The original loop (number 0) is the last.  */
1803       if (n_copy)
1804         return n_copy - 1;
1805       else
1806         return n_copies;
1807     }
1808 }
1809
1810 /* Locate in EXPR the expression corresponding to the location recorded
1811    in IVTS, and return a pointer to the RTX for this location.  */
1812
1813 static rtx *
1814 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1815 {
1816   unsigned i;
1817   rtx *ret = &expr;
1818
1819   for (i = 0; i < ivts->n_loc; i++)
1820     ret = &XEXP (*ret, ivts->loc[i]);
1821
1822   return ret;
1823 }
1824
1825 /* Allocate basic variable for the induction variable chain.  Callback for
1826    htab_traverse.  */
1827
1828 static int
1829 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1830 {
1831   struct iv_to_split *ivts = *slot;
1832   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1833
1834   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1835
1836   return 1;
1837 }
1838
1839 /* Insert initialization of basic variable of IVTS before INSN, taking
1840    the initial value from INSN.  */
1841
1842 static void
1843 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1844 {
1845   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1846   rtx seq;
1847
1848   start_sequence ();
1849   expr = force_operand (expr, ivts->base_var);
1850   if (expr != ivts->base_var)
1851     emit_move_insn (ivts->base_var, expr);
1852   seq = get_insns ();
1853   end_sequence ();
1854
1855   emit_insn_before (seq, insn);
1856 }
1857
1858 /* Replace the use of induction variable described in IVTS in INSN
1859    by base variable + DELTA * step.  */
1860
1861 static void
1862 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1863 {
1864   rtx expr, *loc, seq, incr, var;
1865   enum machine_mode mode = GET_MODE (ivts->base_var);
1866   rtx src, dest, set;
1867
1868   /* Construct base + DELTA * step.  */
1869   if (!delta)
1870     expr = ivts->base_var;
1871   else
1872     {
1873       incr = simplify_gen_binary (MULT, mode,
1874                                   ivts->step, gen_int_mode (delta, mode));
1875       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1876                                   ivts->base_var, incr);
1877     }
1878
1879   /* Figure out where to do the replacement.  */
1880   loc = get_ivts_expr (single_set (insn), ivts);
1881
1882   /* If we can make the replacement right away, we're done.  */
1883   if (validate_change (insn, loc, expr, 0))
1884     return;
1885
1886   /* Otherwise, force EXPR into a register and try again.  */
1887   start_sequence ();
1888   var = gen_reg_rtx (mode);
1889   expr = force_operand (expr, var);
1890   if (expr != var)
1891     emit_move_insn (var, expr);
1892   seq = get_insns ();
1893   end_sequence ();
1894   emit_insn_before (seq, insn);
1895
1896   if (validate_change (insn, loc, var, 0))
1897     return;
1898
1899   /* The last chance.  Try recreating the assignment in insn
1900      completely from scratch.  */
1901   set = single_set (insn);
1902   gcc_assert (set);
1903
1904   start_sequence ();
1905   *loc = var;
1906   src = copy_rtx (SET_SRC (set));
1907   dest = copy_rtx (SET_DEST (set));
1908   src = force_operand (src, dest);
1909   if (src != dest)
1910     emit_move_insn (dest, src);
1911   seq = get_insns ();
1912   end_sequence ();
1913
1914   emit_insn_before (seq, insn);
1915   delete_insn (insn);
1916 }
1917
1918
1919 /* Return one expansion of the accumulator recorded in struct VE.  */
1920
1921 static rtx
1922 get_expansion (struct var_to_expand *ve)
1923 {
1924   rtx reg;
1925
1926   if (ve->reuse_expansion == 0)
1927     reg = ve->reg;
1928   else
1929     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1930
1931   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1932     ve->reuse_expansion = 0;
1933   else
1934     ve->reuse_expansion++;
1935
1936   return reg;
1937 }
1938
1939
1940 /* Given INSN replace the uses of the accumulator recorded in VE
1941    with a new register.  */
1942
1943 static void
1944 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1945 {
1946   rtx new_reg, set;
1947   bool really_new_expansion = false;
1948
1949   set = single_set (insn);
1950   gcc_assert (set);
1951
1952   /* Generate a new register only if the expansion limit has not been
1953      reached.  Else reuse an already existing expansion.  */
1954   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1955     {
1956       really_new_expansion = true;
1957       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1958     }
1959   else
1960     new_reg = get_expansion (ve);
1961
1962   validate_change (insn, &SET_DEST (set), new_reg, 1);
1963   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1964
1965   if (apply_change_group ())
1966     if (really_new_expansion)
1967       {
1968         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1969         ve->expansion_count++;
1970       }
1971 }
1972
1973 /* Initialize the variable expansions in loop preheader.
1974    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1975    basic block where the initialization of the expansions
1976    should take place.  */
1977
1978 static int
1979 insert_var_expansion_initialization (void **slot, void *place_p)
1980 {
1981   struct var_to_expand *ve = *slot;
1982   basic_block place = (basic_block)place_p;
1983   rtx seq, var, zero_init, insn;
1984   unsigned i;
1985
1986   if (VEC_length (rtx, ve->var_expansions) == 0)
1987     return 1;
1988
1989   start_sequence ();
1990   if (ve->op == PLUS || ve->op == MINUS)
1991     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1992       {
1993         zero_init =  CONST0_RTX (GET_MODE (var));
1994         emit_move_insn (var, zero_init);
1995       }
1996   else if (ve->op == MULT)
1997     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1998       {
1999         zero_init =  CONST1_RTX (GET_MODE (var));
2000         emit_move_insn (var, zero_init);
2001       }
2002
2003   seq = get_insns ();
2004   end_sequence ();
2005
2006   insn = BB_HEAD (place);
2007   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2008     insn = NEXT_INSN (insn);
2009
2010   emit_insn_after (seq, insn);
2011   /* Continue traversing the hash table.  */
2012   return 1;
2013 }
2014
2015 /*  Combine the variable expansions at the loop exit.
2016     Callbacks for htab_traverse.  PLACE_P is the loop exit
2017     basic block where the summation of the expansions should
2018     take place.  */
2019
2020 static int
2021 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2022 {
2023   struct var_to_expand *ve = *slot;
2024   basic_block place = (basic_block)place_p;
2025   rtx sum = ve->reg;
2026   rtx expr, seq, var, insn;
2027   unsigned i;
2028
2029   if (VEC_length (rtx, ve->var_expansions) == 0)
2030     return 1;
2031
2032   start_sequence ();
2033   if (ve->op == PLUS || ve->op == MINUS)
2034     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2035       {
2036         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2037                                    var, sum);
2038       }
2039   else if (ve->op == MULT)
2040     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2041       {
2042         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2043                                    var, sum);
2044       }
2045
2046   expr = force_operand (sum, ve->reg);
2047   if (expr != ve->reg)
2048     emit_move_insn (ve->reg, expr);
2049   seq = get_insns ();
2050   end_sequence ();
2051
2052   insn = BB_HEAD (place);
2053   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2054     insn = NEXT_INSN (insn);
2055
2056   emit_insn_after (seq, insn);
2057
2058   /* Continue traversing the hash table.  */
2059   return 1;
2060 }
2061
2062 /* Apply loop optimizations in loop copies using the
2063    data which gathered during the unrolling.  Structure
2064    OPT_INFO record that data.
2065
2066    UNROLLING is true if we unrolled (not peeled) the loop.
2067    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2068    the loop (as it should happen in complete unrolling, but not in ordinary
2069    peeling of the loop).  */
2070
2071 static void
2072 apply_opt_in_copies (struct opt_info *opt_info,
2073                      unsigned n_copies, bool unrolling,
2074                      bool rewrite_original_loop)
2075 {
2076   unsigned i, delta;
2077   basic_block bb, orig_bb;
2078   rtx insn, orig_insn, next;
2079   struct iv_to_split ivts_templ, *ivts;
2080   struct var_to_expand ve_templ, *ves;
2081
2082   /* Sanity check -- we need to put initialization in the original loop
2083      body.  */
2084   gcc_assert (!unrolling || rewrite_original_loop);
2085
2086   /* Allocate the basic variables (i0).  */
2087   if (opt_info->insns_to_split)
2088     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2089
2090   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2091     {
2092       bb = BASIC_BLOCK (i);
2093       orig_bb = get_bb_original (bb);
2094
2095       /* bb->aux holds position in copy sequence initialized by
2096          duplicate_loop_to_header_edge.  */
2097       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2098                                         unrolling);
2099       bb->aux = 0;
2100       orig_insn = BB_HEAD (orig_bb);
2101       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2102         {
2103           next = NEXT_INSN (insn);
2104           if (!INSN_P (insn))
2105             continue;
2106
2107           while (!INSN_P (orig_insn))
2108             orig_insn = NEXT_INSN (orig_insn);
2109
2110           ivts_templ.insn = orig_insn;
2111           ve_templ.insn = orig_insn;
2112
2113           /* Apply splitting iv optimization.  */
2114           if (opt_info->insns_to_split)
2115             {
2116               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2117
2118               if (ivts)
2119                 {
2120                   gcc_assert (GET_CODE (PATTERN (insn))
2121                               == GET_CODE (PATTERN (orig_insn)));
2122
2123                   if (!delta)
2124                     insert_base_initialization (ivts, insn);
2125                   split_iv (ivts, insn, delta);
2126                 }
2127             }
2128           /* Apply variable expansion optimization.  */
2129           if (unrolling && opt_info->insns_with_var_to_expand)
2130             {
2131               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2132               if (ves)
2133                 {
2134                   gcc_assert (GET_CODE (PATTERN (insn))
2135                               == GET_CODE (PATTERN (orig_insn)));
2136                   expand_var_during_unrolling (ves, insn);
2137                 }
2138             }
2139           orig_insn = NEXT_INSN (orig_insn);
2140         }
2141     }
2142
2143   if (!rewrite_original_loop)
2144     return;
2145
2146   /* Initialize the variable expansions in the loop preheader
2147      and take care of combining them at the loop exit.  */
2148   if (opt_info->insns_with_var_to_expand)
2149     {
2150       htab_traverse (opt_info->insns_with_var_to_expand,
2151                      insert_var_expansion_initialization,
2152                      opt_info->loop_preheader);
2153       htab_traverse (opt_info->insns_with_var_to_expand,
2154                      combine_var_copies_in_loop_exit,
2155                      opt_info->loop_exit);
2156     }
2157
2158   /* Rewrite also the original loop body.  Find them as originals of the blocks
2159      in the last copied iteration, i.e. those that have
2160      get_bb_copy (get_bb_original (bb)) == bb.  */
2161   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2162     {
2163       bb = BASIC_BLOCK (i);
2164       orig_bb = get_bb_original (bb);
2165       if (get_bb_copy (orig_bb) != bb)
2166         continue;
2167
2168       delta = determine_split_iv_delta (0, n_copies, unrolling);
2169       for (orig_insn = BB_HEAD (orig_bb);
2170            orig_insn != NEXT_INSN (BB_END (bb));
2171            orig_insn = next)
2172         {
2173           next = NEXT_INSN (orig_insn);
2174
2175           if (!INSN_P (orig_insn))
2176             continue;
2177
2178           ivts_templ.insn = orig_insn;
2179           if (opt_info->insns_to_split)
2180             {
2181               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2182               if (ivts)
2183                 {
2184                   if (!delta)
2185                     insert_base_initialization (ivts, orig_insn);
2186                   split_iv (ivts, orig_insn, delta);
2187                   continue;
2188                 }
2189             }
2190
2191         }
2192     }
2193 }
2194
2195 /*  Release the data structures used for the variable expansion
2196     optimization.  Callbacks for htab_traverse.  */
2197
2198 static int
2199 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2200 {
2201   struct var_to_expand *ve = *slot;
2202
2203   VEC_free (rtx, heap, ve->var_expansions);
2204
2205   /* Continue traversing the hash table.  */
2206   return 1;
2207 }
2208
2209 /* Release OPT_INFO.  */
2210
2211 static void
2212 free_opt_info (struct opt_info *opt_info)
2213 {
2214   if (opt_info->insns_to_split)
2215     htab_delete (opt_info->insns_to_split);
2216   if (opt_info->insns_with_var_to_expand)
2217     {
2218       htab_traverse (opt_info->insns_with_var_to_expand,
2219                      release_var_copies, NULL);
2220       htab_delete (opt_info->insns_with_var_to_expand);
2221     }
2222   free (opt_info);
2223 }