gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 DEF_VEC_P(rtx);
  88 DEF_VEC_ALLOC_P(rtx,heap);
  89
  90 /* Information about accumulators to expand.  */
  91
  92 struct var_to_expand
  93 {
  94   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  95   rtx reg;                         /* The accumulator which is expanded.  */
  96   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  97   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  98                                       or multiplication.  */
  99   int expansion_count;             /* Count the number of expansions generated so far.  */
 100   int reuse_expansion;             /* The expansion we intend to reuse to expand
 101                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 102                                       the original accumulator.  Else use
 103                                       var_expansions[REUSE_EXPANSION - 1].  */
 104 };
 105
 106 /* Information about optimization applied in
 107    the unrolled loop.  */
 108
 109 struct opt_info
 110 {
 111   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 112   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 113                                       to expand.  */
 114   unsigned first_new_block;        /* The first basic block that was
 115                                       duplicated.  */
 116   basic_block loop_exit;           /* The loop exit basic block.  */
 117   basic_block loop_preheader;      /* The loop preheader basic block.  */
 118 };
 119
 120 static void decide_unrolling_and_peeling (struct loops *, int);
 121 static void peel_loops_completely (struct loops *, int);
 122 static void decide_peel_simple (struct loop *, int);
 123 static void decide_peel_once_rolling (struct loop *, int);
 124 static void decide_peel_completely (struct loop *, int);
 125 static void decide_unroll_stupid (struct loop *, int);
 126 static void decide_unroll_constant_iterations (struct loop *, int);
 127 static void decide_unroll_runtime_iterations (struct loop *, int);
 128 static void peel_loop_simple (struct loops *, struct loop *);
 129 static void peel_loop_completely (struct loops *, struct loop *);
 130 static void unroll_loop_stupid (struct loops *, struct loop *);
 131 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 132 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 133 static struct opt_info *analyze_insns_in_loop (struct loop *);
 134 static void opt_info_start_duplication (struct opt_info *);
 135 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 136 static void free_opt_info (struct opt_info *);
 137 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 138 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 139 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 140 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 141 static int insert_var_expansion_initialization (void **, void *);
 142 static int combine_var_copies_in_loop_exit (void **, void *);
 143 static int release_var_copies (void **, void *);
 144 static rtx get_expansion (struct var_to_expand *);
 145
 146 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 147 void
 148 unroll_and_peel_loops (struct loops *loops, int flags)
 149 {
 150   struct loop *loop, *next;
 151   bool check;
 152
 153   /* First perform complete loop peeling (it is almost surely a win,
 154      and affects parameters for further decision a lot).  */
 155   peel_loops_completely (loops, flags);
 156
 157   /* Now decide rest of unrolling and peeling.  */
 158   decide_unrolling_and_peeling (loops, flags);
 159
 160   loop = loops->tree_root;
 161   while (loop->inner)
 162     loop = loop->inner;
 163
 164   /* Scan the loops, inner ones first.  */
 165   while (loop != loops->tree_root)
 166     {
 167       if (loop->next)
 168         {
 169           next = loop->next;
 170           while (next->inner)
 171             next = next->inner;
 172         }
 173       else
 174         next = loop->outer;
 175
 176       check = true;
 177       /* And perform the appropriate transformations.  */
 178       switch (loop->lpt_decision.decision)
 179         {
 180         case LPT_PEEL_COMPLETELY:
 181           /* Already done.  */
 182           gcc_unreachable ();
 183         case LPT_PEEL_SIMPLE:
 184           peel_loop_simple (loops, loop);
 185           break;
 186         case LPT_UNROLL_CONSTANT:
 187           unroll_loop_constant_iterations (loops, loop);
 188           break;
 189         case LPT_UNROLL_RUNTIME:
 190           unroll_loop_runtime_iterations (loops, loop);
 191           break;
 192         case LPT_UNROLL_STUPID:
 193           unroll_loop_stupid (loops, loop);
 194           break;
 195         case LPT_NONE:
 196           check = false;
 197           break;
 198         default:
 199           gcc_unreachable ();
 200         }
 201       if (check)
 202         {
 203 #ifdef ENABLE_CHECKING
 204           verify_dominators (CDI_DOMINATORS);
 205           verify_loop_structure (loops);
 206 #endif
 207         }
 208       loop = next;
 209     }
 210
 211   iv_analysis_done ();
 212 }
 213
 214 /* Check whether exit of the LOOP is at the end of loop body.  */
 215
 216 static bool
 217 loop_exit_at_end_p (struct loop *loop)
 218 {
 219   struct niter_desc *desc = get_simple_loop_desc (loop);
 220   rtx insn;
 221
 222   if (desc->in_edge->dest != loop->latch)
 223     return false;
 224
 225   /* Check that the latch is empty.  */
 226   FOR_BB_INSNS (loop->latch, insn)
 227     {
 228       if (INSN_P (insn))
 229         return false;
 230     }
 231
 232   return true;
 233 }
 234
 235 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 236 static void
 237 peel_loops_completely (struct loops *loops, int flags)
 238 {
 239   struct loop *loop, *next;
 240
 241   loop = loops->tree_root;
 242   while (loop->inner)
 243     loop = loop->inner;
 244
 245   while (loop != loops->tree_root)
 246     {
 247       if (loop->next)
 248         {
 249           next = loop->next;
 250           while (next->inner)
 251             next = next->inner;
 252         }
 253       else
 254         next = loop->outer;
 255
 256       loop->lpt_decision.decision = LPT_NONE;
 257
 258       if (dump_file)
 259         fprintf (dump_file,
 260                  "\n;; *** Considering loop %d for complete peeling ***\n",
 261                  loop->num);
 262
 263       loop->ninsns = num_loop_insns (loop);
 264
 265       decide_peel_once_rolling (loop, flags);
 266       if (loop->lpt_decision.decision == LPT_NONE)
 267         decide_peel_completely (loop, flags);
 268
 269       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 270         {
 271           peel_loop_completely (loops, loop);
 272 #ifdef ENABLE_CHECKING
 273           verify_dominators (CDI_DOMINATORS);
 274           verify_loop_structure (loops);
 275 #endif
 276         }
 277       loop = next;
 278     }
 279 }
 280
 281 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 282 static void
 283 decide_unrolling_and_peeling (struct loops *loops, int flags)
 284 {
 285   struct loop *loop = loops->tree_root, *next;
 286
 287   while (loop->inner)
 288     loop = loop->inner;
 289
 290   /* Scan the loops, inner ones first.  */
 291   while (loop != loops->tree_root)
 292     {
 293       if (loop->next)
 294         {
 295           next = loop->next;
 296           while (next->inner)
 297             next = next->inner;
 298         }
 299       else
 300         next = loop->outer;
 301
 302       loop->lpt_decision.decision = LPT_NONE;
 303
 304       if (dump_file)
 305         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 306
 307       /* Do not peel cold areas.  */
 308       if (!maybe_hot_bb_p (loop->header))
 309         {
 310           if (dump_file)
 311             fprintf (dump_file, ";; Not considering loop, cold area\n");
 312           loop = next;
 313           continue;
 314         }
 315
 316       /* Can the loop be manipulated?  */
 317       if (!can_duplicate_loop_p (loop))
 318         {
 319           if (dump_file)
 320             fprintf (dump_file,
 321                      ";; Not considering loop, cannot duplicate\n");
 322           loop = next;
 323           continue;
 324         }
 325
 326       /* Skip non-innermost loops.  */
 327       if (loop->inner)
 328         {
 329           if (dump_file)
 330             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 331           loop = next;
 332           continue;
 333         }
 334
 335       loop->ninsns = num_loop_insns (loop);
 336       loop->av_ninsns = average_num_loop_insns (loop);
 337
 338       /* Try transformations one by one in decreasing order of
 339          priority.  */
 340
 341       decide_unroll_constant_iterations (loop, flags);
 342       if (loop->lpt_decision.decision == LPT_NONE)
 343         decide_unroll_runtime_iterations (loop, flags);
 344       if (loop->lpt_decision.decision == LPT_NONE)
 345         decide_unroll_stupid (loop, flags);
 346       if (loop->lpt_decision.decision == LPT_NONE)
 347         decide_peel_simple (loop, flags);
 348
 349       loop = next;
 350     }
 351 }
 352
 353 /* Decide whether the LOOP is once rolling and suitable for complete
 354    peeling.  */
 355 static void
 356 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 357 {
 358   struct niter_desc *desc;
 359
 360   if (dump_file)
 361     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 362
 363   /* Is the loop small enough?  */
 364   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 365     {
 366       if (dump_file)
 367         fprintf (dump_file, ";; Not considering loop, is too big\n");
 368       return;
 369     }
 370
 371   /* Check for simple loops.  */
 372   desc = get_simple_loop_desc (loop);
 373
 374   /* Check number of iterations.  */
 375   if (!desc->simple_p
 376       || desc->assumptions
 377       || desc->infinite
 378       || !desc->const_iter
 379       || desc->niter != 0)
 380     {
 381       if (dump_file)
 382         fprintf (dump_file,
 383                  ";; Unable to prove that the loop rolls exactly once\n");
 384       return;
 385     }
 386
 387   /* Success.  */
 388   if (dump_file)
 389     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 390   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 391 }
 392
 393 /* Decide whether the LOOP is suitable for complete peeling.  */
 394 static void
 395 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 396 {
 397   unsigned npeel;
 398   struct niter_desc *desc;
 399
 400   if (dump_file)
 401     fprintf (dump_file, "\n;; Considering peeling completely\n");
 402
 403   /* Skip non-innermost loops.  */
 404   if (loop->inner)
 405     {
 406       if (dump_file)
 407         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 408       return;
 409     }
 410
 411   /* Do not peel cold areas.  */
 412   if (!maybe_hot_bb_p (loop->header))
 413     {
 414       if (dump_file)
 415         fprintf (dump_file, ";; Not considering loop, cold area\n");
 416       return;
 417     }
 418
 419   /* Can the loop be manipulated?  */
 420   if (!can_duplicate_loop_p (loop))
 421     {
 422       if (dump_file)
 423         fprintf (dump_file,
 424                  ";; Not considering loop, cannot duplicate\n");
 425       return;
 426     }
 427
 428   /* npeel = number of iterations to peel.  */
 429   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 430   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 431     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 432
 433   /* Is the loop small enough?  */
 434   if (!npeel)
 435     {
 436       if (dump_file)
 437         fprintf (dump_file, ";; Not considering loop, is too big\n");
 438       return;
 439     }
 440
 441   /* Check for simple loops.  */
 442   desc = get_simple_loop_desc (loop);
 443
 444   /* Check number of iterations.  */
 445   if (!desc->simple_p
 446       || desc->assumptions
 447       || !desc->const_iter
 448       || desc->infinite)
 449     {
 450       if (dump_file)
 451         fprintf (dump_file,
 452                  ";; Unable to prove that the loop iterates constant times\n");
 453       return;
 454     }
 455
 456   if (desc->niter > npeel - 1)
 457     {
 458       if (dump_file)
 459         {
 460           fprintf (dump_file,
 461                    ";; Not peeling loop completely, rolls too much (");
 462           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 463           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 464         }
 465       return;
 466     }
 467
 468   /* Success.  */
 469   if (dump_file)
 470     fprintf (dump_file, ";; Decided to peel loop completely\n");
 471   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 472 }
 473
 474 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 475    completely.  The transformation done:
 476
 477    for (i = 0; i < 4; i++)
 478      body;
 479
 480    ==>
 481
 482    i = 0;
 483    body; i++;
 484    body; i++;
 485    body; i++;
 486    body; i++;
 487    */
 488 static void
 489 peel_loop_completely (struct loops *loops, struct loop *loop)
 490 {
 491   sbitmap wont_exit;
 492   unsigned HOST_WIDE_INT npeel;
 493   unsigned n_remove_edges, i;
 494   edge *remove_edges, ein;
 495   struct niter_desc *desc = get_simple_loop_desc (loop);
 496   struct opt_info *opt_info = NULL;
 497
 498   npeel = desc->niter;
 499
 500   if (npeel)
 501     {
 502       bool ok;
 503
 504       wont_exit = sbitmap_alloc (npeel + 1);
 505       sbitmap_ones (wont_exit);
 506       RESET_BIT (wont_exit, 0);
 507       if (desc->noloop_assumptions)
 508         RESET_BIT (wont_exit, 1);
 509
 510       remove_edges = XCNEWVEC (edge, npeel);
 511       n_remove_edges = 0;
 512
 513       if (flag_split_ivs_in_unroller)
 514         opt_info = analyze_insns_in_loop (loop);
 515
 516       opt_info_start_duplication (opt_info);
 517       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 518                                           loops, npeel,
 519                                           wont_exit, desc->out_edge,
 520                                           remove_edges, &n_remove_edges,
 521                                           DLTHE_FLAG_UPDATE_FREQ
 522                                           | DLTHE_FLAG_COMPLETTE_PEEL
 523                                           | (opt_info
 524                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 525       gcc_assert (ok);
 526
 527       free (wont_exit);
 528
 529       if (opt_info)
 530         {
 531           apply_opt_in_copies (opt_info, npeel, false, true);
 532           free_opt_info (opt_info);
 533         }
 534
 535       /* Remove the exit edges.  */
 536       for (i = 0; i < n_remove_edges; i++)
 537         remove_path (loops, remove_edges[i]);
 538       free (remove_edges);
 539     }
 540
 541   ein = desc->in_edge;
 542   free_simple_loop_desc (loop);
 543
 544   /* Now remove the unreachable part of the last iteration and cancel
 545      the loop.  */
 546   remove_path (loops, ein);
 547
 548   if (dump_file)
 549     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 550 }
 551
 552 /* Decide whether to unroll LOOP iterating constant number of times
 553    and how much.  */
 554
 555 static void
 556 decide_unroll_constant_iterations (struct loop *loop, int flags)
 557 {
 558   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 559   struct niter_desc *desc;
 560
 561   if (!(flags & UAP_UNROLL))
 562     {
 563       /* We were not asked to, just return back silently.  */
 564       return;
 565     }
 566
 567   if (dump_file)
 568     fprintf (dump_file,
 569              "\n;; Considering unrolling loop with constant "
 570              "number of iterations\n");
 571
 572   /* nunroll = total number of copies of the original loop body in
 573      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 574   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 575   nunroll_by_av
 576     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 577   if (nunroll > nunroll_by_av)
 578     nunroll = nunroll_by_av;
 579   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 580     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 581
 582   /* Skip big loops.  */
 583   if (nunroll <= 1)
 584     {
 585       if (dump_file)
 586         fprintf (dump_file, ";; Not considering loop, is too big\n");
 587       return;
 588     }
 589
 590   /* Check for simple loops.  */
 591   desc = get_simple_loop_desc (loop);
 592
 593   /* Check number of iterations.  */
 594   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 595     {
 596       if (dump_file)
 597         fprintf (dump_file,
 598                  ";; Unable to prove that the loop iterates constant times\n");
 599       return;
 600     }
 601
 602   /* Check whether the loop rolls enough to consider.  */
 603   if (desc->niter < 2 * nunroll)
 604     {
 605       if (dump_file)
 606         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 607       return;
 608     }
 609
 610   /* Success; now compute number of iterations to unroll.  We alter
 611      nunroll so that as few as possible copies of loop body are
 612      necessary, while still not decreasing the number of unrollings
 613      too much (at most by 1).  */
 614   best_copies = 2 * nunroll + 10;
 615
 616   i = 2 * nunroll + 2;
 617   if (i - 1 >= desc->niter)
 618     i = desc->niter - 2;
 619
 620   for (; i >= nunroll - 1; i--)
 621     {
 622       unsigned exit_mod = desc->niter % (i + 1);
 623
 624       if (!loop_exit_at_end_p (loop))
 625         n_copies = exit_mod + i + 1;
 626       else if (exit_mod != (unsigned) i
 627                || desc->noloop_assumptions != NULL_RTX)
 628         n_copies = exit_mod + i + 2;
 629       else
 630         n_copies = i + 1;
 631
 632       if (n_copies < best_copies)
 633         {
 634           best_copies = n_copies;
 635           best_unroll = i;
 636         }
 637     }
 638
 639   if (dump_file)
 640     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 641              best_unroll + 1, best_copies, nunroll);
 642
 643   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 644   loop->lpt_decision.times = best_unroll;
 645
 646   if (dump_file)
 647     fprintf (dump_file,
 648              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 649              loop->lpt_decision.times);
 650 }
 651
 652 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 653    times.  The transformation does this:
 654
 655    for (i = 0; i < 102; i++)
 656      body;
 657
 658    ==>
 659
 660    i = 0;
 661    body; i++;
 662    body; i++;
 663    while (i < 102)
 664      {
 665        body; i++;
 666        body; i++;
 667        body; i++;
 668        body; i++;
 669      }
 670   */
 671 static void
 672 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 673 {
 674   unsigned HOST_WIDE_INT niter;
 675   unsigned exit_mod;
 676   sbitmap wont_exit;
 677   unsigned n_remove_edges, i;
 678   edge *remove_edges;
 679   unsigned max_unroll = loop->lpt_decision.times;
 680   struct niter_desc *desc = get_simple_loop_desc (loop);
 681   bool exit_at_end = loop_exit_at_end_p (loop);
 682   struct opt_info *opt_info = NULL;
 683   bool ok;
 684
 685   niter = desc->niter;
 686
 687   /* Should not get here (such loop should be peeled instead).  */
 688   gcc_assert (niter > max_unroll + 1);
 689
 690   exit_mod = niter % (max_unroll + 1);
 691
 692   wont_exit = sbitmap_alloc (max_unroll + 1);
 693   sbitmap_ones (wont_exit);
 694
 695   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
 696   n_remove_edges = 0;
 697   if (flag_split_ivs_in_unroller
 698       || flag_variable_expansion_in_unroller)
 699     opt_info = analyze_insns_in_loop (loop);
 700
 701   if (!exit_at_end)
 702     {
 703       /* The exit is not at the end of the loop; leave exit test
 704          in the first copy, so that the loops that start with test
 705          of exit condition have continuous body after unrolling.  */
 706
 707       if (dump_file)
 708         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 709
 710       /* Peel exit_mod iterations.  */
 711       RESET_BIT (wont_exit, 0);
 712       if (desc->noloop_assumptions)
 713         RESET_BIT (wont_exit, 1);
 714
 715       if (exit_mod)
 716         {
 717           opt_info_start_duplication (opt_info);
 718           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 719                                               loops, exit_mod,
 720                                               wont_exit, desc->out_edge,
 721                                               remove_edges, &n_remove_edges,
 722                                               DLTHE_FLAG_UPDATE_FREQ
 723                                               | (opt_info && exit_mod > 1
 724                                                  ? DLTHE_RECORD_COPY_NUMBER
 725                                                    : 0));
 726           gcc_assert (ok);
 727
 728           if (opt_info && exit_mod > 1)
 729             apply_opt_in_copies (opt_info, exit_mod, false, false);
 730
 731           desc->noloop_assumptions = NULL_RTX;
 732           desc->niter -= exit_mod;
 733           desc->niter_max -= exit_mod;
 734         }
 735
 736       SET_BIT (wont_exit, 1);
 737     }
 738   else
 739     {
 740       /* Leave exit test in last copy, for the same reason as above if
 741          the loop tests the condition at the end of loop body.  */
 742
 743       if (dump_file)
 744         fprintf (dump_file, ";; Condition on end of loop.\n");
 745
 746       /* We know that niter >= max_unroll + 2; so we do not need to care of
 747          case when we would exit before reaching the loop.  So just peel
 748          exit_mod + 1 iterations.  */
 749       if (exit_mod != max_unroll
 750           || desc->noloop_assumptions)
 751         {
 752           RESET_BIT (wont_exit, 0);
 753           if (desc->noloop_assumptions)
 754             RESET_BIT (wont_exit, 1);
 755
 756           opt_info_start_duplication (opt_info);
 757           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 758                                               loops, exit_mod + 1,
 759                                               wont_exit, desc->out_edge,
 760                                               remove_edges, &n_remove_edges,
 761                                               DLTHE_FLAG_UPDATE_FREQ
 762                                               | (opt_info && exit_mod > 0
 763                                                  ? DLTHE_RECORD_COPY_NUMBER
 764                                                    : 0));
 765           gcc_assert (ok);
 766
 767           if (opt_info && exit_mod > 0)
 768             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 769
 770           desc->niter -= exit_mod + 1;
 771           desc->niter_max -= exit_mod + 1;
 772           desc->noloop_assumptions = NULL_RTX;
 773
 774           SET_BIT (wont_exit, 0);
 775           SET_BIT (wont_exit, 1);
 776         }
 777
 778       RESET_BIT (wont_exit, max_unroll);
 779     }
 780
 781   /* Now unroll the loop.  */
 782
 783   opt_info_start_duplication (opt_info);
 784   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 785                                       loops, max_unroll,
 786                                       wont_exit, desc->out_edge,
 787                                       remove_edges, &n_remove_edges,
 788                                       DLTHE_FLAG_UPDATE_FREQ
 789                                       | (opt_info
 790                                          ? DLTHE_RECORD_COPY_NUMBER
 791                                            : 0));
 792   gcc_assert (ok);
 793
 794   if (opt_info)
 795     {
 796       apply_opt_in_copies (opt_info, max_unroll, true, true);
 797       free_opt_info (opt_info);
 798     }
 799
 800   free (wont_exit);
 801
 802   if (exit_at_end)
 803     {
 804       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 805       /* Find a new in and out edge; they are in the last copy we have made.  */
 806
 807       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 808         {
 809           desc->out_edge = EDGE_SUCC (exit_block, 0);
 810           desc->in_edge = EDGE_SUCC (exit_block, 1);
 811         }
 812       else
 813         {
 814           desc->out_edge = EDGE_SUCC (exit_block, 1);
 815           desc->in_edge = EDGE_SUCC (exit_block, 0);
 816         }
 817     }
 818
 819   desc->niter /= max_unroll + 1;
 820   desc->niter_max /= max_unroll + 1;
 821   desc->niter_expr = GEN_INT (desc->niter);
 822
 823   /* Remove the edges.  */
 824   for (i = 0; i < n_remove_edges; i++)
 825     remove_path (loops, remove_edges[i]);
 826   free (remove_edges);
 827
 828   if (dump_file)
 829     fprintf (dump_file,
 830              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 831              max_unroll, num_loop_insns (loop));
 832 }
 833
 834 /* Decide whether to unroll LOOP iterating runtime computable number of times
 835    and how much.  */
 836 static void
 837 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 838 {
 839   unsigned nunroll, nunroll_by_av, i;
 840   struct niter_desc *desc;
 841
 842   if (!(flags & UAP_UNROLL))
 843     {
 844       /* We were not asked to, just return back silently.  */
 845       return;
 846     }
 847
 848   if (dump_file)
 849     fprintf (dump_file,
 850              "\n;; Considering unrolling loop with runtime "
 851              "computable number of iterations\n");
 852
 853   /* nunroll = total number of copies of the original loop body in
 854      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 855   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 856   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 857   if (nunroll > nunroll_by_av)
 858     nunroll = nunroll_by_av;
 859   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 860     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 861
 862   /* Skip big loops.  */
 863   if (nunroll <= 1)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file, ";; Not considering loop, is too big\n");
 867       return;
 868     }
 869
 870   /* Check for simple loops.  */
 871   desc = get_simple_loop_desc (loop);
 872
 873   /* Check simpleness.  */
 874   if (!desc->simple_p || desc->assumptions)
 875     {
 876       if (dump_file)
 877         fprintf (dump_file,
 878                  ";; Unable to prove that the number of iterations "
 879                  "can be counted in runtime\n");
 880       return;
 881     }
 882
 883   if (desc->const_iter)
 884     {
 885       if (dump_file)
 886         fprintf (dump_file, ";; Loop iterates constant times\n");
 887       return;
 888     }
 889
 890   /* If we have profile feedback, check whether the loop rolls.  */
 891   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 892     {
 893       if (dump_file)
 894         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 895       return;
 896     }
 897
 898   /* Success; now force nunroll to be power of 2, as we are unable to
 899      cope with overflows in computation of number of iterations.  */
 900   for (i = 1; 2 * i <= nunroll; i *= 2)
 901     continue;
 902
 903   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 904   loop->lpt_decision.times = i - 1;
 905
 906   if (dump_file)
 907     fprintf (dump_file,
 908              ";; Decided to unroll the runtime computable "
 909              "times rolling loop, %d times.\n",
 910              loop->lpt_decision.times);
 911 }
 912
 913 /* Unroll LOOP for that we are able to count number of iterations in runtime
 914    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 915    extra care for case n < 0):
 916
 917    for (i = 0; i < n; i++)
 918      body;
 919
 920    ==>
 921
 922    i = 0;
 923    mod = n % 4;
 924
 925    switch (mod)
 926      {
 927        case 3:
 928          body; i++;
 929        case 2:
 930          body; i++;
 931        case 1:
 932          body; i++;
 933        case 0: ;
 934      }
 935
 936    while (i < n)
 937      {
 938        body; i++;
 939        body; i++;
 940        body; i++;
 941        body; i++;
 942      }
 943    */
 944 static void
 945 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 946 {
 947   rtx old_niter, niter, init_code, branch_code, tmp;
 948   unsigned i, j, p;
 949   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 950   unsigned n_dom_bbs;
 951   sbitmap wont_exit;
 952   int may_exit_copy;
 953   unsigned n_peel, n_remove_edges;
 954   edge *remove_edges, e;
 955   bool extra_zero_check, last_may_exit;
 956   unsigned max_unroll = loop->lpt_decision.times;
 957   struct niter_desc *desc = get_simple_loop_desc (loop);
 958   bool exit_at_end = loop_exit_at_end_p (loop);
 959   struct opt_info *opt_info = NULL;
 960   bool ok;
 961
 962   if (flag_split_ivs_in_unroller
 963       || flag_variable_expansion_in_unroller)
 964     opt_info = analyze_insns_in_loop (loop);
 965
 966   /* Remember blocks whose dominators will have to be updated.  */
 967   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 968   n_dom_bbs = 0;
 969
 970   body = get_loop_body (loop);
 971   for (i = 0; i < loop->num_nodes; i++)
 972     {
 973       unsigned nldom;
 974       basic_block *ldom;
 975
 976       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 977       for (j = 0; j < nldom; j++)
 978         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 979           dom_bbs[n_dom_bbs++] = ldom[j];
 980
 981       free (ldom);
 982     }
 983   free (body);
 984
 985   if (!exit_at_end)
 986     {
 987       /* Leave exit in first copy (for explanation why see comment in
 988          unroll_loop_constant_iterations).  */
 989       may_exit_copy = 0;
 990       n_peel = max_unroll - 1;
 991       extra_zero_check = true;
 992       last_may_exit = false;
 993     }
 994   else
 995     {
 996       /* Leave exit in last copy (for explanation why see comment in
 997          unroll_loop_constant_iterations).  */
 998       may_exit_copy = max_unroll;
 999       n_peel = max_unroll;
1000       extra_zero_check = false;
1001       last_may_exit = true;
1002     }
1003
1004   /* Get expression for number of iterations.  */
1005   start_sequence ();
1006   old_niter = niter = gen_reg_rtx (desc->mode);
1007   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1008   if (tmp != niter)
1009     emit_move_insn (niter, tmp);
1010
1011   /* Count modulo by ANDing it with max_unroll; we use the fact that
1012      the number of unrollings is a power of two, and thus this is correct
1013      even if there is overflow in the computation.  */
1014   niter = expand_simple_binop (desc->mode, AND,
1015                                niter,
1016                                GEN_INT (max_unroll),
1017                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1018
1019   init_code = get_insns ();
1020   end_sequence ();
1021
1022   /* Precondition the loop.  */
1023   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1024
1025   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1026   n_remove_edges = 0;
1027
1028   wont_exit = sbitmap_alloc (max_unroll + 2);
1029
1030   /* Peel the first copy of loop body (almost always we must leave exit test
1031      here; the only exception is when we have extra zero check and the number
1032      of iterations is reliable.  Also record the place of (possible) extra
1033      zero check.  */
1034   sbitmap_zero (wont_exit);
1035   if (extra_zero_check
1036       && !desc->noloop_assumptions)
1037     SET_BIT (wont_exit, 1);
1038   ezc_swtch = loop_preheader_edge (loop)->src;
1039   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1040                                       loops, 1,
1041                                       wont_exit, desc->out_edge,
1042                                       remove_edges, &n_remove_edges,
1043                                       DLTHE_FLAG_UPDATE_FREQ);
1044   gcc_assert (ok);
1045
1046   /* Record the place where switch will be built for preconditioning.  */
1047   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1048                                 NULL_RTX);
1049
1050   for (i = 0; i < n_peel; i++)
1051     {
1052       /* Peel the copy.  */
1053       sbitmap_zero (wont_exit);
1054       if (i != n_peel - 1 || !last_may_exit)
1055         SET_BIT (wont_exit, 1);
1056       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1057                                           loops, 1,
1058                                           wont_exit, desc->out_edge,
1059                                           remove_edges, &n_remove_edges,
1060                                           DLTHE_FLAG_UPDATE_FREQ);
1061       gcc_assert (ok);
1062
1063       /* Create item for switch.  */
1064       j = n_peel - i - (extra_zero_check ? 0 : 1);
1065       p = REG_BR_PROB_BASE / (i + 2);
1066
1067       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1068       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1069                                           block_label (preheader), p,
1070                                           NULL_RTX);
1071
1072       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1073       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1074       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1075       e = make_edge (swtch, preheader,
1076                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1077       e->probability = p;
1078     }
1079
1080   if (extra_zero_check)
1081     {
1082       /* Add branch for zero iterations.  */
1083       p = REG_BR_PROB_BASE / (max_unroll + 1);
1084       swtch = ezc_swtch;
1085       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1086       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1087                                           block_label (preheader), p,
1088                                           NULL_RTX);
1089
1090       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1091       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1092       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1093       e = make_edge (swtch, preheader,
1094                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1095       e->probability = p;
1096     }
1097
1098   /* Recount dominators for outer blocks.  */
1099   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1100
1101   /* And unroll loop.  */
1102
1103   sbitmap_ones (wont_exit);
1104   RESET_BIT (wont_exit, may_exit_copy);
1105   opt_info_start_duplication (opt_info);
1106
1107   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1108                                       loops, max_unroll,
1109                                       wont_exit, desc->out_edge,
1110                                       remove_edges, &n_remove_edges,
1111                                       DLTHE_FLAG_UPDATE_FREQ
1112                                       | (opt_info
1113                                          ? DLTHE_RECORD_COPY_NUMBER
1114                                            : 0));
1115   gcc_assert (ok);
1116
1117   if (opt_info)
1118     {
1119       apply_opt_in_copies (opt_info, max_unroll, true, true);
1120       free_opt_info (opt_info);
1121     }
1122
1123   free (wont_exit);
1124
1125   if (exit_at_end)
1126     {
1127       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1128       /* Find a new in and out edge; they are in the last copy we have
1129          made.  */
1130
1131       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1132         {
1133           desc->out_edge = EDGE_SUCC (exit_block, 0);
1134           desc->in_edge = EDGE_SUCC (exit_block, 1);
1135         }
1136       else
1137         {
1138           desc->out_edge = EDGE_SUCC (exit_block, 1);
1139           desc->in_edge = EDGE_SUCC (exit_block, 0);
1140         }
1141     }
1142
1143   /* Remove the edges.  */
1144   for (i = 0; i < n_remove_edges; i++)
1145     remove_path (loops, remove_edges[i]);
1146   free (remove_edges);
1147
1148   /* We must be careful when updating the number of iterations due to
1149      preconditioning and the fact that the value must be valid at entry
1150      of the loop.  After passing through the above code, we see that
1151      the correct new number of iterations is this:  */
1152   gcc_assert (!desc->const_iter);
1153   desc->niter_expr =
1154     simplify_gen_binary (UDIV, desc->mode, old_niter,
1155                          GEN_INT (max_unroll + 1));
1156   desc->niter_max /= max_unroll + 1;
1157   if (exit_at_end)
1158     {
1159       desc->niter_expr =
1160         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1161       desc->noloop_assumptions = NULL_RTX;
1162       desc->niter_max--;
1163     }
1164
1165   if (dump_file)
1166     fprintf (dump_file,
1167              ";; Unrolled loop %d times, counting # of iterations "
1168              "in runtime, %i insns\n",
1169              max_unroll, num_loop_insns (loop));
1170
1171   if (dom_bbs)
1172     free (dom_bbs);
1173 }
1174
1175 /* Decide whether to simply peel LOOP and how much.  */
1176 static void
1177 decide_peel_simple (struct loop *loop, int flags)
1178 {
1179   unsigned npeel;
1180   struct niter_desc *desc;
1181
1182   if (!(flags & UAP_PEEL))
1183     {
1184       /* We were not asked to, just return back silently.  */
1185       return;
1186     }
1187
1188   if (dump_file)
1189     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1190
1191   /* npeel = number of iterations to peel.  */
1192   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1193   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1194     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1195
1196   /* Skip big loops.  */
1197   if (!npeel)
1198     {
1199       if (dump_file)
1200         fprintf (dump_file, ";; Not considering loop, is too big\n");
1201       return;
1202     }
1203
1204   /* Check for simple loops.  */
1205   desc = get_simple_loop_desc (loop);
1206
1207   /* Check number of iterations.  */
1208   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1209     {
1210       if (dump_file)
1211         fprintf (dump_file, ";; Loop iterates constant times\n");
1212       return;
1213     }
1214
1215   /* Do not simply peel loops with branches inside -- it increases number
1216      of mispredicts.  */
1217   if (num_loop_branches (loop) > 1)
1218     {
1219       if (dump_file)
1220         fprintf (dump_file, ";; Not peeling, contains branches\n");
1221       return;
1222     }
1223
1224   if (loop->header->count)
1225     {
1226       unsigned niter = expected_loop_iterations (loop);
1227       if (niter + 1 > npeel)
1228         {
1229           if (dump_file)
1230             {
1231               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1232               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1233                        (HOST_WIDEST_INT) (niter + 1));
1234               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1235                        npeel);
1236             }
1237           return;
1238         }
1239       npeel = niter + 1;
1240     }
1241   else
1242     {
1243       /* For now we have no good heuristics to decide whether loop peeling
1244          will be effective, so disable it.  */
1245       if (dump_file)
1246         fprintf (dump_file,
1247                  ";; Not peeling loop, no evidence it will be profitable\n");
1248       return;
1249     }
1250
1251   /* Success.  */
1252   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1253   loop->lpt_decision.times = npeel;
1254
1255   if (dump_file)
1256     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1257              loop->lpt_decision.times);
1258 }
1259
1260 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1261    while (cond)
1262      body;
1263
1264    ==>
1265
1266    if (!cond) goto end;
1267    body;
1268    if (!cond) goto end;
1269    body;
1270    while (cond)
1271      body;
1272    end: ;
1273    */
1274 static void
1275 peel_loop_simple (struct loops *loops, struct loop *loop)
1276 {
1277   sbitmap wont_exit;
1278   unsigned npeel = loop->lpt_decision.times;
1279   struct niter_desc *desc = get_simple_loop_desc (loop);
1280   struct opt_info *opt_info = NULL;
1281   bool ok;
1282
1283   if (flag_split_ivs_in_unroller && npeel > 1)
1284     opt_info = analyze_insns_in_loop (loop);
1285
1286   wont_exit = sbitmap_alloc (npeel + 1);
1287   sbitmap_zero (wont_exit);
1288
1289   opt_info_start_duplication (opt_info);
1290
1291   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1292                                       loops, npeel, wont_exit,
1293                                       NULL, NULL,
1294                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1295                                       | (opt_info
1296                                          ? DLTHE_RECORD_COPY_NUMBER
1297                                            : 0));
1298   gcc_assert (ok);
1299
1300   free (wont_exit);
1301
1302   if (opt_info)
1303     {
1304       apply_opt_in_copies (opt_info, npeel, false, false);
1305       free_opt_info (opt_info);
1306     }
1307
1308   if (desc->simple_p)
1309     {
1310       if (desc->const_iter)
1311         {
1312           desc->niter -= npeel;
1313           desc->niter_expr = GEN_INT (desc->niter);
1314           desc->noloop_assumptions = NULL_RTX;
1315         }
1316       else
1317         {
1318           /* We cannot just update niter_expr, as its value might be clobbered
1319              inside loop.  We could handle this by counting the number into
1320              temporary just like we do in runtime unrolling, but it does not
1321              seem worthwhile.  */
1322           free_simple_loop_desc (loop);
1323         }
1324     }
1325   if (dump_file)
1326     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1327 }
1328
1329 /* Decide whether to unroll LOOP stupidly and how much.  */
1330 static void
1331 decide_unroll_stupid (struct loop *loop, int flags)
1332 {
1333   unsigned nunroll, nunroll_by_av, i;
1334   struct niter_desc *desc;
1335
1336   if (!(flags & UAP_UNROLL_ALL))
1337     {
1338       /* We were not asked to, just return back silently.  */
1339       return;
1340     }
1341
1342   if (dump_file)
1343     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1344
1345   /* nunroll = total number of copies of the original loop body in
1346      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1347   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1348   nunroll_by_av
1349     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1350   if (nunroll > nunroll_by_av)
1351     nunroll = nunroll_by_av;
1352   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1353     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1354
1355   /* Skip big loops.  */
1356   if (nunroll <= 1)
1357     {
1358       if (dump_file)
1359         fprintf (dump_file, ";; Not considering loop, is too big\n");
1360       return;
1361     }
1362
1363   /* Check for simple loops.  */
1364   desc = get_simple_loop_desc (loop);
1365
1366   /* Check simpleness.  */
1367   if (desc->simple_p && !desc->assumptions)
1368     {
1369       if (dump_file)
1370         fprintf (dump_file, ";; The loop is simple\n");
1371       return;
1372     }
1373
1374   /* Do not unroll loops with branches inside -- it increases number
1375      of mispredicts.  */
1376   if (num_loop_branches (loop) > 1)
1377     {
1378       if (dump_file)
1379         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1380       return;
1381     }
1382
1383   /* If we have profile feedback, check whether the loop rolls.  */
1384   if (loop->header->count
1385       && expected_loop_iterations (loop) < 2 * nunroll)
1386     {
1387       if (dump_file)
1388         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1389       return;
1390     }
1391
1392   /* Success.  Now force nunroll to be power of 2, as it seems that this
1393      improves results (partially because of better alignments, partially
1394      because of some dark magic).  */
1395   for (i = 1; 2 * i <= nunroll; i *= 2)
1396     continue;
1397
1398   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1399   loop->lpt_decision.times = i - 1;
1400
1401   if (dump_file)
1402     fprintf (dump_file,
1403              ";; Decided to unroll the loop stupidly, %d times.\n",
1404              loop->lpt_decision.times);
1405 }
1406
1407 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1408    while (cond)
1409      body;
1410
1411    ==>
1412
1413    while (cond)
1414      {
1415        body;
1416        if (!cond) break;
1417        body;
1418        if (!cond) break;
1419        body;
1420        if (!cond) break;
1421        body;
1422      }
1423    */
1424 static void
1425 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1426 {
1427   sbitmap wont_exit;
1428   unsigned nunroll = loop->lpt_decision.times;
1429   struct niter_desc *desc = get_simple_loop_desc (loop);
1430   struct opt_info *opt_info = NULL;
1431   bool ok;
1432
1433   if (flag_split_ivs_in_unroller
1434       || flag_variable_expansion_in_unroller)
1435     opt_info = analyze_insns_in_loop (loop);
1436
1437
1438   wont_exit = sbitmap_alloc (nunroll + 1);
1439   sbitmap_zero (wont_exit);
1440   opt_info_start_duplication (opt_info);
1441
1442   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1443                                       loops, nunroll, wont_exit,
1444                                       NULL, NULL, NULL,
1445                                       DLTHE_FLAG_UPDATE_FREQ
1446                                       | (opt_info
1447                                          ? DLTHE_RECORD_COPY_NUMBER
1448                                            : 0));
1449   gcc_assert (ok);
1450
1451   if (opt_info)
1452     {
1453       apply_opt_in_copies (opt_info, nunroll, true, true);
1454       free_opt_info (opt_info);
1455     }
1456
1457   free (wont_exit);
1458
1459   if (desc->simple_p)
1460     {
1461       /* We indeed may get here provided that there are nontrivial assumptions
1462          for a loop to be really simple.  We could update the counts, but the
1463          problem is that we are unable to decide which exit will be taken
1464          (not really true in case the number of iterations is constant,
1465          but noone will do anything with this information, so we do not
1466          worry about it).  */
1467       desc->simple_p = false;
1468     }
1469
1470   if (dump_file)
1471     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1472              nunroll, num_loop_insns (loop));
1473 }
1474
1475 /* A hash function for information about insns to split.  */
1476
1477 static hashval_t
1478 si_info_hash (const void *ivts)
1479 {
1480   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1481 }
1482
1483 /* An equality functions for information about insns to split.  */
1484
1485 static int
1486 si_info_eq (const void *ivts1, const void *ivts2)
1487 {
1488   const struct iv_to_split *i1 = ivts1;
1489   const struct iv_to_split *i2 = ivts2;
1490
1491   return i1->insn == i2->insn;
1492 }
1493
1494 /* Return a hash for VES, which is really a "var_to_expand *".  */
1495
1496 static hashval_t
1497 ve_info_hash (const void *ves)
1498 {
1499   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1500 }
1501
1502 /* Return true if IVTS1 and IVTS2 (which are really both of type
1503    "var_to_expand *") refer to the same instruction.  */
1504
1505 static int
1506 ve_info_eq (const void *ivts1, const void *ivts2)
1507 {
1508   const struct var_to_expand *i1 = ivts1;
1509   const struct var_to_expand *i2 = ivts2;
1510
1511   return i1->insn == i2->insn;
1512 }
1513
1514 /* Returns true if REG is referenced in one insn in LOOP.  */
1515
1516 bool
1517 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1518 {
1519   basic_block *body, bb;
1520   unsigned i;
1521   int count_ref = 0;
1522   rtx insn;
1523
1524   body = get_loop_body (loop);
1525   for (i = 0; i < loop->num_nodes; i++)
1526     {
1527       bb = body[i];
1528
1529       FOR_BB_INSNS (bb, insn)
1530       {
1531         if (rtx_referenced_p (reg, insn))
1532           count_ref++;
1533       }
1534     }
1535   return (count_ref  == 1);
1536 }
1537
1538 /* Determine whether INSN contains an accumulator
1539    which can be expanded into separate copies,
1540    one for each copy of the LOOP body.
1541
1542    for (i = 0 ; i < n; i++)
1543      sum += a[i];
1544
1545    ==>
1546
1547    sum += a[i]
1548    ....
1549    i = i+1;
1550    sum1 += a[i]
1551    ....
1552    i = i+1
1553    sum2 += a[i];
1554    ....
1555
1556    Return NULL if INSN contains no opportunity for expansion of accumulator.
1557    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1558    information and return a pointer to it.
1559 */
1560
1561 static struct var_to_expand *
1562 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1563 {
1564   rtx set, dest, src, op1;
1565   struct var_to_expand *ves;
1566   enum machine_mode mode1, mode2;
1567
1568   set = single_set (insn);
1569   if (!set)
1570     return NULL;
1571
1572   dest = SET_DEST (set);
1573   src = SET_SRC (set);
1574
1575   if (GET_CODE (src) != PLUS
1576       && GET_CODE (src) != MINUS
1577       && GET_CODE (src) != MULT)
1578     return NULL;
1579
1580   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1581      in MD.  But if there is no optab to generate the insn, we can not
1582      perform the variable expansion.  This can happen if an MD provides
1583      an insn but not a named pattern to generate it, for example to avoid
1584      producing code that needs additional mode switches like for x87/mmx.
1585
1586      So we check have_insn_for which looks for an optab for the operation
1587      in SRC.  If it doesn't exist, we can't perform the expansion even
1588      though INSN is valid.  */
1589   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1590     return NULL;
1591
1592   if (!XEXP (src, 0))
1593     return NULL;
1594
1595   op1 = XEXP (src, 0);
1596
1597   if (!REG_P (dest)
1598       && !(GET_CODE (dest) == SUBREG
1599            && REG_P (SUBREG_REG (dest))))
1600     return NULL;
1601
1602   if (!rtx_equal_p (dest, op1))
1603     return NULL;
1604
1605   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1606     return NULL;
1607
1608   if (rtx_referenced_p (dest, XEXP (src, 1)))
1609     return NULL;
1610
1611   mode1 = GET_MODE (dest);
1612   mode2 = GET_MODE (XEXP (src, 1));
1613   if ((FLOAT_MODE_P (mode1)
1614        || FLOAT_MODE_P (mode2))
1615       && !flag_unsafe_math_optimizations)
1616     return NULL;
1617
1618   /* Record the accumulator to expand.  */
1619   ves = XNEW (struct var_to_expand);
1620   ves->insn = insn;
1621   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1622   ves->reg = copy_rtx (dest);
1623   ves->op = GET_CODE (src);
1624   ves->expansion_count = 0;
1625   ves->reuse_expansion = 0;
1626   return ves;
1627 }
1628
1629 /* Determine whether there is an induction variable in INSN that
1630    we would like to split during unrolling.
1631
1632    I.e. replace
1633
1634    i = i + 1;
1635    ...
1636    i = i + 1;
1637    ...
1638    i = i + 1;
1639    ...
1640
1641    type chains by
1642
1643    i0 = i + 1
1644    ...
1645    i = i0 + 1
1646    ...
1647    i = i0 + 2
1648    ...
1649
1650    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1651    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1652    pointer to it.  */
1653
1654 static struct iv_to_split *
1655 analyze_iv_to_split_insn (rtx insn)
1656 {
1657   rtx set, dest;
1658   struct rtx_iv iv;
1659   struct iv_to_split *ivts;
1660   bool ok;
1661
1662   /* For now we just split the basic induction variables.  Later this may be
1663      extended for example by selecting also addresses of memory references.  */
1664   set = single_set (insn);
1665   if (!set)
1666     return NULL;
1667
1668   dest = SET_DEST (set);
1669   if (!REG_P (dest))
1670     return NULL;
1671
1672   if (!biv_p (insn, dest))
1673     return NULL;
1674
1675   ok = iv_analyze_result (insn, dest, &iv);
1676   gcc_assert (ok);
1677
1678   if (iv.step == const0_rtx
1679       || iv.mode != iv.extend_mode)
1680     return NULL;
1681
1682   /* Record the insn to split.  */
1683   ivts = XNEW (struct iv_to_split);
1684   ivts->insn = insn;
1685   ivts->base_var = NULL_RTX;
1686   ivts->step = iv.step;
1687   ivts->n_loc = 1;
1688   ivts->loc[0] = 1;
1689
1690   return ivts;
1691 }
1692
1693 /* Determines which of insns in LOOP can be optimized.
1694    Return a OPT_INFO struct with the relevant hash tables filled
1695    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1696    is undefined for the return value.  */
1697
1698 static struct opt_info *
1699 analyze_insns_in_loop (struct loop *loop)
1700 {
1701   basic_block *body, bb;
1702   unsigned i, num_edges = 0;
1703   struct opt_info *opt_info = XCNEW (struct opt_info);
1704   rtx insn;
1705   struct iv_to_split *ivts = NULL;
1706   struct var_to_expand *ves = NULL;
1707   PTR *slot1;
1708   PTR *slot2;
1709   edge *edges = get_loop_exit_edges (loop, &num_edges);
1710   bool can_apply = false;
1711
1712   iv_analysis_loop_init (loop);
1713
1714   body = get_loop_body (loop);
1715
1716   if (flag_split_ivs_in_unroller)
1717     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1718                                             si_info_hash, si_info_eq, free);
1719
1720   /* Record the loop exit bb and loop preheader before the unrolling.  */
1721   if (!loop_preheader_edge (loop)->src)
1722     {
1723       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1724       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1725     }
1726   else
1727     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1728
1729   if (num_edges == 1
1730       && !(edges[0]->flags & EDGE_COMPLEX))
1731     {
1732       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1733       can_apply = true;
1734     }
1735
1736   if (flag_variable_expansion_in_unroller
1737       && can_apply)
1738     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1739                                                       ve_info_hash, ve_info_eq, free);
1740
1741   for (i = 0; i < loop->num_nodes; i++)
1742     {
1743       bb = body[i];
1744       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1745         continue;
1746
1747       FOR_BB_INSNS (bb, insn)
1748       {
1749         if (!INSN_P (insn))
1750           continue;
1751
1752         if (opt_info->insns_to_split)
1753           ivts = analyze_iv_to_split_insn (insn);
1754
1755         if (ivts)
1756           {
1757             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1758             *slot1 = ivts;
1759             continue;
1760           }
1761
1762         if (opt_info->insns_with_var_to_expand)
1763           ves = analyze_insn_to_expand_var (loop, insn);
1764
1765         if (ves)
1766           {
1767             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1768             *slot2 = ves;
1769           }
1770       }
1771     }
1772
1773   free (edges);
1774   free (body);
1775   return opt_info;
1776 }
1777
1778 /* Called just before loop duplication.  Records start of duplicated area
1779    to OPT_INFO.  */
1780
1781 static void
1782 opt_info_start_duplication (struct opt_info *opt_info)
1783 {
1784   if (opt_info)
1785     opt_info->first_new_block = last_basic_block;
1786 }
1787
1788 /* Determine the number of iterations between initialization of the base
1789    variable and the current copy (N_COPY).  N_COPIES is the total number
1790    of newly created copies.  UNROLLING is true if we are unrolling
1791    (not peeling) the loop.  */
1792
1793 static unsigned
1794 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1795 {
1796   if (unrolling)
1797     {
1798       /* If we are unrolling, initialization is done in the original loop
1799          body (number 0).  */
1800       return n_copy;
1801     }
1802   else
1803     {
1804       /* If we are peeling, the copy in that the initialization occurs has
1805          number 1.  The original loop (number 0) is the last.  */
1806       if (n_copy)
1807         return n_copy - 1;
1808       else
1809         return n_copies;
1810     }
1811 }
1812
1813 /* Locate in EXPR the expression corresponding to the location recorded
1814    in IVTS, and return a pointer to the RTX for this location.  */
1815
1816 static rtx *
1817 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1818 {
1819   unsigned i;
1820   rtx *ret = &expr;
1821
1822   for (i = 0; i < ivts->n_loc; i++)
1823     ret = &XEXP (*ret, ivts->loc[i]);
1824
1825   return ret;
1826 }
1827
1828 /* Allocate basic variable for the induction variable chain.  Callback for
1829    htab_traverse.  */
1830
1831 static int
1832 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1833 {
1834   struct iv_to_split *ivts = *slot;
1835   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1836
1837   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1838
1839   return 1;
1840 }
1841
1842 /* Insert initialization of basic variable of IVTS before INSN, taking
1843    the initial value from INSN.  */
1844
1845 static void
1846 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1847 {
1848   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1849   rtx seq;
1850
1851   start_sequence ();
1852   expr = force_operand (expr, ivts->base_var);
1853   if (expr != ivts->base_var)
1854     emit_move_insn (ivts->base_var, expr);
1855   seq = get_insns ();
1856   end_sequence ();
1857
1858   emit_insn_before (seq, insn);
1859 }
1860
1861 /* Replace the use of induction variable described in IVTS in INSN
1862    by base variable + DELTA * step.  */
1863
1864 static void
1865 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1866 {
1867   rtx expr, *loc, seq, incr, var;
1868   enum machine_mode mode = GET_MODE (ivts->base_var);
1869   rtx src, dest, set;
1870
1871   /* Construct base + DELTA * step.  */
1872   if (!delta)
1873     expr = ivts->base_var;
1874   else
1875     {
1876       incr = simplify_gen_binary (MULT, mode,
1877                                   ivts->step, gen_int_mode (delta, mode));
1878       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1879                                   ivts->base_var, incr);
1880     }
1881
1882   /* Figure out where to do the replacement.  */
1883   loc = get_ivts_expr (single_set (insn), ivts);
1884
1885   /* If we can make the replacement right away, we're done.  */
1886   if (validate_change (insn, loc, expr, 0))
1887     return;
1888
1889   /* Otherwise, force EXPR into a register and try again.  */
1890   start_sequence ();
1891   var = gen_reg_rtx (mode);
1892   expr = force_operand (expr, var);
1893   if (expr != var)
1894     emit_move_insn (var, expr);
1895   seq = get_insns ();
1896   end_sequence ();
1897   emit_insn_before (seq, insn);
1898
1899   if (validate_change (insn, loc, var, 0))
1900     return;
1901
1902   /* The last chance.  Try recreating the assignment in insn
1903      completely from scratch.  */
1904   set = single_set (insn);
1905   gcc_assert (set);
1906
1907   start_sequence ();
1908   *loc = var;
1909   src = copy_rtx (SET_SRC (set));
1910   dest = copy_rtx (SET_DEST (set));
1911   src = force_operand (src, dest);
1912   if (src != dest)
1913     emit_move_insn (dest, src);
1914   seq = get_insns ();
1915   end_sequence ();
1916
1917   emit_insn_before (seq, insn);
1918   delete_insn (insn);
1919 }
1920
1921
1922 /* Return one expansion of the accumulator recorded in struct VE.  */
1923
1924 static rtx
1925 get_expansion (struct var_to_expand *ve)
1926 {
1927   rtx reg;
1928
1929   if (ve->reuse_expansion == 0)
1930     reg = ve->reg;
1931   else
1932     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1933
1934   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1935     ve->reuse_expansion = 0;
1936   else
1937     ve->reuse_expansion++;
1938
1939   return reg;
1940 }
1941
1942
1943 /* Given INSN replace the uses of the accumulator recorded in VE
1944    with a new register.  */
1945
1946 static void
1947 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1948 {
1949   rtx new_reg, set;
1950   bool really_new_expansion = false;
1951
1952   set = single_set (insn);
1953   gcc_assert (set);
1954
1955   /* Generate a new register only if the expansion limit has not been
1956      reached.  Else reuse an already existing expansion.  */
1957   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1958     {
1959       really_new_expansion = true;
1960       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1961     }
1962   else
1963     new_reg = get_expansion (ve);
1964
1965   validate_change (insn, &SET_DEST (set), new_reg, 1);
1966   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1967
1968   if (apply_change_group ())
1969     if (really_new_expansion)
1970       {
1971         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1972         ve->expansion_count++;
1973       }
1974 }
1975
1976 /* Initialize the variable expansions in loop preheader.
1977    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1978    basic block where the initialization of the expansions
1979    should take place.  */
1980
1981 static int
1982 insert_var_expansion_initialization (void **slot, void *place_p)
1983 {
1984   struct var_to_expand *ve = *slot;
1985   basic_block place = (basic_block)place_p;
1986   rtx seq, var, zero_init, insn;
1987   unsigned i;
1988
1989   if (VEC_length (rtx, ve->var_expansions) == 0)
1990     return 1;
1991
1992   start_sequence ();
1993   if (ve->op == PLUS || ve->op == MINUS)
1994     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1995       {
1996         zero_init =  CONST0_RTX (GET_MODE (var));
1997         emit_move_insn (var, zero_init);
1998       }
1999   else if (ve->op == MULT)
2000     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2001       {
2002         zero_init =  CONST1_RTX (GET_MODE (var));
2003         emit_move_insn (var, zero_init);
2004       }
2005
2006   seq = get_insns ();
2007   end_sequence ();
2008
2009   insn = BB_HEAD (place);
2010   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2011     insn = NEXT_INSN (insn);
2012
2013   emit_insn_after (seq, insn);
2014   /* Continue traversing the hash table.  */
2015   return 1;
2016 }
2017
2018 /*  Combine the variable expansions at the loop exit.
2019     Callbacks for htab_traverse.  PLACE_P is the loop exit
2020     basic block where the summation of the expansions should
2021     take place.  */
2022
2023 static int
2024 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2025 {
2026   struct var_to_expand *ve = *slot;
2027   basic_block place = (basic_block)place_p;
2028   rtx sum = ve->reg;
2029   rtx expr, seq, var, insn;
2030   unsigned i;
2031
2032   if (VEC_length (rtx, ve->var_expansions) == 0)
2033     return 1;
2034
2035   start_sequence ();
2036   if (ve->op == PLUS || ve->op == MINUS)
2037     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2038       {
2039         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2040                                    var, sum);
2041       }
2042   else if (ve->op == MULT)
2043     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2044       {
2045         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2046                                    var, sum);
2047       }
2048
2049   expr = force_operand (sum, ve->reg);
2050   if (expr != ve->reg)
2051     emit_move_insn (ve->reg, expr);
2052   seq = get_insns ();
2053   end_sequence ();
2054
2055   insn = BB_HEAD (place);
2056   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2057     insn = NEXT_INSN (insn);
2058
2059   emit_insn_after (seq, insn);
2060
2061   /* Continue traversing the hash table.  */
2062   return 1;
2063 }
2064
2065 /* Apply loop optimizations in loop copies using the
2066    data which gathered during the unrolling.  Structure
2067    OPT_INFO record that data.
2068
2069    UNROLLING is true if we unrolled (not peeled) the loop.
2070    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2071    the loop (as it should happen in complete unrolling, but not in ordinary
2072    peeling of the loop).  */
2073
2074 static void
2075 apply_opt_in_copies (struct opt_info *opt_info,
2076                      unsigned n_copies, bool unrolling,
2077                      bool rewrite_original_loop)
2078 {
2079   unsigned i, delta;
2080   basic_block bb, orig_bb;
2081   rtx insn, orig_insn, next;
2082   struct iv_to_split ivts_templ, *ivts;
2083   struct var_to_expand ve_templ, *ves;
2084
2085   /* Sanity check -- we need to put initialization in the original loop
2086      body.  */
2087   gcc_assert (!unrolling || rewrite_original_loop);
2088
2089   /* Allocate the basic variables (i0).  */
2090   if (opt_info->insns_to_split)
2091     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2092
2093   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2094     {
2095       bb = BASIC_BLOCK (i);
2096       orig_bb = get_bb_original (bb);
2097
2098       /* bb->aux holds position in copy sequence initialized by
2099          duplicate_loop_to_header_edge.  */
2100       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2101                                         unrolling);
2102       bb->aux = 0;
2103       orig_insn = BB_HEAD (orig_bb);
2104       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2105         {
2106           next = NEXT_INSN (insn);
2107           if (!INSN_P (insn))
2108             continue;
2109
2110           while (!INSN_P (orig_insn))
2111             orig_insn = NEXT_INSN (orig_insn);
2112
2113           ivts_templ.insn = orig_insn;
2114           ve_templ.insn = orig_insn;
2115
2116           /* Apply splitting iv optimization.  */
2117           if (opt_info->insns_to_split)
2118             {
2119               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2120
2121               if (ivts)
2122                 {
2123                   gcc_assert (GET_CODE (PATTERN (insn))
2124                               == GET_CODE (PATTERN (orig_insn)));
2125
2126                   if (!delta)
2127                     insert_base_initialization (ivts, insn);
2128                   split_iv (ivts, insn, delta);
2129                 }
2130             }
2131           /* Apply variable expansion optimization.  */
2132           if (unrolling && opt_info->insns_with_var_to_expand)
2133             {
2134               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2135               if (ves)
2136                 {
2137                   gcc_assert (GET_CODE (PATTERN (insn))
2138                               == GET_CODE (PATTERN (orig_insn)));
2139                   expand_var_during_unrolling (ves, insn);
2140                 }
2141             }
2142           orig_insn = NEXT_INSN (orig_insn);
2143         }
2144     }
2145
2146   if (!rewrite_original_loop)
2147     return;
2148
2149   /* Initialize the variable expansions in the loop preheader
2150      and take care of combining them at the loop exit.  */
2151   if (opt_info->insns_with_var_to_expand)
2152     {
2153       htab_traverse (opt_info->insns_with_var_to_expand,
2154                      insert_var_expansion_initialization,
2155                      opt_info->loop_preheader);
2156       htab_traverse (opt_info->insns_with_var_to_expand,
2157                      combine_var_copies_in_loop_exit,
2158                      opt_info->loop_exit);
2159     }
2160
2161   /* Rewrite also the original loop body.  Find them as originals of the blocks
2162      in the last copied iteration, i.e. those that have
2163      get_bb_copy (get_bb_original (bb)) == bb.  */
2164   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2165     {
2166       bb = BASIC_BLOCK (i);
2167       orig_bb = get_bb_original (bb);
2168       if (get_bb_copy (orig_bb) != bb)
2169         continue;
2170
2171       delta = determine_split_iv_delta (0, n_copies, unrolling);
2172       for (orig_insn = BB_HEAD (orig_bb);
2173            orig_insn != NEXT_INSN (BB_END (bb));
2174            orig_insn = next)
2175         {
2176           next = NEXT_INSN (orig_insn);
2177
2178           if (!INSN_P (orig_insn))
2179             continue;
2180
2181           ivts_templ.insn = orig_insn;
2182           if (opt_info->insns_to_split)
2183             {
2184               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2185               if (ivts)
2186                 {
2187                   if (!delta)
2188                     insert_base_initialization (ivts, orig_insn);
2189                   split_iv (ivts, orig_insn, delta);
2190                   continue;
2191                 }
2192             }
2193
2194         }
2195     }
2196 }
2197
2198 /*  Release the data structures used for the variable expansion
2199     optimization.  Callbacks for htab_traverse.  */
2200
2201 static int
2202 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2203 {
2204   struct var_to_expand *ve = *slot;
2205
2206   VEC_free (rtx, heap, ve->var_expansions);
2207
2208   /* Continue traversing the hash table.  */
2209   return 1;
2210 }
2211
2212 /* Release OPT_INFO.  */
2213
2214 static void
2215 free_opt_info (struct opt_info *opt_info)
2216 {
2217   if (opt_info->insns_to_split)
2218     htab_delete (opt_info->insns_to_split);
2219   if (opt_info->insns_with_var_to_expand)
2220     {
2221       htab_traverse (opt_info->insns_with_var_to_expand,
2222                      release_var_copies, NULL);
2223       htab_delete (opt_info->insns_with_var_to_expand);
2224     }
2225   free (opt_info);
2226 }