gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 DEF_VEC_P(rtx);
  89 DEF_VEC_ALLOC_P(rtx,heap);
  90
  91 /* Information about accumulators to expand.  */
  92
  93 struct var_to_expand
  94 {
  95   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  96   rtx reg;                         /* The accumulator which is expanded.  */
  97   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  98   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  99                                       or multiplication.  */
 100   int expansion_count;             /* Count the number of expansions generated so far.  */
 101   int reuse_expansion;             /* The expansion we intend to reuse to expand
 102                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 103                                       the original accumulator.  Else use
 104                                       var_expansions[REUSE_EXPANSION - 1].  */
 105 };
 106
 107 /* Information about optimization applied in
 108    the unrolled loop.  */
 109
 110 struct opt_info
 111 {
 112   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 113   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 114                                       to expand.  */
 115   unsigned first_new_block;        /* The first basic block that was
 116                                       duplicated.  */
 117   basic_block loop_exit;           /* The loop exit basic block.  */
 118   basic_block loop_preheader;      /* The loop preheader basic block.  */
 119 };
 120
 121 static void decide_unrolling_and_peeling (struct loops *, int);
 122 static void peel_loops_completely (struct loops *, int);
 123 static void decide_peel_simple (struct loop *, int);
 124 static void decide_peel_once_rolling (struct loop *, int);
 125 static void decide_peel_completely (struct loop *, int);
 126 static void decide_unroll_stupid (struct loop *, int);
 127 static void decide_unroll_constant_iterations (struct loop *, int);
 128 static void decide_unroll_runtime_iterations (struct loop *, int);
 129 static void peel_loop_simple (struct loops *, struct loop *);
 130 static void peel_loop_completely (struct loops *, struct loop *);
 131 static void unroll_loop_stupid (struct loops *, struct loop *);
 132 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 133 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 134 static struct opt_info *analyze_insns_in_loop (struct loop *);
 135 static void opt_info_start_duplication (struct opt_info *);
 136 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 137 static void free_opt_info (struct opt_info *);
 138 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 139 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 140 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 141 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 142 static int insert_var_expansion_initialization (void **, void *);
 143 static int combine_var_copies_in_loop_exit (void **, void *);
 144 static int release_var_copies (void **, void *);
 145 static rtx get_expansion (struct var_to_expand *);
 146
 147 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 148 void
 149 unroll_and_peel_loops (struct loops *loops, int flags)
 150 {
 151   struct loop *loop, *next;
 152   bool check;
 153
 154   /* First perform complete loop peeling (it is almost surely a win,
 155      and affects parameters for further decision a lot).  */
 156   peel_loops_completely (loops, flags);
 157
 158   /* Now decide rest of unrolling and peeling.  */
 159   decide_unrolling_and_peeling (loops, flags);
 160
 161   loop = loops->tree_root;
 162   while (loop->inner)
 163     loop = loop->inner;
 164
 165   /* Scan the loops, inner ones first.  */
 166   while (loop != loops->tree_root)
 167     {
 168       if (loop->next)
 169         {
 170           next = loop->next;
 171           while (next->inner)
 172             next = next->inner;
 173         }
 174       else
 175         next = loop->outer;
 176
 177       check = true;
 178       /* And perform the appropriate transformations.  */
 179       switch (loop->lpt_decision.decision)
 180         {
 181         case LPT_PEEL_COMPLETELY:
 182           /* Already done.  */
 183           gcc_unreachable ();
 184         case LPT_PEEL_SIMPLE:
 185           peel_loop_simple (loops, loop);
 186           break;
 187         case LPT_UNROLL_CONSTANT:
 188           unroll_loop_constant_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_RUNTIME:
 191           unroll_loop_runtime_iterations (loops, loop);
 192           break;
 193         case LPT_UNROLL_STUPID:
 194           unroll_loop_stupid (loops, loop);
 195           break;
 196         case LPT_NONE:
 197           check = false;
 198           break;
 199         default:
 200           gcc_unreachable ();
 201         }
 202       if (check)
 203         {
 204 #ifdef ENABLE_CHECKING
 205           verify_dominators (CDI_DOMINATORS);
 206           verify_loop_structure (loops);
 207 #endif
 208         }
 209       loop = next;
 210     }
 211
 212   iv_analysis_done ();
 213 }
 214
 215 /* Check whether exit of the LOOP is at the end of loop body.  */
 216
 217 static bool
 218 loop_exit_at_end_p (struct loop *loop)
 219 {
 220   struct niter_desc *desc = get_simple_loop_desc (loop);
 221   rtx insn;
 222
 223   if (desc->in_edge->dest != loop->latch)
 224     return false;
 225
 226   /* Check that the latch is empty.  */
 227   FOR_BB_INSNS (loop->latch, insn)
 228     {
 229       if (INSN_P (insn))
 230         return false;
 231     }
 232
 233   return true;
 234 }
 235
 236 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 237 static void
 238 peel_loops_completely (struct loops *loops, int flags)
 239 {
 240   struct loop *loop, *next;
 241
 242   loop = loops->tree_root;
 243   while (loop->inner)
 244     loop = loop->inner;
 245
 246   while (loop != loops->tree_root)
 247     {
 248       if (loop->next)
 249         {
 250           next = loop->next;
 251           while (next->inner)
 252             next = next->inner;
 253         }
 254       else
 255         next = loop->outer;
 256
 257       loop->lpt_decision.decision = LPT_NONE;
 258
 259       if (dump_file)
 260         fprintf (dump_file,
 261                  "\n;; *** Considering loop %d for complete peeling ***\n",
 262                  loop->num);
 263
 264       loop->ninsns = num_loop_insns (loop);
 265
 266       decide_peel_once_rolling (loop, flags);
 267       if (loop->lpt_decision.decision == LPT_NONE)
 268         decide_peel_completely (loop, flags);
 269
 270       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 271         {
 272           peel_loop_completely (loops, loop);
 273 #ifdef ENABLE_CHECKING
 274           verify_dominators (CDI_DOMINATORS);
 275           verify_loop_structure (loops);
 276 #endif
 277         }
 278       loop = next;
 279     }
 280 }
 281
 282 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 283 static void
 284 decide_unrolling_and_peeling (struct loops *loops, int flags)
 285 {
 286   struct loop *loop = loops->tree_root, *next;
 287
 288   while (loop->inner)
 289     loop = loop->inner;
 290
 291   /* Scan the loops, inner ones first.  */
 292   while (loop != loops->tree_root)
 293     {
 294       if (loop->next)
 295         {
 296           next = loop->next;
 297           while (next->inner)
 298             next = next->inner;
 299         }
 300       else
 301         next = loop->outer;
 302
 303       loop->lpt_decision.decision = LPT_NONE;
 304
 305       if (dump_file)
 306         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 307
 308       /* Do not peel cold areas.  */
 309       if (!maybe_hot_bb_p (loop->header))
 310         {
 311           if (dump_file)
 312             fprintf (dump_file, ";; Not considering loop, cold area\n");
 313           loop = next;
 314           continue;
 315         }
 316
 317       /* Can the loop be manipulated?  */
 318       if (!can_duplicate_loop_p (loop))
 319         {
 320           if (dump_file)
 321             fprintf (dump_file,
 322                      ";; Not considering loop, cannot duplicate\n");
 323           loop = next;
 324           continue;
 325         }
 326
 327       /* Skip non-innermost loops.  */
 328       if (loop->inner)
 329         {
 330           if (dump_file)
 331             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 332           loop = next;
 333           continue;
 334         }
 335
 336       loop->ninsns = num_loop_insns (loop);
 337       loop->av_ninsns = average_num_loop_insns (loop);
 338
 339       /* Try transformations one by one in decreasing order of
 340          priority.  */
 341
 342       decide_unroll_constant_iterations (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_unroll_runtime_iterations (loop, flags);
 345       if (loop->lpt_decision.decision == LPT_NONE)
 346         decide_unroll_stupid (loop, flags);
 347       if (loop->lpt_decision.decision == LPT_NONE)
 348         decide_peel_simple (loop, flags);
 349
 350       loop = next;
 351     }
 352 }
 353
 354 /* Decide whether the LOOP is once rolling and suitable for complete
 355    peeling.  */
 356 static void
 357 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 358 {
 359   struct niter_desc *desc;
 360
 361   if (dump_file)
 362     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 363
 364   /* Is the loop small enough?  */
 365   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 366     {
 367       if (dump_file)
 368         fprintf (dump_file, ";; Not considering loop, is too big\n");
 369       return;
 370     }
 371
 372   /* Check for simple loops.  */
 373   desc = get_simple_loop_desc (loop);
 374
 375   /* Check number of iterations.  */
 376   if (!desc->simple_p
 377       || desc->assumptions
 378       || desc->infinite
 379       || !desc->const_iter
 380       || desc->niter != 0)
 381     {
 382       if (dump_file)
 383         fprintf (dump_file,
 384                  ";; Unable to prove that the loop rolls exactly once\n");
 385       return;
 386     }
 387
 388   /* Success.  */
 389   if (dump_file)
 390     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 391   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 392 }
 393
 394 /* Decide whether the LOOP is suitable for complete peeling.  */
 395 static void
 396 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 397 {
 398   unsigned npeel;
 399   struct niter_desc *desc;
 400
 401   if (dump_file)
 402     fprintf (dump_file, "\n;; Considering peeling completely\n");
 403
 404   /* Skip non-innermost loops.  */
 405   if (loop->inner)
 406     {
 407       if (dump_file)
 408         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 409       return;
 410     }
 411
 412   /* Do not peel cold areas.  */
 413   if (!maybe_hot_bb_p (loop->header))
 414     {
 415       if (dump_file)
 416         fprintf (dump_file, ";; Not considering loop, cold area\n");
 417       return;
 418     }
 419
 420   /* Can the loop be manipulated?  */
 421   if (!can_duplicate_loop_p (loop))
 422     {
 423       if (dump_file)
 424         fprintf (dump_file,
 425                  ";; Not considering loop, cannot duplicate\n");
 426       return;
 427     }
 428
 429   /* npeel = number of iterations to peel.  */
 430   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 431   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 432     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 433
 434   /* Is the loop small enough?  */
 435   if (!npeel)
 436     {
 437       if (dump_file)
 438         fprintf (dump_file, ";; Not considering loop, is too big\n");
 439       return;
 440     }
 441
 442   /* Check for simple loops.  */
 443   desc = get_simple_loop_desc (loop);
 444
 445   /* Check number of iterations.  */
 446   if (!desc->simple_p
 447       || desc->assumptions
 448       || !desc->const_iter
 449       || desc->infinite)
 450     {
 451       if (dump_file)
 452         fprintf (dump_file,
 453                  ";; Unable to prove that the loop iterates constant times\n");
 454       return;
 455     }
 456
 457   if (desc->niter > npeel - 1)
 458     {
 459       if (dump_file)
 460         {
 461           fprintf (dump_file,
 462                    ";; Not peeling loop completely, rolls too much (");
 463           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 464           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 465         }
 466       return;
 467     }
 468
 469   /* Success.  */
 470   if (dump_file)
 471     fprintf (dump_file, ";; Decided to peel loop completely\n");
 472   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 473 }
 474
 475 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 476    completely.  The transformation done:
 477
 478    for (i = 0; i < 4; i++)
 479      body;
 480
 481    ==>
 482
 483    i = 0;
 484    body; i++;
 485    body; i++;
 486    body; i++;
 487    body; i++;
 488    */
 489 static void
 490 peel_loop_completely (struct loops *loops, struct loop *loop)
 491 {
 492   sbitmap wont_exit;
 493   unsigned HOST_WIDE_INT npeel;
 494   unsigned n_remove_edges, i;
 495   edge *remove_edges, ein;
 496   struct niter_desc *desc = get_simple_loop_desc (loop);
 497   struct opt_info *opt_info = NULL;
 498
 499   npeel = desc->niter;
 500
 501   if (npeel)
 502     {
 503       bool ok;
 504
 505       wont_exit = sbitmap_alloc (npeel + 1);
 506       sbitmap_ones (wont_exit);
 507       RESET_BIT (wont_exit, 0);
 508       if (desc->noloop_assumptions)
 509         RESET_BIT (wont_exit, 1);
 510
 511       remove_edges = xcalloc (npeel, sizeof (edge));
 512       n_remove_edges = 0;
 513
 514       if (flag_split_ivs_in_unroller)
 515         opt_info = analyze_insns_in_loop (loop);
 516
 517       opt_info_start_duplication (opt_info);
 518       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 519                                           loops, npeel,
 520                                           wont_exit, desc->out_edge,
 521                                           remove_edges, &n_remove_edges,
 522                                           DLTHE_FLAG_UPDATE_FREQ
 523                                           | DLTHE_FLAG_COMPLETTE_PEEL
 524                                           | (opt_info
 525                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 526       gcc_assert (ok);
 527
 528       free (wont_exit);
 529
 530       if (opt_info)
 531         {
 532           apply_opt_in_copies (opt_info, npeel, false, true);
 533           free_opt_info (opt_info);
 534         }
 535
 536       /* Remove the exit edges.  */
 537       for (i = 0; i < n_remove_edges; i++)
 538         remove_path (loops, remove_edges[i]);
 539       free (remove_edges);
 540     }
 541
 542   ein = desc->in_edge;
 543   free_simple_loop_desc (loop);
 544
 545   /* Now remove the unreachable part of the last iteration and cancel
 546      the loop.  */
 547   remove_path (loops, ein);
 548
 549   if (dump_file)
 550     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 551 }
 552
 553 /* Decide whether to unroll LOOP iterating constant number of times
 554    and how much.  */
 555
 556 static void
 557 decide_unroll_constant_iterations (struct loop *loop, int flags)
 558 {
 559   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 560   struct niter_desc *desc;
 561
 562   if (!(flags & UAP_UNROLL))
 563     {
 564       /* We were not asked to, just return back silently.  */
 565       return;
 566     }
 567
 568   if (dump_file)
 569     fprintf (dump_file,
 570              "\n;; Considering unrolling loop with constant "
 571              "number of iterations\n");
 572
 573   /* nunroll = total number of copies of the original loop body in
 574      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 575   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 576   nunroll_by_av
 577     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 578   if (nunroll > nunroll_by_av)
 579     nunroll = nunroll_by_av;
 580   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 581     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 582
 583   /* Skip big loops.  */
 584   if (nunroll <= 1)
 585     {
 586       if (dump_file)
 587         fprintf (dump_file, ";; Not considering loop, is too big\n");
 588       return;
 589     }
 590
 591   /* Check for simple loops.  */
 592   desc = get_simple_loop_desc (loop);
 593
 594   /* Check number of iterations.  */
 595   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 596     {
 597       if (dump_file)
 598         fprintf (dump_file,
 599                  ";; Unable to prove that the loop iterates constant times\n");
 600       return;
 601     }
 602
 603   /* Check whether the loop rolls enough to consider.  */
 604   if (desc->niter < 2 * nunroll)
 605     {
 606       if (dump_file)
 607         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 608       return;
 609     }
 610
 611   /* Success; now compute number of iterations to unroll.  We alter
 612      nunroll so that as few as possible copies of loop body are
 613      necessary, while still not decreasing the number of unrollings
 614      too much (at most by 1).  */
 615   best_copies = 2 * nunroll + 10;
 616
 617   i = 2 * nunroll + 2;
 618   if (i - 1 >= desc->niter)
 619     i = desc->niter - 2;
 620
 621   for (; i >= nunroll - 1; i--)
 622     {
 623       unsigned exit_mod = desc->niter % (i + 1);
 624
 625       if (!loop_exit_at_end_p (loop))
 626         n_copies = exit_mod + i + 1;
 627       else if (exit_mod != (unsigned) i
 628                || desc->noloop_assumptions != NULL_RTX)
 629         n_copies = exit_mod + i + 2;
 630       else
 631         n_copies = i + 1;
 632
 633       if (n_copies < best_copies)
 634         {
 635           best_copies = n_copies;
 636           best_unroll = i;
 637         }
 638     }
 639
 640   if (dump_file)
 641     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 642              best_unroll + 1, best_copies, nunroll);
 643
 644   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 645   loop->lpt_decision.times = best_unroll;
 646
 647   if (dump_file)
 648     fprintf (dump_file,
 649              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 650              loop->lpt_decision.times);
 651 }
 652
 653 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 654    times.  The transformation does this:
 655
 656    for (i = 0; i < 102; i++)
 657      body;
 658
 659    ==>
 660
 661    i = 0;
 662    body; i++;
 663    body; i++;
 664    while (i < 102)
 665      {
 666        body; i++;
 667        body; i++;
 668        body; i++;
 669        body; i++;
 670      }
 671   */
 672 static void
 673 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 674 {
 675   unsigned HOST_WIDE_INT niter;
 676   unsigned exit_mod;
 677   sbitmap wont_exit;
 678   unsigned n_remove_edges, i;
 679   edge *remove_edges;
 680   unsigned max_unroll = loop->lpt_decision.times;
 681   struct niter_desc *desc = get_simple_loop_desc (loop);
 682   bool exit_at_end = loop_exit_at_end_p (loop);
 683   struct opt_info *opt_info = NULL;
 684   bool ok;
 685
 686   niter = desc->niter;
 687
 688   /* Should not get here (such loop should be peeled instead).  */
 689   gcc_assert (niter > max_unroll + 1);
 690
 691   exit_mod = niter % (max_unroll + 1);
 692
 693   wont_exit = sbitmap_alloc (max_unroll + 1);
 694   sbitmap_ones (wont_exit);
 695
 696   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 697   n_remove_edges = 0;
 698   if (flag_split_ivs_in_unroller
 699       || flag_variable_expansion_in_unroller)
 700     opt_info = analyze_insns_in_loop (loop);
 701
 702   if (!exit_at_end)
 703     {
 704       /* The exit is not at the end of the loop; leave exit test
 705          in the first copy, so that the loops that start with test
 706          of exit condition have continuous body after unrolling.  */
 707
 708       if (dump_file)
 709         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 710
 711       /* Peel exit_mod iterations.  */
 712       RESET_BIT (wont_exit, 0);
 713       if (desc->noloop_assumptions)
 714         RESET_BIT (wont_exit, 1);
 715
 716       if (exit_mod)
 717         {
 718           opt_info_start_duplication (opt_info);
 719           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 720                                               loops, exit_mod,
 721                                               wont_exit, desc->out_edge,
 722                                               remove_edges, &n_remove_edges,
 723                                               DLTHE_FLAG_UPDATE_FREQ
 724                                               | (opt_info && exit_mod > 1
 725                                                  ? DLTHE_RECORD_COPY_NUMBER
 726                                                    : 0));
 727           gcc_assert (ok);
 728
 729           if (opt_info && exit_mod > 1)
 730             apply_opt_in_copies (opt_info, exit_mod, false, false);
 731
 732           desc->noloop_assumptions = NULL_RTX;
 733           desc->niter -= exit_mod;
 734           desc->niter_max -= exit_mod;
 735         }
 736
 737       SET_BIT (wont_exit, 1);
 738     }
 739   else
 740     {
 741       /* Leave exit test in last copy, for the same reason as above if
 742          the loop tests the condition at the end of loop body.  */
 743
 744       if (dump_file)
 745         fprintf (dump_file, ";; Condition on end of loop.\n");
 746
 747       /* We know that niter >= max_unroll + 2; so we do not need to care of
 748          case when we would exit before reaching the loop.  So just peel
 749          exit_mod + 1 iterations.  */
 750       if (exit_mod != max_unroll
 751           || desc->noloop_assumptions)
 752         {
 753           RESET_BIT (wont_exit, 0);
 754           if (desc->noloop_assumptions)
 755             RESET_BIT (wont_exit, 1);
 756
 757           opt_info_start_duplication (opt_info);
 758           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 759                                               loops, exit_mod + 1,
 760                                               wont_exit, desc->out_edge,
 761                                               remove_edges, &n_remove_edges,
 762                                               DLTHE_FLAG_UPDATE_FREQ
 763                                               | (opt_info && exit_mod > 0
 764                                                  ? DLTHE_RECORD_COPY_NUMBER
 765                                                    : 0));
 766           gcc_assert (ok);
 767
 768           if (opt_info && exit_mod > 0)
 769             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 770
 771           desc->niter -= exit_mod + 1;
 772           desc->niter_max -= exit_mod + 1;
 773           desc->noloop_assumptions = NULL_RTX;
 774
 775           SET_BIT (wont_exit, 0);
 776           SET_BIT (wont_exit, 1);
 777         }
 778
 779       RESET_BIT (wont_exit, max_unroll);
 780     }
 781
 782   /* Now unroll the loop.  */
 783
 784   opt_info_start_duplication (opt_info);
 785   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 786                                       loops, max_unroll,
 787                                       wont_exit, desc->out_edge,
 788                                       remove_edges, &n_remove_edges,
 789                                       DLTHE_FLAG_UPDATE_FREQ
 790                                       | (opt_info
 791                                          ? DLTHE_RECORD_COPY_NUMBER
 792                                            : 0));
 793   gcc_assert (ok);
 794
 795   if (opt_info)
 796     {
 797       apply_opt_in_copies (opt_info, max_unroll, true, true);
 798       free_opt_info (opt_info);
 799     }
 800
 801   free (wont_exit);
 802
 803   if (exit_at_end)
 804     {
 805       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 806       /* Find a new in and out edge; they are in the last copy we have made.  */
 807
 808       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 809         {
 810           desc->out_edge = EDGE_SUCC (exit_block, 0);
 811           desc->in_edge = EDGE_SUCC (exit_block, 1);
 812         }
 813       else
 814         {
 815           desc->out_edge = EDGE_SUCC (exit_block, 1);
 816           desc->in_edge = EDGE_SUCC (exit_block, 0);
 817         }
 818     }
 819
 820   desc->niter /= max_unroll + 1;
 821   desc->niter_max /= max_unroll + 1;
 822   desc->niter_expr = GEN_INT (desc->niter);
 823
 824   /* Remove the edges.  */
 825   for (i = 0; i < n_remove_edges; i++)
 826     remove_path (loops, remove_edges[i]);
 827   free (remove_edges);
 828
 829   if (dump_file)
 830     fprintf (dump_file,
 831              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 832              max_unroll, num_loop_insns (loop));
 833 }
 834
 835 /* Decide whether to unroll LOOP iterating runtime computable number of times
 836    and how much.  */
 837 static void
 838 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 839 {
 840   unsigned nunroll, nunroll_by_av, i;
 841   struct niter_desc *desc;
 842
 843   if (!(flags & UAP_UNROLL))
 844     {
 845       /* We were not asked to, just return back silently.  */
 846       return;
 847     }
 848
 849   if (dump_file)
 850     fprintf (dump_file,
 851              "\n;; Considering unrolling loop with runtime "
 852              "computable number of iterations\n");
 853
 854   /* nunroll = total number of copies of the original loop body in
 855      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 856   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 857   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 858   if (nunroll > nunroll_by_av)
 859     nunroll = nunroll_by_av;
 860   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 861     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 862
 863   /* Skip big loops.  */
 864   if (nunroll <= 1)
 865     {
 866       if (dump_file)
 867         fprintf (dump_file, ";; Not considering loop, is too big\n");
 868       return;
 869     }
 870
 871   /* Check for simple loops.  */
 872   desc = get_simple_loop_desc (loop);
 873
 874   /* Check simpleness.  */
 875   if (!desc->simple_p || desc->assumptions)
 876     {
 877       if (dump_file)
 878         fprintf (dump_file,
 879                  ";; Unable to prove that the number of iterations "
 880                  "can be counted in runtime\n");
 881       return;
 882     }
 883
 884   if (desc->const_iter)
 885     {
 886       if (dump_file)
 887         fprintf (dump_file, ";; Loop iterates constant times\n");
 888       return;
 889     }
 890
 891   /* If we have profile feedback, check whether the loop rolls.  */
 892   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 893     {
 894       if (dump_file)
 895         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 896       return;
 897     }
 898
 899   /* Success; now force nunroll to be power of 2, as we are unable to
 900      cope with overflows in computation of number of iterations.  */
 901   for (i = 1; 2 * i <= nunroll; i *= 2)
 902     continue;
 903
 904   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 905   loop->lpt_decision.times = i - 1;
 906
 907   if (dump_file)
 908     fprintf (dump_file,
 909              ";; Decided to unroll the runtime computable "
 910              "times rolling loop, %d times.\n",
 911              loop->lpt_decision.times);
 912 }
 913
 914 /* Unroll LOOP for that we are able to count number of iterations in runtime
 915    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 916    extra care for case n < 0):
 917
 918    for (i = 0; i < n; i++)
 919      body;
 920
 921    ==>
 922
 923    i = 0;
 924    mod = n % 4;
 925
 926    switch (mod)
 927      {
 928        case 3:
 929          body; i++;
 930        case 2:
 931          body; i++;
 932        case 1:
 933          body; i++;
 934        case 0: ;
 935      }
 936
 937    while (i < n)
 938      {
 939        body; i++;
 940        body; i++;
 941        body; i++;
 942        body; i++;
 943      }
 944    */
 945 static void
 946 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 947 {
 948   rtx old_niter, niter, init_code, branch_code, tmp;
 949   unsigned i, j, p;
 950   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 951   unsigned n_dom_bbs;
 952   sbitmap wont_exit;
 953   int may_exit_copy;
 954   unsigned n_peel, n_remove_edges;
 955   edge *remove_edges, e;
 956   bool extra_zero_check, last_may_exit;
 957   unsigned max_unroll = loop->lpt_decision.times;
 958   struct niter_desc *desc = get_simple_loop_desc (loop);
 959   bool exit_at_end = loop_exit_at_end_p (loop);
 960   struct opt_info *opt_info = NULL;
 961   bool ok;
 962
 963   if (flag_split_ivs_in_unroller
 964       || flag_variable_expansion_in_unroller)
 965     opt_info = analyze_insns_in_loop (loop);
 966
 967   /* Remember blocks whose dominators will have to be updated.  */
 968   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 969   n_dom_bbs = 0;
 970
 971   body = get_loop_body (loop);
 972   for (i = 0; i < loop->num_nodes; i++)
 973     {
 974       unsigned nldom;
 975       basic_block *ldom;
 976
 977       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 978       for (j = 0; j < nldom; j++)
 979         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 980           dom_bbs[n_dom_bbs++] = ldom[j];
 981
 982       free (ldom);
 983     }
 984   free (body);
 985
 986   if (!exit_at_end)
 987     {
 988       /* Leave exit in first copy (for explanation why see comment in
 989          unroll_loop_constant_iterations).  */
 990       may_exit_copy = 0;
 991       n_peel = max_unroll - 1;
 992       extra_zero_check = true;
 993       last_may_exit = false;
 994     }
 995   else
 996     {
 997       /* Leave exit in last copy (for explanation why see comment in
 998          unroll_loop_constant_iterations).  */
 999       may_exit_copy = max_unroll;
1000       n_peel = max_unroll;
1001       extra_zero_check = false;
1002       last_may_exit = true;
1003     }
1004
1005   /* Get expression for number of iterations.  */
1006   start_sequence ();
1007   old_niter = niter = gen_reg_rtx (desc->mode);
1008   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1009   if (tmp != niter)
1010     emit_move_insn (niter, tmp);
1011
1012   /* Count modulo by ANDing it with max_unroll; we use the fact that
1013      the number of unrollings is a power of two, and thus this is correct
1014      even if there is overflow in the computation.  */
1015   niter = expand_simple_binop (desc->mode, AND,
1016                                niter,
1017                                GEN_INT (max_unroll),
1018                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1019
1020   init_code = get_insns ();
1021   end_sequence ();
1022
1023   /* Precondition the loop.  */
1024   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1025
1026   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1027   n_remove_edges = 0;
1028
1029   wont_exit = sbitmap_alloc (max_unroll + 2);
1030
1031   /* Peel the first copy of loop body (almost always we must leave exit test
1032      here; the only exception is when we have extra zero check and the number
1033      of iterations is reliable.  Also record the place of (possible) extra
1034      zero check.  */
1035   sbitmap_zero (wont_exit);
1036   if (extra_zero_check
1037       && !desc->noloop_assumptions)
1038     SET_BIT (wont_exit, 1);
1039   ezc_swtch = loop_preheader_edge (loop)->src;
1040   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1041                                       loops, 1,
1042                                       wont_exit, desc->out_edge,
1043                                       remove_edges, &n_remove_edges,
1044                                       DLTHE_FLAG_UPDATE_FREQ);
1045   gcc_assert (ok);
1046
1047   /* Record the place where switch will be built for preconditioning.  */
1048   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1049                                 NULL_RTX);
1050
1051   for (i = 0; i < n_peel; i++)
1052     {
1053       /* Peel the copy.  */
1054       sbitmap_zero (wont_exit);
1055       if (i != n_peel - 1 || !last_may_exit)
1056         SET_BIT (wont_exit, 1);
1057       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1058                                           loops, 1,
1059                                           wont_exit, desc->out_edge,
1060                                           remove_edges, &n_remove_edges,
1061                                           DLTHE_FLAG_UPDATE_FREQ);
1062       gcc_assert (ok);
1063
1064       /* Create item for switch.  */
1065       j = n_peel - i - (extra_zero_check ? 0 : 1);
1066       p = REG_BR_PROB_BASE / (i + 2);
1067
1068       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1069       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1070                                           block_label (preheader), p,
1071                                           NULL_RTX);
1072
1073       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1074       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1075       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1076       e = make_edge (swtch, preheader,
1077                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1078       e->probability = p;
1079     }
1080
1081   if (extra_zero_check)
1082     {
1083       /* Add branch for zero iterations.  */
1084       p = REG_BR_PROB_BASE / (max_unroll + 1);
1085       swtch = ezc_swtch;
1086       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1087       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1088                                           block_label (preheader), p,
1089                                           NULL_RTX);
1090
1091       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1092       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1093       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1094       e = make_edge (swtch, preheader,
1095                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1096       e->probability = p;
1097     }
1098
1099   /* Recount dominators for outer blocks.  */
1100   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1101
1102   /* And unroll loop.  */
1103
1104   sbitmap_ones (wont_exit);
1105   RESET_BIT (wont_exit, may_exit_copy);
1106   opt_info_start_duplication (opt_info);
1107
1108   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1109                                       loops, max_unroll,
1110                                       wont_exit, desc->out_edge,
1111                                       remove_edges, &n_remove_edges,
1112                                       DLTHE_FLAG_UPDATE_FREQ
1113                                       | (opt_info
1114                                          ? DLTHE_RECORD_COPY_NUMBER
1115                                            : 0));
1116   gcc_assert (ok);
1117
1118   if (opt_info)
1119     {
1120       apply_opt_in_copies (opt_info, max_unroll, true, true);
1121       free_opt_info (opt_info);
1122     }
1123
1124   free (wont_exit);
1125
1126   if (exit_at_end)
1127     {
1128       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1129       /* Find a new in and out edge; they are in the last copy we have
1130          made.  */
1131
1132       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1133         {
1134           desc->out_edge = EDGE_SUCC (exit_block, 0);
1135           desc->in_edge = EDGE_SUCC (exit_block, 1);
1136         }
1137       else
1138         {
1139           desc->out_edge = EDGE_SUCC (exit_block, 1);
1140           desc->in_edge = EDGE_SUCC (exit_block, 0);
1141         }
1142     }
1143
1144   /* Remove the edges.  */
1145   for (i = 0; i < n_remove_edges; i++)
1146     remove_path (loops, remove_edges[i]);
1147   free (remove_edges);
1148
1149   /* We must be careful when updating the number of iterations due to
1150      preconditioning and the fact that the value must be valid at entry
1151      of the loop.  After passing through the above code, we see that
1152      the correct new number of iterations is this:  */
1153   gcc_assert (!desc->const_iter);
1154   desc->niter_expr =
1155     simplify_gen_binary (UDIV, desc->mode, old_niter,
1156                          GEN_INT (max_unroll + 1));
1157   desc->niter_max /= max_unroll + 1;
1158   if (exit_at_end)
1159     {
1160       desc->niter_expr =
1161         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1162       desc->noloop_assumptions = NULL_RTX;
1163       desc->niter_max--;
1164     }
1165
1166   if (dump_file)
1167     fprintf (dump_file,
1168              ";; Unrolled loop %d times, counting # of iterations "
1169              "in runtime, %i insns\n",
1170              max_unroll, num_loop_insns (loop));
1171 }
1172
1173 /* Decide whether to simply peel LOOP and how much.  */
1174 static void
1175 decide_peel_simple (struct loop *loop, int flags)
1176 {
1177   unsigned npeel;
1178   struct niter_desc *desc;
1179
1180   if (!(flags & UAP_PEEL))
1181     {
1182       /* We were not asked to, just return back silently.  */
1183       return;
1184     }
1185
1186   if (dump_file)
1187     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1188
1189   /* npeel = number of iterations to peel.  */
1190   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1191   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1192     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1193
1194   /* Skip big loops.  */
1195   if (!npeel)
1196     {
1197       if (dump_file)
1198         fprintf (dump_file, ";; Not considering loop, is too big\n");
1199       return;
1200     }
1201
1202   /* Check for simple loops.  */
1203   desc = get_simple_loop_desc (loop);
1204
1205   /* Check number of iterations.  */
1206   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1207     {
1208       if (dump_file)
1209         fprintf (dump_file, ";; Loop iterates constant times\n");
1210       return;
1211     }
1212
1213   /* Do not simply peel loops with branches inside -- it increases number
1214      of mispredicts.  */
1215   if (num_loop_branches (loop) > 1)
1216     {
1217       if (dump_file)
1218         fprintf (dump_file, ";; Not peeling, contains branches\n");
1219       return;
1220     }
1221
1222   if (loop->header->count)
1223     {
1224       unsigned niter = expected_loop_iterations (loop);
1225       if (niter + 1 > npeel)
1226         {
1227           if (dump_file)
1228             {
1229               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1230               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1231                        (HOST_WIDEST_INT) (niter + 1));
1232               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1233                        npeel);
1234             }
1235           return;
1236         }
1237       npeel = niter + 1;
1238     }
1239   else
1240     {
1241       /* For now we have no good heuristics to decide whether loop peeling
1242          will be effective, so disable it.  */
1243       if (dump_file)
1244         fprintf (dump_file,
1245                  ";; Not peeling loop, no evidence it will be profitable\n");
1246       return;
1247     }
1248
1249   /* Success.  */
1250   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1251   loop->lpt_decision.times = npeel;
1252
1253   if (dump_file)
1254     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1255              loop->lpt_decision.times);
1256 }
1257
1258 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1259    while (cond)
1260      body;
1261
1262    ==>
1263
1264    if (!cond) goto end;
1265    body;
1266    if (!cond) goto end;
1267    body;
1268    while (cond)
1269      body;
1270    end: ;
1271    */
1272 static void
1273 peel_loop_simple (struct loops *loops, struct loop *loop)
1274 {
1275   sbitmap wont_exit;
1276   unsigned npeel = loop->lpt_decision.times;
1277   struct niter_desc *desc = get_simple_loop_desc (loop);
1278   struct opt_info *opt_info = NULL;
1279   bool ok;
1280
1281   if (flag_split_ivs_in_unroller && npeel > 1)
1282     opt_info = analyze_insns_in_loop (loop);
1283
1284   wont_exit = sbitmap_alloc (npeel + 1);
1285   sbitmap_zero (wont_exit);
1286
1287   opt_info_start_duplication (opt_info);
1288
1289   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1290                                       loops, npeel, wont_exit,
1291                                       NULL, NULL,
1292                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1293                                       | (opt_info
1294                                          ? DLTHE_RECORD_COPY_NUMBER
1295                                            : 0));
1296   gcc_assert (ok);
1297
1298   free (wont_exit);
1299
1300   if (opt_info)
1301     {
1302       apply_opt_in_copies (opt_info, npeel, false, false);
1303       free_opt_info (opt_info);
1304     }
1305
1306   if (desc->simple_p)
1307     {
1308       if (desc->const_iter)
1309         {
1310           desc->niter -= npeel;
1311           desc->niter_expr = GEN_INT (desc->niter);
1312           desc->noloop_assumptions = NULL_RTX;
1313         }
1314       else
1315         {
1316           /* We cannot just update niter_expr, as its value might be clobbered
1317              inside loop.  We could handle this by counting the number into
1318              temporary just like we do in runtime unrolling, but it does not
1319              seem worthwhile.  */
1320           free_simple_loop_desc (loop);
1321         }
1322     }
1323   if (dump_file)
1324     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1325 }
1326
1327 /* Decide whether to unroll LOOP stupidly and how much.  */
1328 static void
1329 decide_unroll_stupid (struct loop *loop, int flags)
1330 {
1331   unsigned nunroll, nunroll_by_av, i;
1332   struct niter_desc *desc;
1333
1334   if (!(flags & UAP_UNROLL_ALL))
1335     {
1336       /* We were not asked to, just return back silently.  */
1337       return;
1338     }
1339
1340   if (dump_file)
1341     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1342
1343   /* nunroll = total number of copies of the original loop body in
1344      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1345   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1346   nunroll_by_av
1347     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1348   if (nunroll > nunroll_by_av)
1349     nunroll = nunroll_by_av;
1350   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1351     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1352
1353   /* Skip big loops.  */
1354   if (nunroll <= 1)
1355     {
1356       if (dump_file)
1357         fprintf (dump_file, ";; Not considering loop, is too big\n");
1358       return;
1359     }
1360
1361   /* Check for simple loops.  */
1362   desc = get_simple_loop_desc (loop);
1363
1364   /* Check simpleness.  */
1365   if (desc->simple_p && !desc->assumptions)
1366     {
1367       if (dump_file)
1368         fprintf (dump_file, ";; The loop is simple\n");
1369       return;
1370     }
1371
1372   /* Do not unroll loops with branches inside -- it increases number
1373      of mispredicts.  */
1374   if (num_loop_branches (loop) > 1)
1375     {
1376       if (dump_file)
1377         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1378       return;
1379     }
1380
1381   /* If we have profile feedback, check whether the loop rolls.  */
1382   if (loop->header->count
1383       && expected_loop_iterations (loop) < 2 * nunroll)
1384     {
1385       if (dump_file)
1386         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1387       return;
1388     }
1389
1390   /* Success.  Now force nunroll to be power of 2, as it seems that this
1391      improves results (partially because of better alignments, partially
1392      because of some dark magic).  */
1393   for (i = 1; 2 * i <= nunroll; i *= 2)
1394     continue;
1395
1396   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1397   loop->lpt_decision.times = i - 1;
1398
1399   if (dump_file)
1400     fprintf (dump_file,
1401              ";; Decided to unroll the loop stupidly, %d times.\n",
1402              loop->lpt_decision.times);
1403 }
1404
1405 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1406    while (cond)
1407      body;
1408
1409    ==>
1410
1411    while (cond)
1412      {
1413        body;
1414        if (!cond) break;
1415        body;
1416        if (!cond) break;
1417        body;
1418        if (!cond) break;
1419        body;
1420      }
1421    */
1422 static void
1423 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1424 {
1425   sbitmap wont_exit;
1426   unsigned nunroll = loop->lpt_decision.times;
1427   struct niter_desc *desc = get_simple_loop_desc (loop);
1428   struct opt_info *opt_info = NULL;
1429   bool ok;
1430
1431   if (flag_split_ivs_in_unroller
1432       || flag_variable_expansion_in_unroller)
1433     opt_info = analyze_insns_in_loop (loop);
1434
1435
1436   wont_exit = sbitmap_alloc (nunroll + 1);
1437   sbitmap_zero (wont_exit);
1438   opt_info_start_duplication (opt_info);
1439
1440   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1441                                       loops, nunroll, wont_exit,
1442                                       NULL, NULL, NULL,
1443                                       DLTHE_FLAG_UPDATE_FREQ
1444                                       | (opt_info
1445                                          ? DLTHE_RECORD_COPY_NUMBER
1446                                            : 0));
1447   gcc_assert (ok);
1448
1449   if (opt_info)
1450     {
1451       apply_opt_in_copies (opt_info, nunroll, true, true);
1452       free_opt_info (opt_info);
1453     }
1454
1455   free (wont_exit);
1456
1457   if (desc->simple_p)
1458     {
1459       /* We indeed may get here provided that there are nontrivial assumptions
1460          for a loop to be really simple.  We could update the counts, but the
1461          problem is that we are unable to decide which exit will be taken
1462          (not really true in case the number of iterations is constant,
1463          but noone will do anything with this information, so we do not
1464          worry about it).  */
1465       desc->simple_p = false;
1466     }
1467
1468   if (dump_file)
1469     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1470              nunroll, num_loop_insns (loop));
1471 }
1472
1473 /* A hash function for information about insns to split.  */
1474
1475 static hashval_t
1476 si_info_hash (const void *ivts)
1477 {
1478   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1479 }
1480
1481 /* An equality functions for information about insns to split.  */
1482
1483 static int
1484 si_info_eq (const void *ivts1, const void *ivts2)
1485 {
1486   const struct iv_to_split *i1 = ivts1;
1487   const struct iv_to_split *i2 = ivts2;
1488
1489   return i1->insn == i2->insn;
1490 }
1491
1492 /* Return a hash for VES, which is really a "var_to_expand *".  */
1493
1494 static hashval_t
1495 ve_info_hash (const void *ves)
1496 {
1497   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1498 }
1499
1500 /* Return true if IVTS1 and IVTS2 (which are really both of type
1501    "var_to_expand *") refer to the same instruction.  */
1502
1503 static int
1504 ve_info_eq (const void *ivts1, const void *ivts2)
1505 {
1506   const struct var_to_expand *i1 = ivts1;
1507   const struct var_to_expand *i2 = ivts2;
1508
1509   return i1->insn == i2->insn;
1510 }
1511
1512 /* Returns true if REG is referenced in one insn in LOOP.  */
1513
1514 bool
1515 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1516 {
1517   basic_block *body, bb;
1518   unsigned i;
1519   int count_ref = 0;
1520   rtx insn;
1521
1522   body = get_loop_body (loop);
1523   for (i = 0; i < loop->num_nodes; i++)
1524     {
1525       bb = body[i];
1526
1527       FOR_BB_INSNS (bb, insn)
1528       {
1529         if (rtx_referenced_p (reg, insn))
1530           count_ref++;
1531       }
1532     }
1533   return (count_ref  == 1);
1534 }
1535
1536 /* Determine whether INSN contains an accumulator
1537    which can be expanded into separate copies,
1538    one for each copy of the LOOP body.
1539
1540    for (i = 0 ; i < n; i++)
1541      sum += a[i];
1542
1543    ==>
1544
1545    sum += a[i]
1546    ....
1547    i = i+1;
1548    sum1 += a[i]
1549    ....
1550    i = i+1
1551    sum2 += a[i];
1552    ....
1553
1554    Return NULL if INSN contains no opportunity for expansion of accumulator.
1555    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1556    information and return a pointer to it.
1557 */
1558
1559 static struct var_to_expand *
1560 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1561 {
1562   rtx set, dest, src, op1;
1563   struct var_to_expand *ves;
1564   enum machine_mode mode1, mode2;
1565
1566   set = single_set (insn);
1567   if (!set)
1568     return NULL;
1569
1570   dest = SET_DEST (set);
1571   src = SET_SRC (set);
1572
1573   if (GET_CODE (src) != PLUS
1574       && GET_CODE (src) != MINUS
1575       && GET_CODE (src) != MULT)
1576     return NULL;
1577
1578   if (!XEXP (src, 0))
1579     return NULL;
1580
1581   op1 = XEXP (src, 0);
1582
1583   if (!REG_P (dest)
1584       && !(GET_CODE (dest) == SUBREG
1585            && REG_P (SUBREG_REG (dest))))
1586     return NULL;
1587
1588   if (!rtx_equal_p (dest, op1))
1589     return NULL;
1590
1591   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1592     return NULL;
1593
1594   if (rtx_referenced_p (dest, XEXP (src, 1)))
1595     return NULL;
1596
1597   mode1 = GET_MODE (dest);
1598   mode2 = GET_MODE (XEXP (src, 1));
1599   if ((FLOAT_MODE_P (mode1)
1600        || FLOAT_MODE_P (mode2))
1601       && !flag_unsafe_math_optimizations)
1602     return NULL;
1603
1604   /* Record the accumulator to expand.  */
1605   ves = xmalloc (sizeof (struct var_to_expand));
1606   ves->insn = insn;
1607   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1608   ves->reg = copy_rtx (dest);
1609   ves->op = GET_CODE (src);
1610   ves->expansion_count = 0;
1611   ves->reuse_expansion = 0;
1612   return ves;
1613 }
1614
1615 /* Determine whether there is an induction variable in INSN that
1616    we would like to split during unrolling.
1617
1618    I.e. replace
1619
1620    i = i + 1;
1621    ...
1622    i = i + 1;
1623    ...
1624    i = i + 1;
1625    ...
1626
1627    type chains by
1628
1629    i0 = i + 1
1630    ...
1631    i = i0 + 1
1632    ...
1633    i = i0 + 2
1634    ...
1635
1636    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1637    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1638    pointer to it.  */
1639
1640 static struct iv_to_split *
1641 analyze_iv_to_split_insn (rtx insn)
1642 {
1643   rtx set, dest;
1644   struct rtx_iv iv;
1645   struct iv_to_split *ivts;
1646   bool ok;
1647
1648   /* For now we just split the basic induction variables.  Later this may be
1649      extended for example by selecting also addresses of memory references.  */
1650   set = single_set (insn);
1651   if (!set)
1652     return NULL;
1653
1654   dest = SET_DEST (set);
1655   if (!REG_P (dest))
1656     return NULL;
1657
1658   if (!biv_p (insn, dest))
1659     return NULL;
1660
1661   ok = iv_analyze (insn, dest, &iv);
1662   gcc_assert (ok);
1663
1664   if (iv.step == const0_rtx
1665       || iv.mode != iv.extend_mode)
1666     return NULL;
1667
1668   /* Record the insn to split.  */
1669   ivts = xmalloc (sizeof (struct iv_to_split));
1670   ivts->insn = insn;
1671   ivts->base_var = NULL_RTX;
1672   ivts->step = iv.step;
1673   ivts->n_loc = 1;
1674   ivts->loc[0] = 1;
1675
1676   return ivts;
1677 }
1678
1679 /* Determines which of insns in LOOP can be optimized.
1680    Return a OPT_INFO struct with the relevant hash tables filled
1681    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1682    is undefined for the return value.  */
1683
1684 static struct opt_info *
1685 analyze_insns_in_loop (struct loop *loop)
1686 {
1687   basic_block *body, bb;
1688   unsigned i, num_edges = 0;
1689   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1690   rtx insn;
1691   struct iv_to_split *ivts = NULL;
1692   struct var_to_expand *ves = NULL;
1693   PTR *slot1;
1694   PTR *slot2;
1695   edge *edges = get_loop_exit_edges (loop, &num_edges);
1696   bool can_apply = false;
1697
1698   iv_analysis_loop_init (loop);
1699
1700   body = get_loop_body (loop);
1701
1702   if (flag_split_ivs_in_unroller)
1703     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1704                                             si_info_hash, si_info_eq, free);
1705
1706   /* Record the loop exit bb and loop preheader before the unrolling.  */
1707   if (!loop_preheader_edge (loop)->src)
1708     {
1709       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1710       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1711     }
1712   else
1713     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1714
1715   if (num_edges == 1
1716       && !(edges[0]->flags & EDGE_COMPLEX))
1717     {
1718       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1719       can_apply = true;
1720     }
1721
1722   if (flag_variable_expansion_in_unroller
1723       && can_apply)
1724     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1725                                                       ve_info_hash, ve_info_eq, free);
1726
1727   for (i = 0; i < loop->num_nodes; i++)
1728     {
1729       bb = body[i];
1730       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1731         continue;
1732
1733       FOR_BB_INSNS (bb, insn)
1734       {
1735         if (!INSN_P (insn))
1736           continue;
1737
1738         if (opt_info->insns_to_split)
1739           ivts = analyze_iv_to_split_insn (insn);
1740
1741         if (ivts)
1742           {
1743             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1744             *slot1 = ivts;
1745             continue;
1746           }
1747
1748         if (opt_info->insns_with_var_to_expand)
1749           ves = analyze_insn_to_expand_var (loop, insn);
1750
1751         if (ves)
1752           {
1753             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1754             *slot2 = ves;
1755           }
1756       }
1757     }
1758
1759   free (edges);
1760   free (body);
1761   return opt_info;
1762 }
1763
1764 /* Called just before loop duplication.  Records start of duplicated area
1765    to OPT_INFO.  */
1766
1767 static void
1768 opt_info_start_duplication (struct opt_info *opt_info)
1769 {
1770   if (opt_info)
1771     opt_info->first_new_block = last_basic_block;
1772 }
1773
1774 /* Determine the number of iterations between initialization of the base
1775    variable and the current copy (N_COPY).  N_COPIES is the total number
1776    of newly created copies.  UNROLLING is true if we are unrolling
1777    (not peeling) the loop.  */
1778
1779 static unsigned
1780 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1781 {
1782   if (unrolling)
1783     {
1784       /* If we are unrolling, initialization is done in the original loop
1785          body (number 0).  */
1786       return n_copy;
1787     }
1788   else
1789     {
1790       /* If we are peeling, the copy in that the initialization occurs has
1791          number 1.  The original loop (number 0) is the last.  */
1792       if (n_copy)
1793         return n_copy - 1;
1794       else
1795         return n_copies;
1796     }
1797 }
1798
1799 /* Locate in EXPR the expression corresponding to the location recorded
1800    in IVTS, and return a pointer to the RTX for this location.  */
1801
1802 static rtx *
1803 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1804 {
1805   unsigned i;
1806   rtx *ret = &expr;
1807
1808   for (i = 0; i < ivts->n_loc; i++)
1809     ret = &XEXP (*ret, ivts->loc[i]);
1810
1811   return ret;
1812 }
1813
1814 /* Allocate basic variable for the induction variable chain.  Callback for
1815    htab_traverse.  */
1816
1817 static int
1818 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1819 {
1820   struct iv_to_split *ivts = *slot;
1821   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1822
1823   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1824
1825   return 1;
1826 }
1827
1828 /* Insert initialization of basic variable of IVTS before INSN, taking
1829    the initial value from INSN.  */
1830
1831 static void
1832 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1833 {
1834   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1835   rtx seq;
1836
1837   start_sequence ();
1838   expr = force_operand (expr, ivts->base_var);
1839   if (expr != ivts->base_var)
1840     emit_move_insn (ivts->base_var, expr);
1841   seq = get_insns ();
1842   end_sequence ();
1843
1844   emit_insn_before (seq, insn);
1845 }
1846
1847 /* Replace the use of induction variable described in IVTS in INSN
1848    by base variable + DELTA * step.  */
1849
1850 static void
1851 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1852 {
1853   rtx expr, *loc, seq, incr, var;
1854   enum machine_mode mode = GET_MODE (ivts->base_var);
1855   rtx src, dest, set;
1856
1857   /* Construct base + DELTA * step.  */
1858   if (!delta)
1859     expr = ivts->base_var;
1860   else
1861     {
1862       incr = simplify_gen_binary (MULT, mode,
1863                                   ivts->step, gen_int_mode (delta, mode));
1864       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1865                                   ivts->base_var, incr);
1866     }
1867
1868   /* Figure out where to do the replacement.  */
1869   loc = get_ivts_expr (single_set (insn), ivts);
1870
1871   /* If we can make the replacement right away, we're done.  */
1872   if (validate_change (insn, loc, expr, 0))
1873     return;
1874
1875   /* Otherwise, force EXPR into a register and try again.  */
1876   start_sequence ();
1877   var = gen_reg_rtx (mode);
1878   expr = force_operand (expr, var);
1879   if (expr != var)
1880     emit_move_insn (var, expr);
1881   seq = get_insns ();
1882   end_sequence ();
1883   emit_insn_before (seq, insn);
1884
1885   if (validate_change (insn, loc, var, 0))
1886     return;
1887
1888   /* The last chance.  Try recreating the assignment in insn
1889      completely from scratch.  */
1890   set = single_set (insn);
1891   gcc_assert (set);
1892
1893   start_sequence ();
1894   *loc = var;
1895   src = copy_rtx (SET_SRC (set));
1896   dest = copy_rtx (SET_DEST (set));
1897   src = force_operand (src, dest);
1898   if (src != dest)
1899     emit_move_insn (dest, src);
1900   seq = get_insns ();
1901   end_sequence ();
1902
1903   emit_insn_before (seq, insn);
1904   delete_insn (insn);
1905 }
1906
1907
1908 /* Return one expansion of the accumulator recorded in struct VE.  */
1909
1910 static rtx
1911 get_expansion (struct var_to_expand *ve)
1912 {
1913   rtx reg;
1914
1915   if (ve->reuse_expansion == 0)
1916     reg = ve->reg;
1917   else
1918     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1919
1920   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1921     ve->reuse_expansion = 0;
1922   else
1923     ve->reuse_expansion++;
1924
1925   return reg;
1926 }
1927
1928
1929 /* Given INSN replace the uses of the accumulator recorded in VE
1930    with a new register.  */
1931
1932 static void
1933 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1934 {
1935   rtx new_reg, set;
1936   bool really_new_expansion = false;
1937
1938   set = single_set (insn);
1939   gcc_assert (set);
1940
1941   /* Generate a new register only if the expansion limit has not been
1942      reached.  Else reuse an already existing expansion.  */
1943   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1944     {
1945       really_new_expansion = true;
1946       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1947     }
1948   else
1949     new_reg = get_expansion (ve);
1950
1951   validate_change (insn, &SET_DEST (set), new_reg, 1);
1952   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1953
1954   if (apply_change_group ())
1955     if (really_new_expansion)
1956       {
1957         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1958         ve->expansion_count++;
1959       }
1960 }
1961
1962 /* Initialize the variable expansions in loop preheader.
1963    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1964    basic block where the initialization of the expansions
1965    should take place.  */
1966
1967 static int
1968 insert_var_expansion_initialization (void **slot, void *place_p)
1969 {
1970   struct var_to_expand *ve = *slot;
1971   basic_block place = (basic_block)place_p;
1972   rtx seq, var, zero_init, insn;
1973   unsigned i;
1974
1975   if (VEC_length (rtx, ve->var_expansions) == 0)
1976     return 1;
1977
1978   start_sequence ();
1979   if (ve->op == PLUS || ve->op == MINUS)
1980     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1981       {
1982         zero_init =  CONST0_RTX (GET_MODE (var));
1983         emit_move_insn (var, zero_init);
1984       }
1985   else if (ve->op == MULT)
1986     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1987       {
1988         zero_init =  CONST1_RTX (GET_MODE (var));
1989         emit_move_insn (var, zero_init);
1990       }
1991
1992   seq = get_insns ();
1993   end_sequence ();
1994
1995   insn = BB_HEAD (place);
1996   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1997     insn = NEXT_INSN (insn);
1998
1999   emit_insn_after (seq, insn);
2000   /* Continue traversing the hash table.  */
2001   return 1;
2002 }
2003
2004 /*  Combine the variable expansions at the loop exit.
2005     Callbacks for htab_traverse.  PLACE_P is the loop exit
2006     basic block where the summation of the expansions should
2007     take place.  */
2008
2009 static int
2010 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2011 {
2012   struct var_to_expand *ve = *slot;
2013   basic_block place = (basic_block)place_p;
2014   rtx sum = ve->reg;
2015   rtx expr, seq, var, insn;
2016   unsigned i;
2017
2018   if (VEC_length (rtx, ve->var_expansions) == 0)
2019     return 1;
2020
2021   start_sequence ();
2022   if (ve->op == PLUS || ve->op == MINUS)
2023     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2024       {
2025         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2026                                    var, sum);
2027       }
2028   else if (ve->op == MULT)
2029     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2030       {
2031         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2032                                    var, sum);
2033       }
2034
2035   expr = force_operand (sum, ve->reg);
2036   if (expr != ve->reg)
2037     emit_move_insn (ve->reg, expr);
2038   seq = get_insns ();
2039   end_sequence ();
2040
2041   insn = BB_HEAD (place);
2042   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2043     insn = NEXT_INSN (insn);
2044
2045   emit_insn_after (seq, insn);
2046
2047   /* Continue traversing the hash table.  */
2048   return 1;
2049 }
2050
2051 /* Apply loop optimizations in loop copies using the
2052    data which gathered during the unrolling.  Structure
2053    OPT_INFO record that data.
2054
2055    UNROLLING is true if we unrolled (not peeled) the loop.
2056    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2057    the loop (as it should happen in complete unrolling, but not in ordinary
2058    peeling of the loop).  */
2059
2060 static void
2061 apply_opt_in_copies (struct opt_info *opt_info,
2062                      unsigned n_copies, bool unrolling,
2063                      bool rewrite_original_loop)
2064 {
2065   unsigned i, delta;
2066   basic_block bb, orig_bb;
2067   rtx insn, orig_insn, next;
2068   struct iv_to_split ivts_templ, *ivts;
2069   struct var_to_expand ve_templ, *ves;
2070
2071   /* Sanity check -- we need to put initialization in the original loop
2072      body.  */
2073   gcc_assert (!unrolling || rewrite_original_loop);
2074
2075   /* Allocate the basic variables (i0).  */
2076   if (opt_info->insns_to_split)
2077     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2078
2079   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2080     {
2081       bb = BASIC_BLOCK (i);
2082       orig_bb = get_bb_original (bb);
2083
2084       /* bb->aux holds position in copy sequence initialized by
2085          duplicate_loop_to_header_edge.  */
2086       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2087                                         unrolling);
2088       bb->aux = 0;
2089       orig_insn = BB_HEAD (orig_bb);
2090       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2091         {
2092           next = NEXT_INSN (insn);
2093           if (!INSN_P (insn))
2094             continue;
2095
2096           while (!INSN_P (orig_insn))
2097             orig_insn = NEXT_INSN (orig_insn);
2098
2099           ivts_templ.insn = orig_insn;
2100           ve_templ.insn = orig_insn;
2101
2102           /* Apply splitting iv optimization.  */
2103           if (opt_info->insns_to_split)
2104             {
2105               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2106
2107               if (ivts)
2108                 {
2109 #ifdef ENABLE_CHECKING
2110                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2111 #endif
2112
2113                   if (!delta)
2114                     insert_base_initialization (ivts, insn);
2115                   split_iv (ivts, insn, delta);
2116                 }
2117             }
2118           /* Apply variable expansion optimization.  */
2119           if (unrolling && opt_info->insns_with_var_to_expand)
2120             {
2121               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2122               if (ves)
2123                 {
2124 #ifdef ENABLE_CHECKING
2125                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2126 #endif
2127                   expand_var_during_unrolling (ves, insn);
2128                 }
2129             }
2130           orig_insn = NEXT_INSN (orig_insn);
2131         }
2132     }
2133
2134   if (!rewrite_original_loop)
2135     return;
2136
2137   /* Initialize the variable expansions in the loop preheader
2138      and take care of combining them at the loop exit.  */
2139   if (opt_info->insns_with_var_to_expand)
2140     {
2141       htab_traverse (opt_info->insns_with_var_to_expand,
2142                      insert_var_expansion_initialization,
2143                      opt_info->loop_preheader);
2144       htab_traverse (opt_info->insns_with_var_to_expand,
2145                      combine_var_copies_in_loop_exit,
2146                      opt_info->loop_exit);
2147     }
2148
2149   /* Rewrite also the original loop body.  Find them as originals of the blocks
2150      in the last copied iteration, i.e. those that have
2151      get_bb_copy (get_bb_original (bb)) == bb.  */
2152   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2153     {
2154       bb = BASIC_BLOCK (i);
2155       orig_bb = get_bb_original (bb);
2156       if (get_bb_copy (orig_bb) != bb)
2157         continue;
2158
2159       delta = determine_split_iv_delta (0, n_copies, unrolling);
2160       for (orig_insn = BB_HEAD (orig_bb);
2161            orig_insn != NEXT_INSN (BB_END (bb));
2162            orig_insn = next)
2163         {
2164           next = NEXT_INSN (orig_insn);
2165
2166           if (!INSN_P (orig_insn))
2167             continue;
2168
2169           ivts_templ.insn = orig_insn;
2170           if (opt_info->insns_to_split)
2171             {
2172               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2173               if (ivts)
2174                 {
2175                   if (!delta)
2176                     insert_base_initialization (ivts, orig_insn);
2177                   split_iv (ivts, orig_insn, delta);
2178                   continue;
2179                 }
2180             }
2181
2182         }
2183     }
2184 }
2185
2186 /*  Release the data structures used for the variable expansion
2187     optimization.  Callbacks for htab_traverse.  */
2188
2189 static int
2190 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2191 {
2192   struct var_to_expand *ve = *slot;
2193
2194   VEC_free (rtx, heap, ve->var_expansions);
2195
2196   /* Continue traversing the hash table.  */
2197   return 1;
2198 }
2199
2200 /* Release OPT_INFO.  */
2201
2202 static void
2203 free_opt_info (struct opt_info *opt_info)
2204 {
2205   if (opt_info->insns_to_split)
2206     htab_delete (opt_info->insns_to_split);
2207   if (opt_info->insns_with_var_to_expand)
2208     {
2209       htab_traverse (opt_info->insns_with_var_to_expand,
2210                      release_var_copies, NULL);
2211       htab_delete (opt_info->insns_with_var_to_expand);
2212     }
2213   free (opt_info);
2214 }