gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 DEF_VEC_P(rtx);
  89 DEF_VEC_ALLOC_P(rtx,heap);
  90
  91 /* Information about accumulators to expand.  */
  92
  93 struct var_to_expand
  94 {
  95   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  96   rtx reg;                         /* The accumulator which is expanded.  */
  97   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  98   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  99                                       or multiplication.  */
 100   int expansion_count;             /* Count the number of expansions generated so far.  */
 101   int reuse_expansion;             /* The expansion we intend to reuse to expand
 102                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 103                                       the original accumulator.  Else use
 104                                       var_expansions[REUSE_EXPANSION - 1].  */
 105 };
 106
 107 /* Information about optimization applied in
 108    the unrolled loop.  */
 109
 110 struct opt_info
 111 {
 112   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 113   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 114                                       to expand.  */
 115   unsigned first_new_block;        /* The first basic block that was
 116                                       duplicated.  */
 117   basic_block loop_exit;           /* The loop exit basic block.  */
 118   basic_block loop_preheader;      /* The loop preheader basic block.  */
 119 };
 120
 121 static void decide_unrolling_and_peeling (struct loops *, int);
 122 static void peel_loops_completely (struct loops *, int);
 123 static void decide_peel_simple (struct loop *, int);
 124 static void decide_peel_once_rolling (struct loop *, int);
 125 static void decide_peel_completely (struct loop *, int);
 126 static void decide_unroll_stupid (struct loop *, int);
 127 static void decide_unroll_constant_iterations (struct loop *, int);
 128 static void decide_unroll_runtime_iterations (struct loop *, int);
 129 static void peel_loop_simple (struct loops *, struct loop *);
 130 static void peel_loop_completely (struct loops *, struct loop *);
 131 static void unroll_loop_stupid (struct loops *, struct loop *);
 132 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 133 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 134 static struct opt_info *analyze_insns_in_loop (struct loop *);
 135 static void opt_info_start_duplication (struct opt_info *);
 136 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 137 static void free_opt_info (struct opt_info *);
 138 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 139 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 140 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 141 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 142 static int insert_var_expansion_initialization (void **, void *);
 143 static int combine_var_copies_in_loop_exit (void **, void *);
 144 static int release_var_copies (void **, void *);
 145 static rtx get_expansion (struct var_to_expand *);
 146
 147 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 148 void
 149 unroll_and_peel_loops (struct loops *loops, int flags)
 150 {
 151   struct loop *loop, *next;
 152   bool check;
 153
 154   /* First perform complete loop peeling (it is almost surely a win,
 155      and affects parameters for further decision a lot).  */
 156   peel_loops_completely (loops, flags);
 157
 158   /* Now decide rest of unrolling and peeling.  */
 159   decide_unrolling_and_peeling (loops, flags);
 160
 161   loop = loops->tree_root;
 162   while (loop->inner)
 163     loop = loop->inner;
 164
 165   /* Scan the loops, inner ones first.  */
 166   while (loop != loops->tree_root)
 167     {
 168       if (loop->next)
 169         {
 170           next = loop->next;
 171           while (next->inner)
 172             next = next->inner;
 173         }
 174       else
 175         next = loop->outer;
 176
 177       check = true;
 178       /* And perform the appropriate transformations.  */
 179       switch (loop->lpt_decision.decision)
 180         {
 181         case LPT_PEEL_COMPLETELY:
 182           /* Already done.  */
 183           gcc_unreachable ();
 184         case LPT_PEEL_SIMPLE:
 185           peel_loop_simple (loops, loop);
 186           break;
 187         case LPT_UNROLL_CONSTANT:
 188           unroll_loop_constant_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_RUNTIME:
 191           unroll_loop_runtime_iterations (loops, loop);
 192           break;
 193         case LPT_UNROLL_STUPID:
 194           unroll_loop_stupid (loops, loop);
 195           break;
 196         case LPT_NONE:
 197           check = false;
 198           break;
 199         default:
 200           gcc_unreachable ();
 201         }
 202       if (check)
 203         {
 204 #ifdef ENABLE_CHECKING
 205           verify_dominators (CDI_DOMINATORS);
 206           verify_loop_structure (loops);
 207 #endif
 208         }
 209       loop = next;
 210     }
 211
 212   iv_analysis_done ();
 213 }
 214
 215 /* Check whether exit of the LOOP is at the end of loop body.  */
 216
 217 static bool
 218 loop_exit_at_end_p (struct loop *loop)
 219 {
 220   struct niter_desc *desc = get_simple_loop_desc (loop);
 221   rtx insn;
 222
 223   if (desc->in_edge->dest != loop->latch)
 224     return false;
 225
 226   /* Check that the latch is empty.  */
 227   FOR_BB_INSNS (loop->latch, insn)
 228     {
 229       if (INSN_P (insn))
 230         return false;
 231     }
 232
 233   return true;
 234 }
 235
 236 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 237 static void
 238 peel_loops_completely (struct loops *loops, int flags)
 239 {
 240   struct loop *loop, *next;
 241
 242   loop = loops->tree_root;
 243   while (loop->inner)
 244     loop = loop->inner;
 245
 246   while (loop != loops->tree_root)
 247     {
 248       if (loop->next)
 249         {
 250           next = loop->next;
 251           while (next->inner)
 252             next = next->inner;
 253         }
 254       else
 255         next = loop->outer;
 256
 257       loop->lpt_decision.decision = LPT_NONE;
 258
 259       if (dump_file)
 260         fprintf (dump_file,
 261                  "\n;; *** Considering loop %d for complete peeling ***\n",
 262                  loop->num);
 263
 264       loop->ninsns = num_loop_insns (loop);
 265
 266       decide_peel_once_rolling (loop, flags);
 267       if (loop->lpt_decision.decision == LPT_NONE)
 268         decide_peel_completely (loop, flags);
 269
 270       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 271         {
 272           peel_loop_completely (loops, loop);
 273 #ifdef ENABLE_CHECKING
 274           verify_dominators (CDI_DOMINATORS);
 275           verify_loop_structure (loops);
 276 #endif
 277         }
 278       loop = next;
 279     }
 280 }
 281
 282 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 283 static void
 284 decide_unrolling_and_peeling (struct loops *loops, int flags)
 285 {
 286   struct loop *loop = loops->tree_root, *next;
 287
 288   while (loop->inner)
 289     loop = loop->inner;
 290
 291   /* Scan the loops, inner ones first.  */
 292   while (loop != loops->tree_root)
 293     {
 294       if (loop->next)
 295         {
 296           next = loop->next;
 297           while (next->inner)
 298             next = next->inner;
 299         }
 300       else
 301         next = loop->outer;
 302
 303       loop->lpt_decision.decision = LPT_NONE;
 304
 305       if (dump_file)
 306         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 307
 308       /* Do not peel cold areas.  */
 309       if (!maybe_hot_bb_p (loop->header))
 310         {
 311           if (dump_file)
 312             fprintf (dump_file, ";; Not considering loop, cold area\n");
 313           loop = next;
 314           continue;
 315         }
 316
 317       /* Can the loop be manipulated?  */
 318       if (!can_duplicate_loop_p (loop))
 319         {
 320           if (dump_file)
 321             fprintf (dump_file,
 322                      ";; Not considering loop, cannot duplicate\n");
 323           loop = next;
 324           continue;
 325         }
 326
 327       /* Skip non-innermost loops.  */
 328       if (loop->inner)
 329         {
 330           if (dump_file)
 331             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 332           loop = next;
 333           continue;
 334         }
 335
 336       loop->ninsns = num_loop_insns (loop);
 337       loop->av_ninsns = average_num_loop_insns (loop);
 338
 339       /* Try transformations one by one in decreasing order of
 340          priority.  */
 341
 342       decide_unroll_constant_iterations (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_unroll_runtime_iterations (loop, flags);
 345       if (loop->lpt_decision.decision == LPT_NONE)
 346         decide_unroll_stupid (loop, flags);
 347       if (loop->lpt_decision.decision == LPT_NONE)
 348         decide_peel_simple (loop, flags);
 349
 350       loop = next;
 351     }
 352 }
 353
 354 /* Decide whether the LOOP is once rolling and suitable for complete
 355    peeling.  */
 356 static void
 357 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 358 {
 359   struct niter_desc *desc;
 360
 361   if (dump_file)
 362     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 363
 364   /* Is the loop small enough?  */
 365   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 366     {
 367       if (dump_file)
 368         fprintf (dump_file, ";; Not considering loop, is too big\n");
 369       return;
 370     }
 371
 372   /* Check for simple loops.  */
 373   desc = get_simple_loop_desc (loop);
 374
 375   /* Check number of iterations.  */
 376   if (!desc->simple_p
 377       || desc->assumptions
 378       || desc->infinite
 379       || !desc->const_iter
 380       || desc->niter != 0)
 381     {
 382       if (dump_file)
 383         fprintf (dump_file,
 384                  ";; Unable to prove that the loop rolls exactly once\n");
 385       return;
 386     }
 387
 388   /* Success.  */
 389   if (dump_file)
 390     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 391   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 392 }
 393
 394 /* Decide whether the LOOP is suitable for complete peeling.  */
 395 static void
 396 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 397 {
 398   unsigned npeel;
 399   struct niter_desc *desc;
 400
 401   if (dump_file)
 402     fprintf (dump_file, "\n;; Considering peeling completely\n");
 403
 404   /* Skip non-innermost loops.  */
 405   if (loop->inner)
 406     {
 407       if (dump_file)
 408         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 409       return;
 410     }
 411
 412   /* Do not peel cold areas.  */
 413   if (!maybe_hot_bb_p (loop->header))
 414     {
 415       if (dump_file)
 416         fprintf (dump_file, ";; Not considering loop, cold area\n");
 417       return;
 418     }
 419
 420   /* Can the loop be manipulated?  */
 421   if (!can_duplicate_loop_p (loop))
 422     {
 423       if (dump_file)
 424         fprintf (dump_file,
 425                  ";; Not considering loop, cannot duplicate\n");
 426       return;
 427     }
 428
 429   /* npeel = number of iterations to peel.  */
 430   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 431   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 432     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 433
 434   /* Is the loop small enough?  */
 435   if (!npeel)
 436     {
 437       if (dump_file)
 438         fprintf (dump_file, ";; Not considering loop, is too big\n");
 439       return;
 440     }
 441
 442   /* Check for simple loops.  */
 443   desc = get_simple_loop_desc (loop);
 444
 445   /* Check number of iterations.  */
 446   if (!desc->simple_p
 447       || desc->assumptions
 448       || !desc->const_iter
 449       || desc->infinite)
 450     {
 451       if (dump_file)
 452         fprintf (dump_file,
 453                  ";; Unable to prove that the loop iterates constant times\n");
 454       return;
 455     }
 456
 457   if (desc->niter > npeel - 1)
 458     {
 459       if (dump_file)
 460         {
 461           fprintf (dump_file,
 462                    ";; Not peeling loop completely, rolls too much (");
 463           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 464           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 465         }
 466       return;
 467     }
 468
 469   /* Success.  */
 470   if (dump_file)
 471     fprintf (dump_file, ";; Decided to peel loop completely\n");
 472   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 473 }
 474
 475 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 476    completely.  The transformation done:
 477
 478    for (i = 0; i < 4; i++)
 479      body;
 480
 481    ==>
 482
 483    i = 0;
 484    body; i++;
 485    body; i++;
 486    body; i++;
 487    body; i++;
 488    */
 489 static void
 490 peel_loop_completely (struct loops *loops, struct loop *loop)
 491 {
 492   sbitmap wont_exit;
 493   unsigned HOST_WIDE_INT npeel;
 494   unsigned n_remove_edges, i;
 495   edge *remove_edges, ein;
 496   struct niter_desc *desc = get_simple_loop_desc (loop);
 497   struct opt_info *opt_info = NULL;
 498
 499   npeel = desc->niter;
 500
 501   if (npeel)
 502     {
 503       bool ok;
 504
 505       wont_exit = sbitmap_alloc (npeel + 1);
 506       sbitmap_ones (wont_exit);
 507       RESET_BIT (wont_exit, 0);
 508       if (desc->noloop_assumptions)
 509         RESET_BIT (wont_exit, 1);
 510
 511       remove_edges = xcalloc (npeel, sizeof (edge));
 512       n_remove_edges = 0;
 513
 514       if (flag_split_ivs_in_unroller)
 515         opt_info = analyze_insns_in_loop (loop);
 516
 517       opt_info_start_duplication (opt_info);
 518       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 519                                           loops, npeel,
 520                                           wont_exit, desc->out_edge,
 521                                           remove_edges, &n_remove_edges,
 522                                           DLTHE_FLAG_UPDATE_FREQ
 523                                           | (opt_info
 524                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 525       gcc_assert (ok);
 526
 527       free (wont_exit);
 528
 529       if (opt_info)
 530         {
 531           apply_opt_in_copies (opt_info, npeel, false, true);
 532           free_opt_info (opt_info);
 533         }
 534
 535       /* Remove the exit edges.  */
 536       for (i = 0; i < n_remove_edges; i++)
 537         remove_path (loops, remove_edges[i]);
 538       free (remove_edges);
 539     }
 540
 541   ein = desc->in_edge;
 542   free_simple_loop_desc (loop);
 543
 544   /* Now remove the unreachable part of the last iteration and cancel
 545      the loop.  */
 546   remove_path (loops, ein);
 547
 548   if (dump_file)
 549     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 550 }
 551
 552 /* Decide whether to unroll LOOP iterating constant number of times
 553    and how much.  */
 554
 555 static void
 556 decide_unroll_constant_iterations (struct loop *loop, int flags)
 557 {
 558   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 559   struct niter_desc *desc;
 560
 561   if (!(flags & UAP_UNROLL))
 562     {
 563       /* We were not asked to, just return back silently.  */
 564       return;
 565     }
 566
 567   if (dump_file)
 568     fprintf (dump_file,
 569              "\n;; Considering unrolling loop with constant "
 570              "number of iterations\n");
 571
 572   /* nunroll = total number of copies of the original loop body in
 573      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 574   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 575   nunroll_by_av
 576     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 577   if (nunroll > nunroll_by_av)
 578     nunroll = nunroll_by_av;
 579   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 580     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 581
 582   /* Skip big loops.  */
 583   if (nunroll <= 1)
 584     {
 585       if (dump_file)
 586         fprintf (dump_file, ";; Not considering loop, is too big\n");
 587       return;
 588     }
 589
 590   /* Check for simple loops.  */
 591   desc = get_simple_loop_desc (loop);
 592
 593   /* Check number of iterations.  */
 594   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 595     {
 596       if (dump_file)
 597         fprintf (dump_file,
 598                  ";; Unable to prove that the loop iterates constant times\n");
 599       return;
 600     }
 601
 602   /* Check whether the loop rolls enough to consider.  */
 603   if (desc->niter < 2 * nunroll)
 604     {
 605       if (dump_file)
 606         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 607       return;
 608     }
 609
 610   /* Success; now compute number of iterations to unroll.  We alter
 611      nunroll so that as few as possible copies of loop body are
 612      necessary, while still not decreasing the number of unrollings
 613      too much (at most by 1).  */
 614   best_copies = 2 * nunroll + 10;
 615
 616   i = 2 * nunroll + 2;
 617   if (i - 1 >= desc->niter)
 618     i = desc->niter - 2;
 619
 620   for (; i >= nunroll - 1; i--)
 621     {
 622       unsigned exit_mod = desc->niter % (i + 1);
 623
 624       if (!loop_exit_at_end_p (loop))
 625         n_copies = exit_mod + i + 1;
 626       else if (exit_mod != (unsigned) i
 627                || desc->noloop_assumptions != NULL_RTX)
 628         n_copies = exit_mod + i + 2;
 629       else
 630         n_copies = i + 1;
 631
 632       if (n_copies < best_copies)
 633         {
 634           best_copies = n_copies;
 635           best_unroll = i;
 636         }
 637     }
 638
 639   if (dump_file)
 640     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 641              best_unroll + 1, best_copies, nunroll);
 642
 643   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 644   loop->lpt_decision.times = best_unroll;
 645
 646   if (dump_file)
 647     fprintf (dump_file,
 648              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 649              loop->lpt_decision.times);
 650 }
 651
 652 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 653    times.  The transformation does this:
 654
 655    for (i = 0; i < 102; i++)
 656      body;
 657
 658    ==>
 659
 660    i = 0;
 661    body; i++;
 662    body; i++;
 663    while (i < 102)
 664      {
 665        body; i++;
 666        body; i++;
 667        body; i++;
 668        body; i++;
 669      }
 670   */
 671 static void
 672 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 673 {
 674   unsigned HOST_WIDE_INT niter;
 675   unsigned exit_mod;
 676   sbitmap wont_exit;
 677   unsigned n_remove_edges, i;
 678   edge *remove_edges;
 679   unsigned max_unroll = loop->lpt_decision.times;
 680   struct niter_desc *desc = get_simple_loop_desc (loop);
 681   bool exit_at_end = loop_exit_at_end_p (loop);
 682   struct opt_info *opt_info = NULL;
 683   bool ok;
 684
 685   niter = desc->niter;
 686
 687   /* Should not get here (such loop should be peeled instead).  */
 688   gcc_assert (niter > max_unroll + 1);
 689
 690   exit_mod = niter % (max_unroll + 1);
 691
 692   wont_exit = sbitmap_alloc (max_unroll + 1);
 693   sbitmap_ones (wont_exit);
 694
 695   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 696   n_remove_edges = 0;
 697   if (flag_split_ivs_in_unroller
 698       || flag_variable_expansion_in_unroller)
 699     opt_info = analyze_insns_in_loop (loop);
 700
 701   if (!exit_at_end)
 702     {
 703       /* The exit is not at the end of the loop; leave exit test
 704          in the first copy, so that the loops that start with test
 705          of exit condition have continuous body after unrolling.  */
 706
 707       if (dump_file)
 708         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 709
 710       /* Peel exit_mod iterations.  */
 711       RESET_BIT (wont_exit, 0);
 712       if (desc->noloop_assumptions)
 713         RESET_BIT (wont_exit, 1);
 714
 715       if (exit_mod)
 716         {
 717           opt_info_start_duplication (opt_info);
 718           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 719                                               loops, exit_mod,
 720                                               wont_exit, desc->out_edge,
 721                                               remove_edges, &n_remove_edges,
 722                                               DLTHE_FLAG_UPDATE_FREQ
 723                                               | (opt_info && exit_mod > 1
 724                                                  ? DLTHE_RECORD_COPY_NUMBER
 725                                                    : 0));
 726           gcc_assert (ok);
 727
 728           if (opt_info && exit_mod > 1)
 729             apply_opt_in_copies (opt_info, exit_mod, false, false);
 730
 731           desc->noloop_assumptions = NULL_RTX;
 732           desc->niter -= exit_mod;
 733           desc->niter_max -= exit_mod;
 734         }
 735
 736       SET_BIT (wont_exit, 1);
 737     }
 738   else
 739     {
 740       /* Leave exit test in last copy, for the same reason as above if
 741          the loop tests the condition at the end of loop body.  */
 742
 743       if (dump_file)
 744         fprintf (dump_file, ";; Condition on end of loop.\n");
 745
 746       /* We know that niter >= max_unroll + 2; so we do not need to care of
 747          case when we would exit before reaching the loop.  So just peel
 748          exit_mod + 1 iterations.  */
 749       if (exit_mod != max_unroll
 750           || desc->noloop_assumptions)
 751         {
 752           RESET_BIT (wont_exit, 0);
 753           if (desc->noloop_assumptions)
 754             RESET_BIT (wont_exit, 1);
 755
 756           opt_info_start_duplication (opt_info);
 757           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 758                                               loops, exit_mod + 1,
 759                                               wont_exit, desc->out_edge,
 760                                               remove_edges, &n_remove_edges,
 761                                               DLTHE_FLAG_UPDATE_FREQ
 762                                               | (opt_info && exit_mod > 0
 763                                                  ? DLTHE_RECORD_COPY_NUMBER
 764                                                    : 0));
 765           gcc_assert (ok);
 766
 767           if (opt_info && exit_mod > 0)
 768             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 769
 770           desc->niter -= exit_mod + 1;
 771           desc->niter_max -= exit_mod + 1;
 772           desc->noloop_assumptions = NULL_RTX;
 773
 774           SET_BIT (wont_exit, 0);
 775           SET_BIT (wont_exit, 1);
 776         }
 777
 778       RESET_BIT (wont_exit, max_unroll);
 779     }
 780
 781   /* Now unroll the loop.  */
 782
 783   opt_info_start_duplication (opt_info);
 784   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 785                                       loops, max_unroll,
 786                                       wont_exit, desc->out_edge,
 787                                       remove_edges, &n_remove_edges,
 788                                       DLTHE_FLAG_UPDATE_FREQ
 789                                       | (opt_info
 790                                          ? DLTHE_RECORD_COPY_NUMBER
 791                                            : 0));
 792   gcc_assert (ok);
 793
 794   if (opt_info)
 795     {
 796       apply_opt_in_copies (opt_info, max_unroll, true, true);
 797       free_opt_info (opt_info);
 798     }
 799
 800   free (wont_exit);
 801
 802   if (exit_at_end)
 803     {
 804       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 805       /* Find a new in and out edge; they are in the last copy we have made.  */
 806
 807       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 808         {
 809           desc->out_edge = EDGE_SUCC (exit_block, 0);
 810           desc->in_edge = EDGE_SUCC (exit_block, 1);
 811         }
 812       else
 813         {
 814           desc->out_edge = EDGE_SUCC (exit_block, 1);
 815           desc->in_edge = EDGE_SUCC (exit_block, 0);
 816         }
 817     }
 818
 819   desc->niter /= max_unroll + 1;
 820   desc->niter_max /= max_unroll + 1;
 821   desc->niter_expr = GEN_INT (desc->niter);
 822
 823   /* Remove the edges.  */
 824   for (i = 0; i < n_remove_edges; i++)
 825     remove_path (loops, remove_edges[i]);
 826   free (remove_edges);
 827
 828   if (dump_file)
 829     fprintf (dump_file,
 830              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 831              max_unroll, num_loop_insns (loop));
 832 }
 833
 834 /* Decide whether to unroll LOOP iterating runtime computable number of times
 835    and how much.  */
 836 static void
 837 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 838 {
 839   unsigned nunroll, nunroll_by_av, i;
 840   struct niter_desc *desc;
 841
 842   if (!(flags & UAP_UNROLL))
 843     {
 844       /* We were not asked to, just return back silently.  */
 845       return;
 846     }
 847
 848   if (dump_file)
 849     fprintf (dump_file,
 850              "\n;; Considering unrolling loop with runtime "
 851              "computable number of iterations\n");
 852
 853   /* nunroll = total number of copies of the original loop body in
 854      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 855   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 856   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 857   if (nunroll > nunroll_by_av)
 858     nunroll = nunroll_by_av;
 859   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 860     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 861
 862   /* Skip big loops.  */
 863   if (nunroll <= 1)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file, ";; Not considering loop, is too big\n");
 867       return;
 868     }
 869
 870   /* Check for simple loops.  */
 871   desc = get_simple_loop_desc (loop);
 872
 873   /* Check simpleness.  */
 874   if (!desc->simple_p || desc->assumptions)
 875     {
 876       if (dump_file)
 877         fprintf (dump_file,
 878                  ";; Unable to prove that the number of iterations "
 879                  "can be counted in runtime\n");
 880       return;
 881     }
 882
 883   if (desc->const_iter)
 884     {
 885       if (dump_file)
 886         fprintf (dump_file, ";; Loop iterates constant times\n");
 887       return;
 888     }
 889
 890   /* If we have profile feedback, check whether the loop rolls.  */
 891   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 892     {
 893       if (dump_file)
 894         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 895       return;
 896     }
 897
 898   /* Success; now force nunroll to be power of 2, as we are unable to
 899      cope with overflows in computation of number of iterations.  */
 900   for (i = 1; 2 * i <= nunroll; i *= 2)
 901     continue;
 902
 903   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 904   loop->lpt_decision.times = i - 1;
 905
 906   if (dump_file)
 907     fprintf (dump_file,
 908              ";; Decided to unroll the runtime computable "
 909              "times rolling loop, %d times.\n",
 910              loop->lpt_decision.times);
 911 }
 912
 913 /* Unroll LOOP for that we are able to count number of iterations in runtime
 914    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 915    extra care for case n < 0):
 916
 917    for (i = 0; i < n; i++)
 918      body;
 919
 920    ==>
 921
 922    i = 0;
 923    mod = n % 4;
 924
 925    switch (mod)
 926      {
 927        case 3:
 928          body; i++;
 929        case 2:
 930          body; i++;
 931        case 1:
 932          body; i++;
 933        case 0: ;
 934      }
 935
 936    while (i < n)
 937      {
 938        body; i++;
 939        body; i++;
 940        body; i++;
 941        body; i++;
 942      }
 943    */
 944 static void
 945 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 946 {
 947   rtx old_niter, niter, init_code, branch_code, tmp;
 948   unsigned i, j, p;
 949   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 950   unsigned n_dom_bbs;
 951   sbitmap wont_exit;
 952   int may_exit_copy;
 953   unsigned n_peel, n_remove_edges;
 954   edge *remove_edges, e;
 955   bool extra_zero_check, last_may_exit;
 956   unsigned max_unroll = loop->lpt_decision.times;
 957   struct niter_desc *desc = get_simple_loop_desc (loop);
 958   bool exit_at_end = loop_exit_at_end_p (loop);
 959   struct opt_info *opt_info = NULL;
 960   bool ok;
 961
 962   if (flag_split_ivs_in_unroller
 963       || flag_variable_expansion_in_unroller)
 964     opt_info = analyze_insns_in_loop (loop);
 965
 966   /* Remember blocks whose dominators will have to be updated.  */
 967   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 968   n_dom_bbs = 0;
 969
 970   body = get_loop_body (loop);
 971   for (i = 0; i < loop->num_nodes; i++)
 972     {
 973       unsigned nldom;
 974       basic_block *ldom;
 975
 976       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 977       for (j = 0; j < nldom; j++)
 978         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 979           dom_bbs[n_dom_bbs++] = ldom[j];
 980
 981       free (ldom);
 982     }
 983   free (body);
 984
 985   if (!exit_at_end)
 986     {
 987       /* Leave exit in first copy (for explanation why see comment in
 988          unroll_loop_constant_iterations).  */
 989       may_exit_copy = 0;
 990       n_peel = max_unroll - 1;
 991       extra_zero_check = true;
 992       last_may_exit = false;
 993     }
 994   else
 995     {
 996       /* Leave exit in last copy (for explanation why see comment in
 997          unroll_loop_constant_iterations).  */
 998       may_exit_copy = max_unroll;
 999       n_peel = max_unroll;
1000       extra_zero_check = false;
1001       last_may_exit = true;
1002     }
1003
1004   /* Get expression for number of iterations.  */
1005   start_sequence ();
1006   old_niter = niter = gen_reg_rtx (desc->mode);
1007   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1008   if (tmp != niter)
1009     emit_move_insn (niter, tmp);
1010
1011   /* Count modulo by ANDing it with max_unroll; we use the fact that
1012      the number of unrollings is a power of two, and thus this is correct
1013      even if there is overflow in the computation.  */
1014   niter = expand_simple_binop (desc->mode, AND,
1015                                niter,
1016                                GEN_INT (max_unroll),
1017                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1018
1019   init_code = get_insns ();
1020   end_sequence ();
1021
1022   /* Precondition the loop.  */
1023   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1024
1025   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1026   n_remove_edges = 0;
1027
1028   wont_exit = sbitmap_alloc (max_unroll + 2);
1029
1030   /* Peel the first copy of loop body (almost always we must leave exit test
1031      here; the only exception is when we have extra zero check and the number
1032      of iterations is reliable.  Also record the place of (possible) extra
1033      zero check.  */
1034   sbitmap_zero (wont_exit);
1035   if (extra_zero_check
1036       && !desc->noloop_assumptions)
1037     SET_BIT (wont_exit, 1);
1038   ezc_swtch = loop_preheader_edge (loop)->src;
1039   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1040                                       loops, 1,
1041                                       wont_exit, desc->out_edge,
1042                                       remove_edges, &n_remove_edges,
1043                                       DLTHE_FLAG_UPDATE_FREQ);
1044   gcc_assert (ok);
1045
1046   /* Record the place where switch will be built for preconditioning.  */
1047   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1048                                 NULL_RTX);
1049
1050   for (i = 0; i < n_peel; i++)
1051     {
1052       /* Peel the copy.  */
1053       sbitmap_zero (wont_exit);
1054       if (i != n_peel - 1 || !last_may_exit)
1055         SET_BIT (wont_exit, 1);
1056       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1057                                           loops, 1,
1058                                           wont_exit, desc->out_edge,
1059                                           remove_edges, &n_remove_edges,
1060                                           DLTHE_FLAG_UPDATE_FREQ);
1061       gcc_assert (ok);
1062
1063       /* Create item for switch.  */
1064       j = n_peel - i - (extra_zero_check ? 0 : 1);
1065       p = REG_BR_PROB_BASE / (i + 2);
1066
1067       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1068       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1069                                           block_label (preheader), p,
1070                                           NULL_RTX);
1071
1072       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1073       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1074       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1075       e = make_edge (swtch, preheader,
1076                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1077       e->probability = p;
1078     }
1079
1080   if (extra_zero_check)
1081     {
1082       /* Add branch for zero iterations.  */
1083       p = REG_BR_PROB_BASE / (max_unroll + 1);
1084       swtch = ezc_swtch;
1085       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1086       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1087                                           block_label (preheader), p,
1088                                           NULL_RTX);
1089
1090       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1091       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1092       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1093       e = make_edge (swtch, preheader,
1094                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1095       e->probability = p;
1096     }
1097
1098   /* Recount dominators for outer blocks.  */
1099   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1100
1101   /* And unroll loop.  */
1102
1103   sbitmap_ones (wont_exit);
1104   RESET_BIT (wont_exit, may_exit_copy);
1105   opt_info_start_duplication (opt_info);
1106
1107   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1108                                       loops, max_unroll,
1109                                       wont_exit, desc->out_edge,
1110                                       remove_edges, &n_remove_edges,
1111                                       DLTHE_FLAG_UPDATE_FREQ
1112                                       | (opt_info
1113                                          ? DLTHE_RECORD_COPY_NUMBER
1114                                            : 0));
1115   gcc_assert (ok);
1116
1117   if (opt_info)
1118     {
1119       apply_opt_in_copies (opt_info, max_unroll, true, true);
1120       free_opt_info (opt_info);
1121     }
1122
1123   free (wont_exit);
1124
1125   if (exit_at_end)
1126     {
1127       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1128       /* Find a new in and out edge; they are in the last copy we have
1129          made.  */
1130
1131       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1132         {
1133           desc->out_edge = EDGE_SUCC (exit_block, 0);
1134           desc->in_edge = EDGE_SUCC (exit_block, 1);
1135         }
1136       else
1137         {
1138           desc->out_edge = EDGE_SUCC (exit_block, 1);
1139           desc->in_edge = EDGE_SUCC (exit_block, 0);
1140         }
1141     }
1142
1143   /* Remove the edges.  */
1144   for (i = 0; i < n_remove_edges; i++)
1145     remove_path (loops, remove_edges[i]);
1146   free (remove_edges);
1147
1148   /* We must be careful when updating the number of iterations due to
1149      preconditioning and the fact that the value must be valid at entry
1150      of the loop.  After passing through the above code, we see that
1151      the correct new number of iterations is this:  */
1152   gcc_assert (!desc->const_iter);
1153   desc->niter_expr =
1154     simplify_gen_binary (UDIV, desc->mode, old_niter,
1155                          GEN_INT (max_unroll + 1));
1156   desc->niter_max /= max_unroll + 1;
1157   if (exit_at_end)
1158     {
1159       desc->niter_expr =
1160         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1161       desc->noloop_assumptions = NULL_RTX;
1162       desc->niter_max--;
1163     }
1164
1165   if (dump_file)
1166     fprintf (dump_file,
1167              ";; Unrolled loop %d times, counting # of iterations "
1168              "in runtime, %i insns\n",
1169              max_unroll, num_loop_insns (loop));
1170 }
1171
1172 /* Decide whether to simply peel LOOP and how much.  */
1173 static void
1174 decide_peel_simple (struct loop *loop, int flags)
1175 {
1176   unsigned npeel;
1177   struct niter_desc *desc;
1178
1179   if (!(flags & UAP_PEEL))
1180     {
1181       /* We were not asked to, just return back silently.  */
1182       return;
1183     }
1184
1185   if (dump_file)
1186     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1187
1188   /* npeel = number of iterations to peel.  */
1189   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1190   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1191     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1192
1193   /* Skip big loops.  */
1194   if (!npeel)
1195     {
1196       if (dump_file)
1197         fprintf (dump_file, ";; Not considering loop, is too big\n");
1198       return;
1199     }
1200
1201   /* Check for simple loops.  */
1202   desc = get_simple_loop_desc (loop);
1203
1204   /* Check number of iterations.  */
1205   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1206     {
1207       if (dump_file)
1208         fprintf (dump_file, ";; Loop iterates constant times\n");
1209       return;
1210     }
1211
1212   /* Do not simply peel loops with branches inside -- it increases number
1213      of mispredicts.  */
1214   if (num_loop_branches (loop) > 1)
1215     {
1216       if (dump_file)
1217         fprintf (dump_file, ";; Not peeling, contains branches\n");
1218       return;
1219     }
1220
1221   if (loop->header->count)
1222     {
1223       unsigned niter = expected_loop_iterations (loop);
1224       if (niter + 1 > npeel)
1225         {
1226           if (dump_file)
1227             {
1228               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1229               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1230                        (HOST_WIDEST_INT) (niter + 1));
1231               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1232                        npeel);
1233             }
1234           return;
1235         }
1236       npeel = niter + 1;
1237     }
1238   else
1239     {
1240       /* For now we have no good heuristics to decide whether loop peeling
1241          will be effective, so disable it.  */
1242       if (dump_file)
1243         fprintf (dump_file,
1244                  ";; Not peeling loop, no evidence it will be profitable\n");
1245       return;
1246     }
1247
1248   /* Success.  */
1249   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1250   loop->lpt_decision.times = npeel;
1251
1252   if (dump_file)
1253     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1254              loop->lpt_decision.times);
1255 }
1256
1257 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1258    while (cond)
1259      body;
1260
1261    ==>
1262
1263    if (!cond) goto end;
1264    body;
1265    if (!cond) goto end;
1266    body;
1267    while (cond)
1268      body;
1269    end: ;
1270    */
1271 static void
1272 peel_loop_simple (struct loops *loops, struct loop *loop)
1273 {
1274   sbitmap wont_exit;
1275   unsigned npeel = loop->lpt_decision.times;
1276   struct niter_desc *desc = get_simple_loop_desc (loop);
1277   struct opt_info *opt_info = NULL;
1278   bool ok;
1279
1280   if (flag_split_ivs_in_unroller && npeel > 1)
1281     opt_info = analyze_insns_in_loop (loop);
1282
1283   wont_exit = sbitmap_alloc (npeel + 1);
1284   sbitmap_zero (wont_exit);
1285
1286   opt_info_start_duplication (opt_info);
1287
1288   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1289                                       loops, npeel, wont_exit,
1290                                       NULL, NULL,
1291                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1292                                       | (opt_info
1293                                          ? DLTHE_RECORD_COPY_NUMBER
1294                                            : 0));
1295   gcc_assert (ok);
1296
1297   free (wont_exit);
1298
1299   if (opt_info)
1300     {
1301       apply_opt_in_copies (opt_info, npeel, false, false);
1302       free_opt_info (opt_info);
1303     }
1304
1305   if (desc->simple_p)
1306     {
1307       if (desc->const_iter)
1308         {
1309           desc->niter -= npeel;
1310           desc->niter_expr = GEN_INT (desc->niter);
1311           desc->noloop_assumptions = NULL_RTX;
1312         }
1313       else
1314         {
1315           /* We cannot just update niter_expr, as its value might be clobbered
1316              inside loop.  We could handle this by counting the number into
1317              temporary just like we do in runtime unrolling, but it does not
1318              seem worthwhile.  */
1319           free_simple_loop_desc (loop);
1320         }
1321     }
1322   if (dump_file)
1323     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1324 }
1325
1326 /* Decide whether to unroll LOOP stupidly and how much.  */
1327 static void
1328 decide_unroll_stupid (struct loop *loop, int flags)
1329 {
1330   unsigned nunroll, nunroll_by_av, i;
1331   struct niter_desc *desc;
1332
1333   if (!(flags & UAP_UNROLL_ALL))
1334     {
1335       /* We were not asked to, just return back silently.  */
1336       return;
1337     }
1338
1339   if (dump_file)
1340     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1341
1342   /* nunroll = total number of copies of the original loop body in
1343      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1344   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1345   nunroll_by_av
1346     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1347   if (nunroll > nunroll_by_av)
1348     nunroll = nunroll_by_av;
1349   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1350     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1351
1352   /* Skip big loops.  */
1353   if (nunroll <= 1)
1354     {
1355       if (dump_file)
1356         fprintf (dump_file, ";; Not considering loop, is too big\n");
1357       return;
1358     }
1359
1360   /* Check for simple loops.  */
1361   desc = get_simple_loop_desc (loop);
1362
1363   /* Check simpleness.  */
1364   if (desc->simple_p && !desc->assumptions)
1365     {
1366       if (dump_file)
1367         fprintf (dump_file, ";; The loop is simple\n");
1368       return;
1369     }
1370
1371   /* Do not unroll loops with branches inside -- it increases number
1372      of mispredicts.  */
1373   if (num_loop_branches (loop) > 1)
1374     {
1375       if (dump_file)
1376         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1377       return;
1378     }
1379
1380   /* If we have profile feedback, check whether the loop rolls.  */
1381   if (loop->header->count
1382       && expected_loop_iterations (loop) < 2 * nunroll)
1383     {
1384       if (dump_file)
1385         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1386       return;
1387     }
1388
1389   /* Success.  Now force nunroll to be power of 2, as it seems that this
1390      improves results (partially because of better alignments, partially
1391      because of some dark magic).  */
1392   for (i = 1; 2 * i <= nunroll; i *= 2)
1393     continue;
1394
1395   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1396   loop->lpt_decision.times = i - 1;
1397
1398   if (dump_file)
1399     fprintf (dump_file,
1400              ";; Decided to unroll the loop stupidly, %d times.\n",
1401              loop->lpt_decision.times);
1402 }
1403
1404 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1405    while (cond)
1406      body;
1407
1408    ==>
1409
1410    while (cond)
1411      {
1412        body;
1413        if (!cond) break;
1414        body;
1415        if (!cond) break;
1416        body;
1417        if (!cond) break;
1418        body;
1419      }
1420    */
1421 static void
1422 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1423 {
1424   sbitmap wont_exit;
1425   unsigned nunroll = loop->lpt_decision.times;
1426   struct niter_desc *desc = get_simple_loop_desc (loop);
1427   struct opt_info *opt_info = NULL;
1428   bool ok;
1429
1430   if (flag_split_ivs_in_unroller
1431       || flag_variable_expansion_in_unroller)
1432     opt_info = analyze_insns_in_loop (loop);
1433
1434
1435   wont_exit = sbitmap_alloc (nunroll + 1);
1436   sbitmap_zero (wont_exit);
1437   opt_info_start_duplication (opt_info);
1438
1439   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1440                                       loops, nunroll, wont_exit,
1441                                       NULL, NULL, NULL,
1442                                       DLTHE_FLAG_UPDATE_FREQ
1443                                       | (opt_info
1444                                          ? DLTHE_RECORD_COPY_NUMBER
1445                                            : 0));
1446   gcc_assert (ok);
1447
1448   if (opt_info)
1449     {
1450       apply_opt_in_copies (opt_info, nunroll, true, true);
1451       free_opt_info (opt_info);
1452     }
1453
1454   free (wont_exit);
1455
1456   if (desc->simple_p)
1457     {
1458       /* We indeed may get here provided that there are nontrivial assumptions
1459          for a loop to be really simple.  We could update the counts, but the
1460          problem is that we are unable to decide which exit will be taken
1461          (not really true in case the number of iterations is constant,
1462          but noone will do anything with this information, so we do not
1463          worry about it).  */
1464       desc->simple_p = false;
1465     }
1466
1467   if (dump_file)
1468     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1469              nunroll, num_loop_insns (loop));
1470 }
1471
1472 /* A hash function for information about insns to split.  */
1473
1474 static hashval_t
1475 si_info_hash (const void *ivts)
1476 {
1477   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1478 }
1479
1480 /* An equality functions for information about insns to split.  */
1481
1482 static int
1483 si_info_eq (const void *ivts1, const void *ivts2)
1484 {
1485   const struct iv_to_split *i1 = ivts1;
1486   const struct iv_to_split *i2 = ivts2;
1487
1488   return i1->insn == i2->insn;
1489 }
1490
1491 /* Return a hash for VES, which is really a "var_to_expand *".  */
1492
1493 static hashval_t
1494 ve_info_hash (const void *ves)
1495 {
1496   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1497 }
1498
1499 /* Return true if IVTS1 and IVTS2 (which are really both of type
1500    "var_to_expand *") refer to the same instruction.  */
1501
1502 static int
1503 ve_info_eq (const void *ivts1, const void *ivts2)
1504 {
1505   const struct var_to_expand *i1 = ivts1;
1506   const struct var_to_expand *i2 = ivts2;
1507
1508   return i1->insn == i2->insn;
1509 }
1510
1511 /* Returns true if REG is referenced in one insn in LOOP.  */
1512
1513 bool
1514 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1515 {
1516   basic_block *body, bb;
1517   unsigned i;
1518   int count_ref = 0;
1519   rtx insn;
1520
1521   body = get_loop_body (loop);
1522   for (i = 0; i < loop->num_nodes; i++)
1523     {
1524       bb = body[i];
1525
1526       FOR_BB_INSNS (bb, insn)
1527       {
1528         if (rtx_referenced_p (reg, insn))
1529           count_ref++;
1530       }
1531     }
1532   return (count_ref  == 1);
1533 }
1534
1535 /* Determine whether INSN contains an accumulator
1536    which can be expanded into separate copies,
1537    one for each copy of the LOOP body.
1538
1539    for (i = 0 ; i < n; i++)
1540      sum += a[i];
1541
1542    ==>
1543
1544    sum += a[i]
1545    ....
1546    i = i+1;
1547    sum1 += a[i]
1548    ....
1549    i = i+1
1550    sum2 += a[i];
1551    ....
1552
1553    Return NULL if INSN contains no opportunity for expansion of accumulator.
1554    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1555    information and return a pointer to it.
1556 */
1557
1558 static struct var_to_expand *
1559 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1560 {
1561   rtx set, dest, src, op1;
1562   struct var_to_expand *ves;
1563   enum machine_mode mode1, mode2;
1564
1565   set = single_set (insn);
1566   if (!set)
1567     return NULL;
1568
1569   dest = SET_DEST (set);
1570   src = SET_SRC (set);
1571
1572   if (GET_CODE (src) != PLUS
1573       && GET_CODE (src) != MINUS
1574       && GET_CODE (src) != MULT)
1575     return NULL;
1576
1577   if (!XEXP (src, 0))
1578     return NULL;
1579
1580   op1 = XEXP (src, 0);
1581
1582   if (!REG_P (dest)
1583       && !(GET_CODE (dest) == SUBREG
1584            && REG_P (SUBREG_REG (dest))))
1585     return NULL;
1586
1587   if (!rtx_equal_p (dest, op1))
1588     return NULL;
1589
1590   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1591     return NULL;
1592
1593   if (rtx_referenced_p (dest, XEXP (src, 1)))
1594     return NULL;
1595
1596   mode1 = GET_MODE (dest);
1597   mode2 = GET_MODE (XEXP (src, 1));
1598   if ((FLOAT_MODE_P (mode1)
1599        || FLOAT_MODE_P (mode2))
1600       && !flag_unsafe_math_optimizations)
1601     return NULL;
1602
1603   /* Record the accumulator to expand.  */
1604   ves = xmalloc (sizeof (struct var_to_expand));
1605   ves->insn = insn;
1606   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1607   ves->reg = copy_rtx (dest);
1608   ves->op = GET_CODE (src);
1609   ves->expansion_count = 0;
1610   ves->reuse_expansion = 0;
1611   return ves;
1612 }
1613
1614 /* Determine whether there is an induction variable in INSN that
1615    we would like to split during unrolling.
1616
1617    I.e. replace
1618
1619    i = i + 1;
1620    ...
1621    i = i + 1;
1622    ...
1623    i = i + 1;
1624    ...
1625
1626    type chains by
1627
1628    i0 = i + 1
1629    ...
1630    i = i0 + 1
1631    ...
1632    i = i0 + 2
1633    ...
1634
1635    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1636    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1637    pointer to it.  */
1638
1639 static struct iv_to_split *
1640 analyze_iv_to_split_insn (rtx insn)
1641 {
1642   rtx set, dest;
1643   struct rtx_iv iv;
1644   struct iv_to_split *ivts;
1645   bool ok;
1646
1647   /* For now we just split the basic induction variables.  Later this may be
1648      extended for example by selecting also addresses of memory references.  */
1649   set = single_set (insn);
1650   if (!set)
1651     return NULL;
1652
1653   dest = SET_DEST (set);
1654   if (!REG_P (dest))
1655     return NULL;
1656
1657   if (!biv_p (insn, dest))
1658     return NULL;
1659
1660   ok = iv_analyze (insn, dest, &iv);
1661   gcc_assert (ok);
1662
1663   if (iv.step == const0_rtx
1664       || iv.mode != iv.extend_mode)
1665     return NULL;
1666
1667   /* Record the insn to split.  */
1668   ivts = xmalloc (sizeof (struct iv_to_split));
1669   ivts->insn = insn;
1670   ivts->base_var = NULL_RTX;
1671   ivts->step = iv.step;
1672   ivts->n_loc = 1;
1673   ivts->loc[0] = 1;
1674
1675   return ivts;
1676 }
1677
1678 /* Determines which of insns in LOOP can be optimized.
1679    Return a OPT_INFO struct with the relevant hash tables filled
1680    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1681    is undefined for the return value.  */
1682
1683 static struct opt_info *
1684 analyze_insns_in_loop (struct loop *loop)
1685 {
1686   basic_block *body, bb;
1687   unsigned i, num_edges = 0;
1688   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1689   rtx insn;
1690   struct iv_to_split *ivts = NULL;
1691   struct var_to_expand *ves = NULL;
1692   PTR *slot1;
1693   PTR *slot2;
1694   edge *edges = get_loop_exit_edges (loop, &num_edges);
1695   bool can_apply = false;
1696
1697   iv_analysis_loop_init (loop);
1698
1699   body = get_loop_body (loop);
1700
1701   if (flag_split_ivs_in_unroller)
1702     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1703                                             si_info_hash, si_info_eq, free);
1704
1705   /* Record the loop exit bb and loop preheader before the unrolling.  */
1706   if (!loop_preheader_edge (loop)->src)
1707     {
1708       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1709       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1710     }
1711   else
1712     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1713
1714   if (num_edges == 1
1715       && !(edges[0]->flags & EDGE_COMPLEX))
1716     {
1717       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1718       can_apply = true;
1719     }
1720
1721   if (flag_variable_expansion_in_unroller
1722       && can_apply)
1723     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1724                                                       ve_info_hash, ve_info_eq, free);
1725
1726   for (i = 0; i < loop->num_nodes; i++)
1727     {
1728       bb = body[i];
1729       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1730         continue;
1731
1732       FOR_BB_INSNS (bb, insn)
1733       {
1734         if (!INSN_P (insn))
1735           continue;
1736
1737         if (opt_info->insns_to_split)
1738           ivts = analyze_iv_to_split_insn (insn);
1739
1740         if (ivts)
1741           {
1742             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1743             *slot1 = ivts;
1744             continue;
1745           }
1746
1747         if (opt_info->insns_with_var_to_expand)
1748           ves = analyze_insn_to_expand_var (loop, insn);
1749
1750         if (ves)
1751           {
1752             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1753             *slot2 = ves;
1754           }
1755       }
1756     }
1757
1758   free (edges);
1759   free (body);
1760   return opt_info;
1761 }
1762
1763 /* Called just before loop duplication.  Records start of duplicated area
1764    to OPT_INFO.  */
1765
1766 static void
1767 opt_info_start_duplication (struct opt_info *opt_info)
1768 {
1769   if (opt_info)
1770     opt_info->first_new_block = last_basic_block;
1771 }
1772
1773 /* Determine the number of iterations between initialization of the base
1774    variable and the current copy (N_COPY).  N_COPIES is the total number
1775    of newly created copies.  UNROLLING is true if we are unrolling
1776    (not peeling) the loop.  */
1777
1778 static unsigned
1779 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1780 {
1781   if (unrolling)
1782     {
1783       /* If we are unrolling, initialization is done in the original loop
1784          body (number 0).  */
1785       return n_copy;
1786     }
1787   else
1788     {
1789       /* If we are peeling, the copy in that the initialization occurs has
1790          number 1.  The original loop (number 0) is the last.  */
1791       if (n_copy)
1792         return n_copy - 1;
1793       else
1794         return n_copies;
1795     }
1796 }
1797
1798 /* Locate in EXPR the expression corresponding to the location recorded
1799    in IVTS, and return a pointer to the RTX for this location.  */
1800
1801 static rtx *
1802 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1803 {
1804   unsigned i;
1805   rtx *ret = &expr;
1806
1807   for (i = 0; i < ivts->n_loc; i++)
1808     ret = &XEXP (*ret, ivts->loc[i]);
1809
1810   return ret;
1811 }
1812
1813 /* Allocate basic variable for the induction variable chain.  Callback for
1814    htab_traverse.  */
1815
1816 static int
1817 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1818 {
1819   struct iv_to_split *ivts = *slot;
1820   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1821
1822   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1823
1824   return 1;
1825 }
1826
1827 /* Insert initialization of basic variable of IVTS before INSN, taking
1828    the initial value from INSN.  */
1829
1830 static void
1831 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1832 {
1833   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1834   rtx seq;
1835
1836   start_sequence ();
1837   expr = force_operand (expr, ivts->base_var);
1838   if (expr != ivts->base_var)
1839     emit_move_insn (ivts->base_var, expr);
1840   seq = get_insns ();
1841   end_sequence ();
1842
1843   emit_insn_before (seq, insn);
1844 }
1845
1846 /* Replace the use of induction variable described in IVTS in INSN
1847    by base variable + DELTA * step.  */
1848
1849 static void
1850 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1851 {
1852   rtx expr, *loc, seq, incr, var;
1853   enum machine_mode mode = GET_MODE (ivts->base_var);
1854   rtx src, dest, set;
1855
1856   /* Construct base + DELTA * step.  */
1857   if (!delta)
1858     expr = ivts->base_var;
1859   else
1860     {
1861       incr = simplify_gen_binary (MULT, mode,
1862                                   ivts->step, gen_int_mode (delta, mode));
1863       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1864                                   ivts->base_var, incr);
1865     }
1866
1867   /* Figure out where to do the replacement.  */
1868   loc = get_ivts_expr (single_set (insn), ivts);
1869
1870   /* If we can make the replacement right away, we're done.  */
1871   if (validate_change (insn, loc, expr, 0))
1872     return;
1873
1874   /* Otherwise, force EXPR into a register and try again.  */
1875   start_sequence ();
1876   var = gen_reg_rtx (mode);
1877   expr = force_operand (expr, var);
1878   if (expr != var)
1879     emit_move_insn (var, expr);
1880   seq = get_insns ();
1881   end_sequence ();
1882   emit_insn_before (seq, insn);
1883
1884   if (validate_change (insn, loc, var, 0))
1885     return;
1886
1887   /* The last chance.  Try recreating the assignment in insn
1888      completely from scratch.  */
1889   set = single_set (insn);
1890   gcc_assert (set);
1891
1892   start_sequence ();
1893   *loc = var;
1894   src = copy_rtx (SET_SRC (set));
1895   dest = copy_rtx (SET_DEST (set));
1896   src = force_operand (src, dest);
1897   if (src != dest)
1898     emit_move_insn (dest, src);
1899   seq = get_insns ();
1900   end_sequence ();
1901
1902   emit_insn_before (seq, insn);
1903   delete_insn (insn);
1904 }
1905
1906
1907 /* Return one expansion of the accumulator recorded in struct VE.  */
1908
1909 static rtx
1910 get_expansion (struct var_to_expand *ve)
1911 {
1912   rtx reg;
1913
1914   if (ve->reuse_expansion == 0)
1915     reg = ve->reg;
1916   else
1917     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1918
1919   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1920     ve->reuse_expansion = 0;
1921   else
1922     ve->reuse_expansion++;
1923
1924   return reg;
1925 }
1926
1927
1928 /* Given INSN replace the uses of the accumulator recorded in VE
1929    with a new register.  */
1930
1931 static void
1932 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1933 {
1934   rtx new_reg, set;
1935   bool really_new_expansion = false;
1936
1937   set = single_set (insn);
1938   gcc_assert (set);
1939
1940   /* Generate a new register only if the expansion limit has not been
1941      reached.  Else reuse an already existing expansion.  */
1942   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1943     {
1944       really_new_expansion = true;
1945       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1946     }
1947   else
1948     new_reg = get_expansion (ve);
1949
1950   validate_change (insn, &SET_DEST (set), new_reg, 1);
1951   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1952
1953   if (apply_change_group ())
1954     if (really_new_expansion)
1955       {
1956         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1957         ve->expansion_count++;
1958       }
1959 }
1960
1961 /* Initialize the variable expansions in loop preheader.
1962    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1963    basic block where the initialization of the expansions
1964    should take place.  */
1965
1966 static int
1967 insert_var_expansion_initialization (void **slot, void *place_p)
1968 {
1969   struct var_to_expand *ve = *slot;
1970   basic_block place = (basic_block)place_p;
1971   rtx seq, var, zero_init, insn;
1972   unsigned i;
1973
1974   if (VEC_length (rtx, ve->var_expansions) == 0)
1975     return 1;
1976
1977   start_sequence ();
1978   if (ve->op == PLUS || ve->op == MINUS)
1979     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1980       {
1981         zero_init =  CONST0_RTX (GET_MODE (var));
1982         emit_move_insn (var, zero_init);
1983       }
1984   else if (ve->op == MULT)
1985     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1986       {
1987         zero_init =  CONST1_RTX (GET_MODE (var));
1988         emit_move_insn (var, zero_init);
1989       }
1990
1991   seq = get_insns ();
1992   end_sequence ();
1993
1994   insn = BB_HEAD (place);
1995   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1996     insn = NEXT_INSN (insn);
1997
1998   emit_insn_after (seq, insn);
1999   /* Continue traversing the hash table.  */
2000   return 1;
2001 }
2002
2003 /*  Combine the variable expansions at the loop exit.
2004     Callbacks for htab_traverse.  PLACE_P is the loop exit
2005     basic block where the summation of the expansions should
2006     take place.  */
2007
2008 static int
2009 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2010 {
2011   struct var_to_expand *ve = *slot;
2012   basic_block place = (basic_block)place_p;
2013   rtx sum = ve->reg;
2014   rtx expr, seq, var, insn;
2015   unsigned i;
2016
2017   if (VEC_length (rtx, ve->var_expansions) == 0)
2018     return 1;
2019
2020   start_sequence ();
2021   if (ve->op == PLUS || ve->op == MINUS)
2022     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2023       {
2024         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2025                                    var, sum);
2026       }
2027   else if (ve->op == MULT)
2028     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2029       {
2030         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2031                                    var, sum);
2032       }
2033
2034   expr = force_operand (sum, ve->reg);
2035   if (expr != ve->reg)
2036     emit_move_insn (ve->reg, expr);
2037   seq = get_insns ();
2038   end_sequence ();
2039
2040   insn = BB_HEAD (place);
2041   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2042     insn = NEXT_INSN (insn);
2043
2044   emit_insn_after (seq, insn);
2045
2046   /* Continue traversing the hash table.  */
2047   return 1;
2048 }
2049
2050 /* Apply loop optimizations in loop copies using the
2051    data which gathered during the unrolling.  Structure
2052    OPT_INFO record that data.
2053
2054    UNROLLING is true if we unrolled (not peeled) the loop.
2055    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2056    the loop (as it should happen in complete unrolling, but not in ordinary
2057    peeling of the loop).  */
2058
2059 static void
2060 apply_opt_in_copies (struct opt_info *opt_info,
2061                      unsigned n_copies, bool unrolling,
2062                      bool rewrite_original_loop)
2063 {
2064   unsigned i, delta;
2065   basic_block bb, orig_bb;
2066   rtx insn, orig_insn, next;
2067   struct iv_to_split ivts_templ, *ivts;
2068   struct var_to_expand ve_templ, *ves;
2069
2070   /* Sanity check -- we need to put initialization in the original loop
2071      body.  */
2072   gcc_assert (!unrolling || rewrite_original_loop);
2073
2074   /* Allocate the basic variables (i0).  */
2075   if (opt_info->insns_to_split)
2076     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2077
2078   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2079     {
2080       bb = BASIC_BLOCK (i);
2081       orig_bb = get_bb_original (bb);
2082
2083       /* bb->aux holds position in copy sequence initialized by
2084          duplicate_loop_to_header_edge.  */
2085       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2086                                         unrolling);
2087       bb->aux = 0;
2088       orig_insn = BB_HEAD (orig_bb);
2089       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2090         {
2091           next = NEXT_INSN (insn);
2092           if (!INSN_P (insn))
2093             continue;
2094
2095           while (!INSN_P (orig_insn))
2096             orig_insn = NEXT_INSN (orig_insn);
2097
2098           ivts_templ.insn = orig_insn;
2099           ve_templ.insn = orig_insn;
2100
2101           /* Apply splitting iv optimization.  */
2102           if (opt_info->insns_to_split)
2103             {
2104               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2105
2106               if (ivts)
2107                 {
2108 #ifdef ENABLE_CHECKING
2109                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2110 #endif
2111
2112                   if (!delta)
2113                     insert_base_initialization (ivts, insn);
2114                   split_iv (ivts, insn, delta);
2115                 }
2116             }
2117           /* Apply variable expansion optimization.  */
2118           if (unrolling && opt_info->insns_with_var_to_expand)
2119             {
2120               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2121               if (ves)
2122                 {
2123 #ifdef ENABLE_CHECKING
2124                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2125 #endif
2126                   expand_var_during_unrolling (ves, insn);
2127                 }
2128             }
2129           orig_insn = NEXT_INSN (orig_insn);
2130         }
2131     }
2132
2133   if (!rewrite_original_loop)
2134     return;
2135
2136   /* Initialize the variable expansions in the loop preheader
2137      and take care of combining them at the loop exit.  */
2138   if (opt_info->insns_with_var_to_expand)
2139     {
2140       htab_traverse (opt_info->insns_with_var_to_expand,
2141                      insert_var_expansion_initialization,
2142                      opt_info->loop_preheader);
2143       htab_traverse (opt_info->insns_with_var_to_expand,
2144                      combine_var_copies_in_loop_exit,
2145                      opt_info->loop_exit);
2146     }
2147
2148   /* Rewrite also the original loop body.  Find them as originals of the blocks
2149      in the last copied iteration, i.e. those that have
2150      get_bb_copy (get_bb_original (bb)) == bb.  */
2151   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2152     {
2153       bb = BASIC_BLOCK (i);
2154       orig_bb = get_bb_original (bb);
2155       if (get_bb_copy (orig_bb) != bb)
2156         continue;
2157
2158       delta = determine_split_iv_delta (0, n_copies, unrolling);
2159       for (orig_insn = BB_HEAD (orig_bb);
2160            orig_insn != NEXT_INSN (BB_END (bb));
2161            orig_insn = next)
2162         {
2163           next = NEXT_INSN (orig_insn);
2164
2165           if (!INSN_P (orig_insn))
2166             continue;
2167
2168           ivts_templ.insn = orig_insn;
2169           if (opt_info->insns_to_split)
2170             {
2171               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2172               if (ivts)
2173                 {
2174                   if (!delta)
2175                     insert_base_initialization (ivts, orig_insn);
2176                   split_iv (ivts, orig_insn, delta);
2177                   continue;
2178                 }
2179             }
2180
2181         }
2182     }
2183 }
2184
2185 /*  Release the data structures used for the variable expansion
2186     optimization.  Callbacks for htab_traverse.  */
2187
2188 static int
2189 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2190 {
2191   struct var_to_expand *ve = *slot;
2192
2193   VEC_free (rtx, heap, ve->var_expansions);
2194
2195   /* Continue traversing the hash table.  */
2196   return 1;
2197 }
2198
2199 /* Release OPT_INFO.  */
2200
2201 static void
2202 free_opt_info (struct opt_info *opt_info)
2203 {
2204   if (opt_info->insns_to_split)
2205     htab_delete (opt_info->insns_to_split);
2206   if (opt_info->insns_with_var_to_expand)
2207     {
2208       htab_traverse (opt_info->insns_with_var_to_expand,
2209                      release_var_copies, NULL);
2210       htab_delete (opt_info->insns_with_var_to_expand);
2211     }
2212   free (opt_info);
2213 }