gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (struct loops *, int);
 118 static void peel_loops_completely (struct loops *, int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loops *, struct loop *);
 126 static void peel_loop_completely (struct loops *, struct loop *);
 127 static void unroll_loop_stupid (struct loops *, struct loop *);
 128 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (struct loops *loops, int flags)
 146 {
 147   struct loop *loop, *next;
 148   bool check;
 149
 150   /* First perform complete loop peeling (it is almost surely a win,
 151      and affects parameters for further decision a lot).  */
 152   peel_loops_completely (loops, flags);
 153
 154   /* Now decide rest of unrolling and peeling.  */
 155   decide_unrolling_and_peeling (loops, flags);
 156
 157   loop = loops->tree_root;
 158   while (loop->inner)
 159     loop = loop->inner;
 160
 161   /* Scan the loops, inner ones first.  */
 162   while (loop != loops->tree_root)
 163     {
 164       if (loop->next)
 165         {
 166           next = loop->next;
 167           while (next->inner)
 168             next = next->inner;
 169         }
 170       else
 171         next = loop->outer;
 172
 173       check = true;
 174       /* And perform the appropriate transformations.  */
 175       switch (loop->lpt_decision.decision)
 176         {
 177         case LPT_PEEL_COMPLETELY:
 178           /* Already done.  */
 179           gcc_unreachable ();
 180         case LPT_PEEL_SIMPLE:
 181           peel_loop_simple (loops, loop);
 182           break;
 183         case LPT_UNROLL_CONSTANT:
 184           unroll_loop_constant_iterations (loops, loop);
 185           break;
 186         case LPT_UNROLL_RUNTIME:
 187           unroll_loop_runtime_iterations (loops, loop);
 188           break;
 189         case LPT_UNROLL_STUPID:
 190           unroll_loop_stupid (loops, loop);
 191           break;
 192         case LPT_NONE:
 193           check = false;
 194           break;
 195         default:
 196           gcc_unreachable ();
 197         }
 198       if (check)
 199         {
 200 #ifdef ENABLE_CHECKING
 201           verify_dominators (CDI_DOMINATORS);
 202           verify_loop_structure (loops);
 203 #endif
 204         }
 205       loop = next;
 206     }
 207
 208   iv_analysis_done ();
 209 }
 210
 211 /* Check whether exit of the LOOP is at the end of loop body.  */
 212
 213 static bool
 214 loop_exit_at_end_p (struct loop *loop)
 215 {
 216   struct niter_desc *desc = get_simple_loop_desc (loop);
 217   rtx insn;
 218
 219   if (desc->in_edge->dest != loop->latch)
 220     return false;
 221
 222   /* Check that the latch is empty.  */
 223   FOR_BB_INSNS (loop->latch, insn)
 224     {
 225       if (INSN_P (insn))
 226         return false;
 227     }
 228
 229   return true;
 230 }
 231
 232 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 233 static void
 234 peel_loops_completely (struct loops *loops, int flags)
 235 {
 236   struct loop *loop;
 237   unsigned i;
 238
 239   /* Scan the loops, the inner ones first.  */
 240   for (i = loops->num - 1; i > 0; i--)
 241     {
 242       loop = loops->parray[i];
 243       if (!loop)
 244         continue;
 245
 246       loop->lpt_decision.decision = LPT_NONE;
 247
 248       if (dump_file)
 249         fprintf (dump_file,
 250                  "\n;; *** Considering loop %d for complete peeling ***\n",
 251                  loop->num);
 252
 253       loop->ninsns = num_loop_insns (loop);
 254
 255       decide_peel_once_rolling (loop, flags);
 256       if (loop->lpt_decision.decision == LPT_NONE)
 257         decide_peel_completely (loop, flags);
 258
 259       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 260         {
 261           peel_loop_completely (loops, loop);
 262 #ifdef ENABLE_CHECKING
 263           verify_dominators (CDI_DOMINATORS);
 264           verify_loop_structure (loops);
 265 #endif
 266         }
 267     }
 268 }
 269
 270 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 271 static void
 272 decide_unrolling_and_peeling (struct loops *loops, int flags)
 273 {
 274   struct loop *loop = loops->tree_root, *next;
 275
 276   while (loop->inner)
 277     loop = loop->inner;
 278
 279   /* Scan the loops, inner ones first.  */
 280   while (loop != loops->tree_root)
 281     {
 282       if (loop->next)
 283         {
 284           next = loop->next;
 285           while (next->inner)
 286             next = next->inner;
 287         }
 288       else
 289         next = loop->outer;
 290
 291       loop->lpt_decision.decision = LPT_NONE;
 292
 293       if (dump_file)
 294         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 295
 296       /* Do not peel cold areas.  */
 297       if (!maybe_hot_bb_p (loop->header))
 298         {
 299           if (dump_file)
 300             fprintf (dump_file, ";; Not considering loop, cold area\n");
 301           loop = next;
 302           continue;
 303         }
 304
 305       /* Can the loop be manipulated?  */
 306       if (!can_duplicate_loop_p (loop))
 307         {
 308           if (dump_file)
 309             fprintf (dump_file,
 310                      ";; Not considering loop, cannot duplicate\n");
 311           loop = next;
 312           continue;
 313         }
 314
 315       /* Skip non-innermost loops.  */
 316       if (loop->inner)
 317         {
 318           if (dump_file)
 319             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 320           loop = next;
 321           continue;
 322         }
 323
 324       loop->ninsns = num_loop_insns (loop);
 325       loop->av_ninsns = average_num_loop_insns (loop);
 326
 327       /* Try transformations one by one in decreasing order of
 328          priority.  */
 329
 330       decide_unroll_constant_iterations (loop, flags);
 331       if (loop->lpt_decision.decision == LPT_NONE)
 332         decide_unroll_runtime_iterations (loop, flags);
 333       if (loop->lpt_decision.decision == LPT_NONE)
 334         decide_unroll_stupid (loop, flags);
 335       if (loop->lpt_decision.decision == LPT_NONE)
 336         decide_peel_simple (loop, flags);
 337
 338       loop = next;
 339     }
 340 }
 341
 342 /* Decide whether the LOOP is once rolling and suitable for complete
 343    peeling.  */
 344 static void
 345 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 346 {
 347   struct niter_desc *desc;
 348
 349   if (dump_file)
 350     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 351
 352   /* Is the loop small enough?  */
 353   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 354     {
 355       if (dump_file)
 356         fprintf (dump_file, ";; Not considering loop, is too big\n");
 357       return;
 358     }
 359
 360   /* Check for simple loops.  */
 361   desc = get_simple_loop_desc (loop);
 362
 363   /* Check number of iterations.  */
 364   if (!desc->simple_p
 365       || desc->assumptions
 366       || desc->infinite
 367       || !desc->const_iter
 368       || desc->niter != 0)
 369     {
 370       if (dump_file)
 371         fprintf (dump_file,
 372                  ";; Unable to prove that the loop rolls exactly once\n");
 373       return;
 374     }
 375
 376   /* Success.  */
 377   if (dump_file)
 378     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 379   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 380 }
 381
 382 /* Decide whether the LOOP is suitable for complete peeling.  */
 383 static void
 384 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 385 {
 386   unsigned npeel;
 387   struct niter_desc *desc;
 388
 389   if (dump_file)
 390     fprintf (dump_file, "\n;; Considering peeling completely\n");
 391
 392   /* Skip non-innermost loops.  */
 393   if (loop->inner)
 394     {
 395       if (dump_file)
 396         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 397       return;
 398     }
 399
 400   /* Do not peel cold areas.  */
 401   if (!maybe_hot_bb_p (loop->header))
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not considering loop, cold area\n");
 405       return;
 406     }
 407
 408   /* Can the loop be manipulated?  */
 409   if (!can_duplicate_loop_p (loop))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file,
 413                  ";; Not considering loop, cannot duplicate\n");
 414       return;
 415     }
 416
 417   /* npeel = number of iterations to peel.  */
 418   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 419   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 420     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 421
 422   /* Is the loop small enough?  */
 423   if (!npeel)
 424     {
 425       if (dump_file)
 426         fprintf (dump_file, ";; Not considering loop, is too big\n");
 427       return;
 428     }
 429
 430   /* Check for simple loops.  */
 431   desc = get_simple_loop_desc (loop);
 432
 433   /* Check number of iterations.  */
 434   if (!desc->simple_p
 435       || desc->assumptions
 436       || !desc->const_iter
 437       || desc->infinite)
 438     {
 439       if (dump_file)
 440         fprintf (dump_file,
 441                  ";; Unable to prove that the loop iterates constant times\n");
 442       return;
 443     }
 444
 445   if (desc->niter > npeel - 1)
 446     {
 447       if (dump_file)
 448         {
 449           fprintf (dump_file,
 450                    ";; Not peeling loop completely, rolls too much (");
 451           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 452           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 453         }
 454       return;
 455     }
 456
 457   /* Success.  */
 458   if (dump_file)
 459     fprintf (dump_file, ";; Decided to peel loop completely\n");
 460   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 461 }
 462
 463 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 464    completely.  The transformation done:
 465
 466    for (i = 0; i < 4; i++)
 467      body;
 468
 469    ==>
 470
 471    i = 0;
 472    body; i++;
 473    body; i++;
 474    body; i++;
 475    body; i++;
 476    */
 477 static void
 478 peel_loop_completely (struct loops *loops, struct loop *loop)
 479 {
 480   sbitmap wont_exit;
 481   unsigned HOST_WIDE_INT npeel;
 482   unsigned n_remove_edges, i;
 483   edge *remove_edges, ein;
 484   struct niter_desc *desc = get_simple_loop_desc (loop);
 485   struct opt_info *opt_info = NULL;
 486
 487   npeel = desc->niter;
 488
 489   if (npeel)
 490     {
 491       bool ok;
 492
 493       wont_exit = sbitmap_alloc (npeel + 1);
 494       sbitmap_ones (wont_exit);
 495       RESET_BIT (wont_exit, 0);
 496       if (desc->noloop_assumptions)
 497         RESET_BIT (wont_exit, 1);
 498
 499       remove_edges = XCNEWVEC (edge, npeel);
 500       n_remove_edges = 0;
 501
 502       if (flag_split_ivs_in_unroller)
 503         opt_info = analyze_insns_in_loop (loop);
 504
 505       opt_info_start_duplication (opt_info);
 506       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 507                                           loops, npeel,
 508                                           wont_exit, desc->out_edge,
 509                                           remove_edges, &n_remove_edges,
 510                                           DLTHE_FLAG_UPDATE_FREQ
 511                                           | DLTHE_FLAG_COMPLETTE_PEEL
 512                                           | (opt_info
 513                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 514       gcc_assert (ok);
 515
 516       free (wont_exit);
 517
 518       if (opt_info)
 519         {
 520           apply_opt_in_copies (opt_info, npeel, false, true);
 521           free_opt_info (opt_info);
 522         }
 523
 524       /* Remove the exit edges.  */
 525       for (i = 0; i < n_remove_edges; i++)
 526         remove_path (loops, remove_edges[i]);
 527       free (remove_edges);
 528     }
 529
 530   ein = desc->in_edge;
 531   free_simple_loop_desc (loop);
 532
 533   /* Now remove the unreachable part of the last iteration and cancel
 534      the loop.  */
 535   remove_path (loops, ein);
 536
 537   if (dump_file)
 538     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 539 }
 540
 541 /* Decide whether to unroll LOOP iterating constant number of times
 542    and how much.  */
 543
 544 static void
 545 decide_unroll_constant_iterations (struct loop *loop, int flags)
 546 {
 547   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 548   struct niter_desc *desc;
 549
 550   if (!(flags & UAP_UNROLL))
 551     {
 552       /* We were not asked to, just return back silently.  */
 553       return;
 554     }
 555
 556   if (dump_file)
 557     fprintf (dump_file,
 558              "\n;; Considering unrolling loop with constant "
 559              "number of iterations\n");
 560
 561   /* nunroll = total number of copies of the original loop body in
 562      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 563   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 564   nunroll_by_av
 565     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 566   if (nunroll > nunroll_by_av)
 567     nunroll = nunroll_by_av;
 568   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 569     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 570
 571   /* Skip big loops.  */
 572   if (nunroll <= 1)
 573     {
 574       if (dump_file)
 575         fprintf (dump_file, ";; Not considering loop, is too big\n");
 576       return;
 577     }
 578
 579   /* Check for simple loops.  */
 580   desc = get_simple_loop_desc (loop);
 581
 582   /* Check number of iterations.  */
 583   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 584     {
 585       if (dump_file)
 586         fprintf (dump_file,
 587                  ";; Unable to prove that the loop iterates constant times\n");
 588       return;
 589     }
 590
 591   /* Check whether the loop rolls enough to consider.  */
 592   if (desc->niter < 2 * nunroll)
 593     {
 594       if (dump_file)
 595         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 596       return;
 597     }
 598
 599   /* Success; now compute number of iterations to unroll.  We alter
 600      nunroll so that as few as possible copies of loop body are
 601      necessary, while still not decreasing the number of unrollings
 602      too much (at most by 1).  */
 603   best_copies = 2 * nunroll + 10;
 604
 605   i = 2 * nunroll + 2;
 606   if (i - 1 >= desc->niter)
 607     i = desc->niter - 2;
 608
 609   for (; i >= nunroll - 1; i--)
 610     {
 611       unsigned exit_mod = desc->niter % (i + 1);
 612
 613       if (!loop_exit_at_end_p (loop))
 614         n_copies = exit_mod + i + 1;
 615       else if (exit_mod != (unsigned) i
 616                || desc->noloop_assumptions != NULL_RTX)
 617         n_copies = exit_mod + i + 2;
 618       else
 619         n_copies = i + 1;
 620
 621       if (n_copies < best_copies)
 622         {
 623           best_copies = n_copies;
 624           best_unroll = i;
 625         }
 626     }
 627
 628   if (dump_file)
 629     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 630              best_unroll + 1, best_copies, nunroll);
 631
 632   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 633   loop->lpt_decision.times = best_unroll;
 634
 635   if (dump_file)
 636     fprintf (dump_file,
 637              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 638              loop->lpt_decision.times);
 639 }
 640
 641 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 642    times.  The transformation does this:
 643
 644    for (i = 0; i < 102; i++)
 645      body;
 646
 647    ==>
 648
 649    i = 0;
 650    body; i++;
 651    body; i++;
 652    while (i < 102)
 653      {
 654        body; i++;
 655        body; i++;
 656        body; i++;
 657        body; i++;
 658      }
 659   */
 660 static void
 661 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 662 {
 663   unsigned HOST_WIDE_INT niter;
 664   unsigned exit_mod;
 665   sbitmap wont_exit;
 666   unsigned n_remove_edges, i;
 667   edge *remove_edges;
 668   unsigned max_unroll = loop->lpt_decision.times;
 669   struct niter_desc *desc = get_simple_loop_desc (loop);
 670   bool exit_at_end = loop_exit_at_end_p (loop);
 671   struct opt_info *opt_info = NULL;
 672   bool ok;
 673
 674   niter = desc->niter;
 675
 676   /* Should not get here (such loop should be peeled instead).  */
 677   gcc_assert (niter > max_unroll + 1);
 678
 679   exit_mod = niter % (max_unroll + 1);
 680
 681   wont_exit = sbitmap_alloc (max_unroll + 1);
 682   sbitmap_ones (wont_exit);
 683
 684   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
 685   n_remove_edges = 0;
 686   if (flag_split_ivs_in_unroller
 687       || flag_variable_expansion_in_unroller)
 688     opt_info = analyze_insns_in_loop (loop);
 689
 690   if (!exit_at_end)
 691     {
 692       /* The exit is not at the end of the loop; leave exit test
 693          in the first copy, so that the loops that start with test
 694          of exit condition have continuous body after unrolling.  */
 695
 696       if (dump_file)
 697         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 698
 699       /* Peel exit_mod iterations.  */
 700       RESET_BIT (wont_exit, 0);
 701       if (desc->noloop_assumptions)
 702         RESET_BIT (wont_exit, 1);
 703
 704       if (exit_mod)
 705         {
 706           opt_info_start_duplication (opt_info);
 707           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 708                                               loops, exit_mod,
 709                                               wont_exit, desc->out_edge,
 710                                               remove_edges, &n_remove_edges,
 711                                               DLTHE_FLAG_UPDATE_FREQ
 712                                               | (opt_info && exit_mod > 1
 713                                                  ? DLTHE_RECORD_COPY_NUMBER
 714                                                    : 0));
 715           gcc_assert (ok);
 716
 717           if (opt_info && exit_mod > 1)
 718             apply_opt_in_copies (opt_info, exit_mod, false, false);
 719
 720           desc->noloop_assumptions = NULL_RTX;
 721           desc->niter -= exit_mod;
 722           desc->niter_max -= exit_mod;
 723         }
 724
 725       SET_BIT (wont_exit, 1);
 726     }
 727   else
 728     {
 729       /* Leave exit test in last copy, for the same reason as above if
 730          the loop tests the condition at the end of loop body.  */
 731
 732       if (dump_file)
 733         fprintf (dump_file, ";; Condition on end of loop.\n");
 734
 735       /* We know that niter >= max_unroll + 2; so we do not need to care of
 736          case when we would exit before reaching the loop.  So just peel
 737          exit_mod + 1 iterations.  */
 738       if (exit_mod != max_unroll
 739           || desc->noloop_assumptions)
 740         {
 741           RESET_BIT (wont_exit, 0);
 742           if (desc->noloop_assumptions)
 743             RESET_BIT (wont_exit, 1);
 744
 745           opt_info_start_duplication (opt_info);
 746           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 747                                               loops, exit_mod + 1,
 748                                               wont_exit, desc->out_edge,
 749                                               remove_edges, &n_remove_edges,
 750                                               DLTHE_FLAG_UPDATE_FREQ
 751                                               | (opt_info && exit_mod > 0
 752                                                  ? DLTHE_RECORD_COPY_NUMBER
 753                                                    : 0));
 754           gcc_assert (ok);
 755
 756           if (opt_info && exit_mod > 0)
 757             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 758
 759           desc->niter -= exit_mod + 1;
 760           desc->niter_max -= exit_mod + 1;
 761           desc->noloop_assumptions = NULL_RTX;
 762
 763           SET_BIT (wont_exit, 0);
 764           SET_BIT (wont_exit, 1);
 765         }
 766
 767       RESET_BIT (wont_exit, max_unroll);
 768     }
 769
 770   /* Now unroll the loop.  */
 771
 772   opt_info_start_duplication (opt_info);
 773   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 774                                       loops, max_unroll,
 775                                       wont_exit, desc->out_edge,
 776                                       remove_edges, &n_remove_edges,
 777                                       DLTHE_FLAG_UPDATE_FREQ
 778                                       | (opt_info
 779                                          ? DLTHE_RECORD_COPY_NUMBER
 780                                            : 0));
 781   gcc_assert (ok);
 782
 783   if (opt_info)
 784     {
 785       apply_opt_in_copies (opt_info, max_unroll, true, true);
 786       free_opt_info (opt_info);
 787     }
 788
 789   free (wont_exit);
 790
 791   if (exit_at_end)
 792     {
 793       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 794       /* Find a new in and out edge; they are in the last copy we have made.  */
 795
 796       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 797         {
 798           desc->out_edge = EDGE_SUCC (exit_block, 0);
 799           desc->in_edge = EDGE_SUCC (exit_block, 1);
 800         }
 801       else
 802         {
 803           desc->out_edge = EDGE_SUCC (exit_block, 1);
 804           desc->in_edge = EDGE_SUCC (exit_block, 0);
 805         }
 806     }
 807
 808   desc->niter /= max_unroll + 1;
 809   desc->niter_max /= max_unroll + 1;
 810   desc->niter_expr = GEN_INT (desc->niter);
 811
 812   /* Remove the edges.  */
 813   for (i = 0; i < n_remove_edges; i++)
 814     remove_path (loops, remove_edges[i]);
 815   free (remove_edges);
 816
 817   if (dump_file)
 818     fprintf (dump_file,
 819              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 820              max_unroll, num_loop_insns (loop));
 821 }
 822
 823 /* Decide whether to unroll LOOP iterating runtime computable number of times
 824    and how much.  */
 825 static void
 826 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 827 {
 828   unsigned nunroll, nunroll_by_av, i;
 829   struct niter_desc *desc;
 830
 831   if (!(flags & UAP_UNROLL))
 832     {
 833       /* We were not asked to, just return back silently.  */
 834       return;
 835     }
 836
 837   if (dump_file)
 838     fprintf (dump_file,
 839              "\n;; Considering unrolling loop with runtime "
 840              "computable number of iterations\n");
 841
 842   /* nunroll = total number of copies of the original loop body in
 843      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 844   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 845   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 846   if (nunroll > nunroll_by_av)
 847     nunroll = nunroll_by_av;
 848   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 849     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 850
 851   /* Skip big loops.  */
 852   if (nunroll <= 1)
 853     {
 854       if (dump_file)
 855         fprintf (dump_file, ";; Not considering loop, is too big\n");
 856       return;
 857     }
 858
 859   /* Check for simple loops.  */
 860   desc = get_simple_loop_desc (loop);
 861
 862   /* Check simpleness.  */
 863   if (!desc->simple_p || desc->assumptions)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file,
 867                  ";; Unable to prove that the number of iterations "
 868                  "can be counted in runtime\n");
 869       return;
 870     }
 871
 872   if (desc->const_iter)
 873     {
 874       if (dump_file)
 875         fprintf (dump_file, ";; Loop iterates constant times\n");
 876       return;
 877     }
 878
 879   /* If we have profile feedback, check whether the loop rolls.  */
 880   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 881     {
 882       if (dump_file)
 883         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 884       return;
 885     }
 886
 887   /* Success; now force nunroll to be power of 2, as we are unable to
 888      cope with overflows in computation of number of iterations.  */
 889   for (i = 1; 2 * i <= nunroll; i *= 2)
 890     continue;
 891
 892   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 893   loop->lpt_decision.times = i - 1;
 894
 895   if (dump_file)
 896     fprintf (dump_file,
 897              ";; Decided to unroll the runtime computable "
 898              "times rolling loop, %d times.\n",
 899              loop->lpt_decision.times);
 900 }
 901
 902 /* Splits edge E and inserts INSNS on it.  */
 903
 904 basic_block
 905 split_edge_and_insert (edge e, rtx insns)
 906 {
 907   basic_block bb = split_edge (e);
 908   gcc_assert (insns != NULL_RTX);
 909   emit_insn_after (insns, BB_END (bb));
 910   bb->flags |= BB_SUPERBLOCK;
 911   return bb;
 912 }
 913
 914 /* Unroll LOOP for that we are able to count number of iterations in runtime
 915    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 916    extra care for case n < 0):
 917
 918    for (i = 0; i < n; i++)
 919      body;
 920
 921    ==>
 922
 923    i = 0;
 924    mod = n % 4;
 925
 926    switch (mod)
 927      {
 928        case 3:
 929          body; i++;
 930        case 2:
 931          body; i++;
 932        case 1:
 933          body; i++;
 934        case 0: ;
 935      }
 936
 937    while (i < n)
 938      {
 939        body; i++;
 940        body; i++;
 941        body; i++;
 942        body; i++;
 943      }
 944    */
 945 static void
 946 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 947 {
 948   rtx old_niter, niter, init_code, branch_code, tmp;
 949   unsigned i, j, p;
 950   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 951   unsigned n_dom_bbs;
 952   sbitmap wont_exit;
 953   int may_exit_copy;
 954   unsigned n_peel, n_remove_edges;
 955   edge *remove_edges, e;
 956   bool extra_zero_check, last_may_exit;
 957   unsigned max_unroll = loop->lpt_decision.times;
 958   struct niter_desc *desc = get_simple_loop_desc (loop);
 959   bool exit_at_end = loop_exit_at_end_p (loop);
 960   struct opt_info *opt_info = NULL;
 961   bool ok;
 962
 963   if (flag_split_ivs_in_unroller
 964       || flag_variable_expansion_in_unroller)
 965     opt_info = analyze_insns_in_loop (loop);
 966
 967   /* Remember blocks whose dominators will have to be updated.  */
 968   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 969   n_dom_bbs = 0;
 970
 971   body = get_loop_body (loop);
 972   for (i = 0; i < loop->num_nodes; i++)
 973     {
 974       unsigned nldom;
 975       basic_block *ldom;
 976
 977       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 978       for (j = 0; j < nldom; j++)
 979         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 980           dom_bbs[n_dom_bbs++] = ldom[j];
 981
 982       free (ldom);
 983     }
 984   free (body);
 985
 986   if (!exit_at_end)
 987     {
 988       /* Leave exit in first copy (for explanation why see comment in
 989          unroll_loop_constant_iterations).  */
 990       may_exit_copy = 0;
 991       n_peel = max_unroll - 1;
 992       extra_zero_check = true;
 993       last_may_exit = false;
 994     }
 995   else
 996     {
 997       /* Leave exit in last copy (for explanation why see comment in
 998          unroll_loop_constant_iterations).  */
 999       may_exit_copy = max_unroll;
1000       n_peel = max_unroll;
1001       extra_zero_check = false;
1002       last_may_exit = true;
1003     }
1004
1005   /* Get expression for number of iterations.  */
1006   start_sequence ();
1007   old_niter = niter = gen_reg_rtx (desc->mode);
1008   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1009   if (tmp != niter)
1010     emit_move_insn (niter, tmp);
1011
1012   /* Count modulo by ANDing it with max_unroll; we use the fact that
1013      the number of unrollings is a power of two, and thus this is correct
1014      even if there is overflow in the computation.  */
1015   niter = expand_simple_binop (desc->mode, AND,
1016                                niter,
1017                                GEN_INT (max_unroll),
1018                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1019
1020   init_code = get_insns ();
1021   end_sequence ();
1022
1023   /* Precondition the loop.  */
1024   split_edge_and_insert (loop_preheader_edge (loop), init_code);
1025
1026   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1027   n_remove_edges = 0;
1028
1029   wont_exit = sbitmap_alloc (max_unroll + 2);
1030
1031   /* Peel the first copy of loop body (almost always we must leave exit test
1032      here; the only exception is when we have extra zero check and the number
1033      of iterations is reliable.  Also record the place of (possible) extra
1034      zero check.  */
1035   sbitmap_zero (wont_exit);
1036   if (extra_zero_check
1037       && !desc->noloop_assumptions)
1038     SET_BIT (wont_exit, 1);
1039   ezc_swtch = loop_preheader_edge (loop)->src;
1040   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1041                                       loops, 1,
1042                                       wont_exit, desc->out_edge,
1043                                       remove_edges, &n_remove_edges,
1044                                       DLTHE_FLAG_UPDATE_FREQ);
1045   gcc_assert (ok);
1046
1047   /* Record the place where switch will be built for preconditioning.  */
1048   swtch = split_edge (loop_preheader_edge (loop));
1049
1050   for (i = 0; i < n_peel; i++)
1051     {
1052       /* Peel the copy.  */
1053       sbitmap_zero (wont_exit);
1054       if (i != n_peel - 1 || !last_may_exit)
1055         SET_BIT (wont_exit, 1);
1056       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1057                                           loops, 1,
1058                                           wont_exit, desc->out_edge,
1059                                           remove_edges, &n_remove_edges,
1060                                           DLTHE_FLAG_UPDATE_FREQ);
1061       gcc_assert (ok);
1062
1063       /* Create item for switch.  */
1064       j = n_peel - i - (extra_zero_check ? 0 : 1);
1065       p = REG_BR_PROB_BASE / (i + 2);
1066
1067       preheader = split_edge (loop_preheader_edge (loop));
1068       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1069                                           block_label (preheader), p,
1070                                           NULL_RTX);
1071
1072       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1073       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1074       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1075       e = make_edge (swtch, preheader,
1076                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1077       e->probability = p;
1078     }
1079
1080   if (extra_zero_check)
1081     {
1082       /* Add branch for zero iterations.  */
1083       p = REG_BR_PROB_BASE / (max_unroll + 1);
1084       swtch = ezc_swtch;
1085       preheader = split_edge (loop_preheader_edge (loop));
1086       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1087                                           block_label (preheader), p,
1088                                           NULL_RTX);
1089
1090       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1091       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1092       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1093       e = make_edge (swtch, preheader,
1094                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1095       e->probability = p;
1096     }
1097
1098   /* Recount dominators for outer blocks.  */
1099   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1100
1101   /* And unroll loop.  */
1102
1103   sbitmap_ones (wont_exit);
1104   RESET_BIT (wont_exit, may_exit_copy);
1105   opt_info_start_duplication (opt_info);
1106
1107   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1108                                       loops, max_unroll,
1109                                       wont_exit, desc->out_edge,
1110                                       remove_edges, &n_remove_edges,
1111                                       DLTHE_FLAG_UPDATE_FREQ
1112                                       | (opt_info
1113                                          ? DLTHE_RECORD_COPY_NUMBER
1114                                            : 0));
1115   gcc_assert (ok);
1116
1117   if (opt_info)
1118     {
1119       apply_opt_in_copies (opt_info, max_unroll, true, true);
1120       free_opt_info (opt_info);
1121     }
1122
1123   free (wont_exit);
1124
1125   if (exit_at_end)
1126     {
1127       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1128       /* Find a new in and out edge; they are in the last copy we have
1129          made.  */
1130
1131       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1132         {
1133           desc->out_edge = EDGE_SUCC (exit_block, 0);
1134           desc->in_edge = EDGE_SUCC (exit_block, 1);
1135         }
1136       else
1137         {
1138           desc->out_edge = EDGE_SUCC (exit_block, 1);
1139           desc->in_edge = EDGE_SUCC (exit_block, 0);
1140         }
1141     }
1142
1143   /* Remove the edges.  */
1144   for (i = 0; i < n_remove_edges; i++)
1145     remove_path (loops, remove_edges[i]);
1146   free (remove_edges);
1147
1148   /* We must be careful when updating the number of iterations due to
1149      preconditioning and the fact that the value must be valid at entry
1150      of the loop.  After passing through the above code, we see that
1151      the correct new number of iterations is this:  */
1152   gcc_assert (!desc->const_iter);
1153   desc->niter_expr =
1154     simplify_gen_binary (UDIV, desc->mode, old_niter,
1155                          GEN_INT (max_unroll + 1));
1156   desc->niter_max /= max_unroll + 1;
1157   if (exit_at_end)
1158     {
1159       desc->niter_expr =
1160         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1161       desc->noloop_assumptions = NULL_RTX;
1162       desc->niter_max--;
1163     }
1164
1165   if (dump_file)
1166     fprintf (dump_file,
1167              ";; Unrolled loop %d times, counting # of iterations "
1168              "in runtime, %i insns\n",
1169              max_unroll, num_loop_insns (loop));
1170
1171   if (dom_bbs)
1172     free (dom_bbs);
1173 }
1174
1175 /* Decide whether to simply peel LOOP and how much.  */
1176 static void
1177 decide_peel_simple (struct loop *loop, int flags)
1178 {
1179   unsigned npeel;
1180   struct niter_desc *desc;
1181
1182   if (!(flags & UAP_PEEL))
1183     {
1184       /* We were not asked to, just return back silently.  */
1185       return;
1186     }
1187
1188   if (dump_file)
1189     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1190
1191   /* npeel = number of iterations to peel.  */
1192   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1193   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1194     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1195
1196   /* Skip big loops.  */
1197   if (!npeel)
1198     {
1199       if (dump_file)
1200         fprintf (dump_file, ";; Not considering loop, is too big\n");
1201       return;
1202     }
1203
1204   /* Check for simple loops.  */
1205   desc = get_simple_loop_desc (loop);
1206
1207   /* Check number of iterations.  */
1208   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1209     {
1210       if (dump_file)
1211         fprintf (dump_file, ";; Loop iterates constant times\n");
1212       return;
1213     }
1214
1215   /* Do not simply peel loops with branches inside -- it increases number
1216      of mispredicts.  */
1217   if (num_loop_branches (loop) > 1)
1218     {
1219       if (dump_file)
1220         fprintf (dump_file, ";; Not peeling, contains branches\n");
1221       return;
1222     }
1223
1224   if (loop->header->count)
1225     {
1226       unsigned niter = expected_loop_iterations (loop);
1227       if (niter + 1 > npeel)
1228         {
1229           if (dump_file)
1230             {
1231               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1232               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1233                        (HOST_WIDEST_INT) (niter + 1));
1234               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1235                        npeel);
1236             }
1237           return;
1238         }
1239       npeel = niter + 1;
1240     }
1241   else
1242     {
1243       /* For now we have no good heuristics to decide whether loop peeling
1244          will be effective, so disable it.  */
1245       if (dump_file)
1246         fprintf (dump_file,
1247                  ";; Not peeling loop, no evidence it will be profitable\n");
1248       return;
1249     }
1250
1251   /* Success.  */
1252   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1253   loop->lpt_decision.times = npeel;
1254
1255   if (dump_file)
1256     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1257              loop->lpt_decision.times);
1258 }
1259
1260 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1261    while (cond)
1262      body;
1263
1264    ==>
1265
1266    if (!cond) goto end;
1267    body;
1268    if (!cond) goto end;
1269    body;
1270    while (cond)
1271      body;
1272    end: ;
1273    */
1274 static void
1275 peel_loop_simple (struct loops *loops, struct loop *loop)
1276 {
1277   sbitmap wont_exit;
1278   unsigned npeel = loop->lpt_decision.times;
1279   struct niter_desc *desc = get_simple_loop_desc (loop);
1280   struct opt_info *opt_info = NULL;
1281   bool ok;
1282
1283   if (flag_split_ivs_in_unroller && npeel > 1)
1284     opt_info = analyze_insns_in_loop (loop);
1285
1286   wont_exit = sbitmap_alloc (npeel + 1);
1287   sbitmap_zero (wont_exit);
1288
1289   opt_info_start_duplication (opt_info);
1290
1291   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1292                                       loops, npeel, wont_exit,
1293                                       NULL, NULL,
1294                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1295                                       | (opt_info
1296                                          ? DLTHE_RECORD_COPY_NUMBER
1297                                            : 0));
1298   gcc_assert (ok);
1299
1300   free (wont_exit);
1301
1302   if (opt_info)
1303     {
1304       apply_opt_in_copies (opt_info, npeel, false, false);
1305       free_opt_info (opt_info);
1306     }
1307
1308   if (desc->simple_p)
1309     {
1310       if (desc->const_iter)
1311         {
1312           desc->niter -= npeel;
1313           desc->niter_expr = GEN_INT (desc->niter);
1314           desc->noloop_assumptions = NULL_RTX;
1315         }
1316       else
1317         {
1318           /* We cannot just update niter_expr, as its value might be clobbered
1319              inside loop.  We could handle this by counting the number into
1320              temporary just like we do in runtime unrolling, but it does not
1321              seem worthwhile.  */
1322           free_simple_loop_desc (loop);
1323         }
1324     }
1325   if (dump_file)
1326     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1327 }
1328
1329 /* Decide whether to unroll LOOP stupidly and how much.  */
1330 static void
1331 decide_unroll_stupid (struct loop *loop, int flags)
1332 {
1333   unsigned nunroll, nunroll_by_av, i;
1334   struct niter_desc *desc;
1335
1336   if (!(flags & UAP_UNROLL_ALL))
1337     {
1338       /* We were not asked to, just return back silently.  */
1339       return;
1340     }
1341
1342   if (dump_file)
1343     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1344
1345   /* nunroll = total number of copies of the original loop body in
1346      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1347   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1348   nunroll_by_av
1349     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1350   if (nunroll > nunroll_by_av)
1351     nunroll = nunroll_by_av;
1352   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1353     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1354
1355   /* Skip big loops.  */
1356   if (nunroll <= 1)
1357     {
1358       if (dump_file)
1359         fprintf (dump_file, ";; Not considering loop, is too big\n");
1360       return;
1361     }
1362
1363   /* Check for simple loops.  */
1364   desc = get_simple_loop_desc (loop);
1365
1366   /* Check simpleness.  */
1367   if (desc->simple_p && !desc->assumptions)
1368     {
1369       if (dump_file)
1370         fprintf (dump_file, ";; The loop is simple\n");
1371       return;
1372     }
1373
1374   /* Do not unroll loops with branches inside -- it increases number
1375      of mispredicts.  */
1376   if (num_loop_branches (loop) > 1)
1377     {
1378       if (dump_file)
1379         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1380       return;
1381     }
1382
1383   /* If we have profile feedback, check whether the loop rolls.  */
1384   if (loop->header->count
1385       && expected_loop_iterations (loop) < 2 * nunroll)
1386     {
1387       if (dump_file)
1388         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1389       return;
1390     }
1391
1392   /* Success.  Now force nunroll to be power of 2, as it seems that this
1393      improves results (partially because of better alignments, partially
1394      because of some dark magic).  */
1395   for (i = 1; 2 * i <= nunroll; i *= 2)
1396     continue;
1397
1398   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1399   loop->lpt_decision.times = i - 1;
1400
1401   if (dump_file)
1402     fprintf (dump_file,
1403              ";; Decided to unroll the loop stupidly, %d times.\n",
1404              loop->lpt_decision.times);
1405 }
1406
1407 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1408    while (cond)
1409      body;
1410
1411    ==>
1412
1413    while (cond)
1414      {
1415        body;
1416        if (!cond) break;
1417        body;
1418        if (!cond) break;
1419        body;
1420        if (!cond) break;
1421        body;
1422      }
1423    */
1424 static void
1425 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1426 {
1427   sbitmap wont_exit;
1428   unsigned nunroll = loop->lpt_decision.times;
1429   struct niter_desc *desc = get_simple_loop_desc (loop);
1430   struct opt_info *opt_info = NULL;
1431   bool ok;
1432
1433   if (flag_split_ivs_in_unroller
1434       || flag_variable_expansion_in_unroller)
1435     opt_info = analyze_insns_in_loop (loop);
1436
1437
1438   wont_exit = sbitmap_alloc (nunroll + 1);
1439   sbitmap_zero (wont_exit);
1440   opt_info_start_duplication (opt_info);
1441
1442   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1443                                       loops, nunroll, wont_exit,
1444                                       NULL, NULL, NULL,
1445                                       DLTHE_FLAG_UPDATE_FREQ
1446                                       | (opt_info
1447                                          ? DLTHE_RECORD_COPY_NUMBER
1448                                            : 0));
1449   gcc_assert (ok);
1450
1451   if (opt_info)
1452     {
1453       apply_opt_in_copies (opt_info, nunroll, true, true);
1454       free_opt_info (opt_info);
1455     }
1456
1457   free (wont_exit);
1458
1459   if (desc->simple_p)
1460     {
1461       /* We indeed may get here provided that there are nontrivial assumptions
1462          for a loop to be really simple.  We could update the counts, but the
1463          problem is that we are unable to decide which exit will be taken
1464          (not really true in case the number of iterations is constant,
1465          but noone will do anything with this information, so we do not
1466          worry about it).  */
1467       desc->simple_p = false;
1468     }
1469
1470   if (dump_file)
1471     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1472              nunroll, num_loop_insns (loop));
1473 }
1474
1475 /* A hash function for information about insns to split.  */
1476
1477 static hashval_t
1478 si_info_hash (const void *ivts)
1479 {
1480   return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1481 }
1482
1483 /* An equality functions for information about insns to split.  */
1484
1485 static int
1486 si_info_eq (const void *ivts1, const void *ivts2)
1487 {
1488   const struct iv_to_split *i1 = ivts1;
1489   const struct iv_to_split *i2 = ivts2;
1490
1491   return i1->insn == i2->insn;
1492 }
1493
1494 /* Return a hash for VES, which is really a "var_to_expand *".  */
1495
1496 static hashval_t
1497 ve_info_hash (const void *ves)
1498 {
1499   return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1500 }
1501
1502 /* Return true if IVTS1 and IVTS2 (which are really both of type
1503    "var_to_expand *") refer to the same instruction.  */
1504
1505 static int
1506 ve_info_eq (const void *ivts1, const void *ivts2)
1507 {
1508   const struct var_to_expand *i1 = ivts1;
1509   const struct var_to_expand *i2 = ivts2;
1510
1511   return i1->insn == i2->insn;
1512 }
1513
1514 /* Returns true if REG is referenced in one insn in LOOP.  */
1515
1516 bool
1517 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1518 {
1519   basic_block *body, bb;
1520   unsigned i;
1521   int count_ref = 0;
1522   rtx insn;
1523
1524   body = get_loop_body (loop);
1525   for (i = 0; i < loop->num_nodes; i++)
1526     {
1527       bb = body[i];
1528
1529       FOR_BB_INSNS (bb, insn)
1530       {
1531         if (rtx_referenced_p (reg, insn))
1532           count_ref++;
1533       }
1534     }
1535   return (count_ref  == 1);
1536 }
1537
1538 /* Determine whether INSN contains an accumulator
1539    which can be expanded into separate copies,
1540    one for each copy of the LOOP body.
1541
1542    for (i = 0 ; i < n; i++)
1543      sum += a[i];
1544
1545    ==>
1546
1547    sum += a[i]
1548    ....
1549    i = i+1;
1550    sum1 += a[i]
1551    ....
1552    i = i+1
1553    sum2 += a[i];
1554    ....
1555
1556    Return NULL if INSN contains no opportunity for expansion of accumulator.
1557    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1558    information and return a pointer to it.
1559 */
1560
1561 static struct var_to_expand *
1562 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1563 {
1564   rtx set, dest, src, op1;
1565   struct var_to_expand *ves;
1566   enum machine_mode mode1, mode2;
1567
1568   set = single_set (insn);
1569   if (!set)
1570     return NULL;
1571
1572   dest = SET_DEST (set);
1573   src = SET_SRC (set);
1574
1575   if (GET_CODE (src) != PLUS
1576       && GET_CODE (src) != MINUS
1577       && GET_CODE (src) != MULT)
1578     return NULL;
1579
1580   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1581      in MD.  But if there is no optab to generate the insn, we can not
1582      perform the variable expansion.  This can happen if an MD provides
1583      an insn but not a named pattern to generate it, for example to avoid
1584      producing code that needs additional mode switches like for x87/mmx.
1585
1586      So we check have_insn_for which looks for an optab for the operation
1587      in SRC.  If it doesn't exist, we can't perform the expansion even
1588      though INSN is valid.  */
1589   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1590     return NULL;
1591
1592   if (!XEXP (src, 0))
1593     return NULL;
1594
1595   op1 = XEXP (src, 0);
1596
1597   if (!REG_P (dest)
1598       && !(GET_CODE (dest) == SUBREG
1599            && REG_P (SUBREG_REG (dest))))
1600     return NULL;
1601
1602   if (!rtx_equal_p (dest, op1))
1603     return NULL;
1604
1605   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1606     return NULL;
1607
1608   if (rtx_referenced_p (dest, XEXP (src, 1)))
1609     return NULL;
1610
1611   mode1 = GET_MODE (dest);
1612   mode2 = GET_MODE (XEXP (src, 1));
1613   if ((FLOAT_MODE_P (mode1)
1614        || FLOAT_MODE_P (mode2))
1615       && !flag_unsafe_math_optimizations)
1616     return NULL;
1617
1618   /* Record the accumulator to expand.  */
1619   ves = XNEW (struct var_to_expand);
1620   ves->insn = insn;
1621   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1622   ves->reg = copy_rtx (dest);
1623   ves->op = GET_CODE (src);
1624   ves->expansion_count = 0;
1625   ves->reuse_expansion = 0;
1626   return ves;
1627 }
1628
1629 /* Determine whether there is an induction variable in INSN that
1630    we would like to split during unrolling.
1631
1632    I.e. replace
1633
1634    i = i + 1;
1635    ...
1636    i = i + 1;
1637    ...
1638    i = i + 1;
1639    ...
1640
1641    type chains by
1642
1643    i0 = i + 1
1644    ...
1645    i = i0 + 1
1646    ...
1647    i = i0 + 2
1648    ...
1649
1650    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1651    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1652    pointer to it.  */
1653
1654 static struct iv_to_split *
1655 analyze_iv_to_split_insn (rtx insn)
1656 {
1657   rtx set, dest;
1658   struct rtx_iv iv;
1659   struct iv_to_split *ivts;
1660   bool ok;
1661
1662   /* For now we just split the basic induction variables.  Later this may be
1663      extended for example by selecting also addresses of memory references.  */
1664   set = single_set (insn);
1665   if (!set)
1666     return NULL;
1667
1668   dest = SET_DEST (set);
1669   if (!REG_P (dest))
1670     return NULL;
1671
1672   if (!biv_p (insn, dest))
1673     return NULL;
1674
1675   ok = iv_analyze_result (insn, dest, &iv);
1676
1677   /* This used to be an assert under the assumption that if biv_p returns
1678      true that iv_analyze_result must also return true.  However, that
1679      assumption is not strictly correct as evidenced by pr25569.
1680
1681      Returning NULL when iv_analyze_result returns false is safe and
1682      avoids the problems in pr25569 until the iv_analyze_* routines
1683      can be fixed, which is apparently hard and time consuming
1684      according to their author.  */
1685   if (! ok)
1686     return NULL;
1687
1688   if (iv.step == const0_rtx
1689       || iv.mode != iv.extend_mode)
1690     return NULL;
1691
1692   /* Record the insn to split.  */
1693   ivts = XNEW (struct iv_to_split);
1694   ivts->insn = insn;
1695   ivts->base_var = NULL_RTX;
1696   ivts->step = iv.step;
1697   ivts->n_loc = 1;
1698   ivts->loc[0] = 1;
1699
1700   return ivts;
1701 }
1702
1703 /* Determines which of insns in LOOP can be optimized.
1704    Return a OPT_INFO struct with the relevant hash tables filled
1705    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1706    is undefined for the return value.  */
1707
1708 static struct opt_info *
1709 analyze_insns_in_loop (struct loop *loop)
1710 {
1711   basic_block *body, bb;
1712   unsigned i;
1713   struct opt_info *opt_info = XCNEW (struct opt_info);
1714   rtx insn;
1715   struct iv_to_split *ivts = NULL;
1716   struct var_to_expand *ves = NULL;
1717   PTR *slot1;
1718   PTR *slot2;
1719   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1720   edge exit;
1721   bool can_apply = false;
1722
1723   iv_analysis_loop_init (loop);
1724
1725   body = get_loop_body (loop);
1726
1727   if (flag_split_ivs_in_unroller)
1728     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1729                                             si_info_hash, si_info_eq, free);
1730
1731   /* Record the loop exit bb and loop preheader before the unrolling.  */
1732   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1733
1734   if (VEC_length (edge, edges) == 1)
1735     {
1736       exit = VEC_index (edge, edges, 0);
1737       if (!(exit->flags & EDGE_COMPLEX))
1738         {
1739           opt_info->loop_exit = split_edge (exit);
1740           can_apply = true;
1741         }
1742     }
1743
1744   if (flag_variable_expansion_in_unroller
1745       && can_apply)
1746     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1747                                                       ve_info_hash, ve_info_eq, free);
1748
1749   for (i = 0; i < loop->num_nodes; i++)
1750     {
1751       bb = body[i];
1752       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1753         continue;
1754
1755       FOR_BB_INSNS (bb, insn)
1756       {
1757         if (!INSN_P (insn))
1758           continue;
1759
1760         if (opt_info->insns_to_split)
1761           ivts = analyze_iv_to_split_insn (insn);
1762
1763         if (ivts)
1764           {
1765             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1766             *slot1 = ivts;
1767             continue;
1768           }
1769
1770         if (opt_info->insns_with_var_to_expand)
1771           ves = analyze_insn_to_expand_var (loop, insn);
1772
1773         if (ves)
1774           {
1775             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1776             *slot2 = ves;
1777           }
1778       }
1779     }
1780
1781   VEC_free (edge, heap, edges);
1782   free (body);
1783   return opt_info;
1784 }
1785
1786 /* Called just before loop duplication.  Records start of duplicated area
1787    to OPT_INFO.  */
1788
1789 static void
1790 opt_info_start_duplication (struct opt_info *opt_info)
1791 {
1792   if (opt_info)
1793     opt_info->first_new_block = last_basic_block;
1794 }
1795
1796 /* Determine the number of iterations between initialization of the base
1797    variable and the current copy (N_COPY).  N_COPIES is the total number
1798    of newly created copies.  UNROLLING is true if we are unrolling
1799    (not peeling) the loop.  */
1800
1801 static unsigned
1802 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1803 {
1804   if (unrolling)
1805     {
1806       /* If we are unrolling, initialization is done in the original loop
1807          body (number 0).  */
1808       return n_copy;
1809     }
1810   else
1811     {
1812       /* If we are peeling, the copy in that the initialization occurs has
1813          number 1.  The original loop (number 0) is the last.  */
1814       if (n_copy)
1815         return n_copy - 1;
1816       else
1817         return n_copies;
1818     }
1819 }
1820
1821 /* Locate in EXPR the expression corresponding to the location recorded
1822    in IVTS, and return a pointer to the RTX for this location.  */
1823
1824 static rtx *
1825 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1826 {
1827   unsigned i;
1828   rtx *ret = &expr;
1829
1830   for (i = 0; i < ivts->n_loc; i++)
1831     ret = &XEXP (*ret, ivts->loc[i]);
1832
1833   return ret;
1834 }
1835
1836 /* Allocate basic variable for the induction variable chain.  Callback for
1837    htab_traverse.  */
1838
1839 static int
1840 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1841 {
1842   struct iv_to_split *ivts = *slot;
1843   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1844
1845   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1846
1847   return 1;
1848 }
1849
1850 /* Insert initialization of basic variable of IVTS before INSN, taking
1851    the initial value from INSN.  */
1852
1853 static void
1854 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1855 {
1856   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1857   rtx seq;
1858
1859   start_sequence ();
1860   expr = force_operand (expr, ivts->base_var);
1861   if (expr != ivts->base_var)
1862     emit_move_insn (ivts->base_var, expr);
1863   seq = get_insns ();
1864   end_sequence ();
1865
1866   emit_insn_before (seq, insn);
1867 }
1868
1869 /* Replace the use of induction variable described in IVTS in INSN
1870    by base variable + DELTA * step.  */
1871
1872 static void
1873 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1874 {
1875   rtx expr, *loc, seq, incr, var;
1876   enum machine_mode mode = GET_MODE (ivts->base_var);
1877   rtx src, dest, set;
1878
1879   /* Construct base + DELTA * step.  */
1880   if (!delta)
1881     expr = ivts->base_var;
1882   else
1883     {
1884       incr = simplify_gen_binary (MULT, mode,
1885                                   ivts->step, gen_int_mode (delta, mode));
1886       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1887                                   ivts->base_var, incr);
1888     }
1889
1890   /* Figure out where to do the replacement.  */
1891   loc = get_ivts_expr (single_set (insn), ivts);
1892
1893   /* If we can make the replacement right away, we're done.  */
1894   if (validate_change (insn, loc, expr, 0))
1895     return;
1896
1897   /* Otherwise, force EXPR into a register and try again.  */
1898   start_sequence ();
1899   var = gen_reg_rtx (mode);
1900   expr = force_operand (expr, var);
1901   if (expr != var)
1902     emit_move_insn (var, expr);
1903   seq = get_insns ();
1904   end_sequence ();
1905   emit_insn_before (seq, insn);
1906
1907   if (validate_change (insn, loc, var, 0))
1908     return;
1909
1910   /* The last chance.  Try recreating the assignment in insn
1911      completely from scratch.  */
1912   set = single_set (insn);
1913   gcc_assert (set);
1914
1915   start_sequence ();
1916   *loc = var;
1917   src = copy_rtx (SET_SRC (set));
1918   dest = copy_rtx (SET_DEST (set));
1919   src = force_operand (src, dest);
1920   if (src != dest)
1921     emit_move_insn (dest, src);
1922   seq = get_insns ();
1923   end_sequence ();
1924
1925   emit_insn_before (seq, insn);
1926   delete_insn (insn);
1927 }
1928
1929
1930 /* Return one expansion of the accumulator recorded in struct VE.  */
1931
1932 static rtx
1933 get_expansion (struct var_to_expand *ve)
1934 {
1935   rtx reg;
1936
1937   if (ve->reuse_expansion == 0)
1938     reg = ve->reg;
1939   else
1940     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1941
1942   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1943     ve->reuse_expansion = 0;
1944   else
1945     ve->reuse_expansion++;
1946
1947   return reg;
1948 }
1949
1950
1951 /* Given INSN replace the uses of the accumulator recorded in VE
1952    with a new register.  */
1953
1954 static void
1955 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1956 {
1957   rtx new_reg, set;
1958   bool really_new_expansion = false;
1959
1960   set = single_set (insn);
1961   gcc_assert (set);
1962
1963   /* Generate a new register only if the expansion limit has not been
1964      reached.  Else reuse an already existing expansion.  */
1965   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1966     {
1967       really_new_expansion = true;
1968       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1969     }
1970   else
1971     new_reg = get_expansion (ve);
1972
1973   validate_change (insn, &SET_DEST (set), new_reg, 1);
1974   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1975
1976   if (apply_change_group ())
1977     if (really_new_expansion)
1978       {
1979         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1980         ve->expansion_count++;
1981       }
1982 }
1983
1984 /* Initialize the variable expansions in loop preheader.
1985    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1986    basic block where the initialization of the expansions
1987    should take place.  */
1988
1989 static int
1990 insert_var_expansion_initialization (void **slot, void *place_p)
1991 {
1992   struct var_to_expand *ve = *slot;
1993   basic_block place = (basic_block)place_p;
1994   rtx seq, var, zero_init, insn;
1995   unsigned i;
1996
1997   if (VEC_length (rtx, ve->var_expansions) == 0)
1998     return 1;
1999
2000   start_sequence ();
2001   if (ve->op == PLUS || ve->op == MINUS)
2002     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2003       {
2004         zero_init =  CONST0_RTX (GET_MODE (var));
2005         emit_move_insn (var, zero_init);
2006       }
2007   else if (ve->op == MULT)
2008     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2009       {
2010         zero_init =  CONST1_RTX (GET_MODE (var));
2011         emit_move_insn (var, zero_init);
2012       }
2013
2014   seq = get_insns ();
2015   end_sequence ();
2016
2017   insn = BB_HEAD (place);
2018   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2019     insn = NEXT_INSN (insn);
2020
2021   emit_insn_after (seq, insn);
2022   /* Continue traversing the hash table.  */
2023   return 1;
2024 }
2025
2026 /*  Combine the variable expansions at the loop exit.
2027     Callbacks for htab_traverse.  PLACE_P is the loop exit
2028     basic block where the summation of the expansions should
2029     take place.  */
2030
2031 static int
2032 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2033 {
2034   struct var_to_expand *ve = *slot;
2035   basic_block place = (basic_block)place_p;
2036   rtx sum = ve->reg;
2037   rtx expr, seq, var, insn;
2038   unsigned i;
2039
2040   if (VEC_length (rtx, ve->var_expansions) == 0)
2041     return 1;
2042
2043   start_sequence ();
2044   if (ve->op == PLUS || ve->op == MINUS)
2045     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2046       {
2047         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2048                                    var, sum);
2049       }
2050   else if (ve->op == MULT)
2051     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2052       {
2053         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2054                                    var, sum);
2055       }
2056
2057   expr = force_operand (sum, ve->reg);
2058   if (expr != ve->reg)
2059     emit_move_insn (ve->reg, expr);
2060   seq = get_insns ();
2061   end_sequence ();
2062
2063   insn = BB_HEAD (place);
2064   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2065     insn = NEXT_INSN (insn);
2066
2067   emit_insn_after (seq, insn);
2068
2069   /* Continue traversing the hash table.  */
2070   return 1;
2071 }
2072
2073 /* Apply loop optimizations in loop copies using the
2074    data which gathered during the unrolling.  Structure
2075    OPT_INFO record that data.
2076
2077    UNROLLING is true if we unrolled (not peeled) the loop.
2078    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2079    the loop (as it should happen in complete unrolling, but not in ordinary
2080    peeling of the loop).  */
2081
2082 static void
2083 apply_opt_in_copies (struct opt_info *opt_info,
2084                      unsigned n_copies, bool unrolling,
2085                      bool rewrite_original_loop)
2086 {
2087   unsigned i, delta;
2088   basic_block bb, orig_bb;
2089   rtx insn, orig_insn, next;
2090   struct iv_to_split ivts_templ, *ivts;
2091   struct var_to_expand ve_templ, *ves;
2092
2093   /* Sanity check -- we need to put initialization in the original loop
2094      body.  */
2095   gcc_assert (!unrolling || rewrite_original_loop);
2096
2097   /* Allocate the basic variables (i0).  */
2098   if (opt_info->insns_to_split)
2099     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2100
2101   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2102     {
2103       bb = BASIC_BLOCK (i);
2104       orig_bb = get_bb_original (bb);
2105
2106       /* bb->aux holds position in copy sequence initialized by
2107          duplicate_loop_to_header_edge.  */
2108       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2109                                         unrolling);
2110       bb->aux = 0;
2111       orig_insn = BB_HEAD (orig_bb);
2112       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2113         {
2114           next = NEXT_INSN (insn);
2115           if (!INSN_P (insn))
2116             continue;
2117
2118           while (!INSN_P (orig_insn))
2119             orig_insn = NEXT_INSN (orig_insn);
2120
2121           ivts_templ.insn = orig_insn;
2122           ve_templ.insn = orig_insn;
2123
2124           /* Apply splitting iv optimization.  */
2125           if (opt_info->insns_to_split)
2126             {
2127               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2128
2129               if (ivts)
2130                 {
2131                   gcc_assert (GET_CODE (PATTERN (insn))
2132                               == GET_CODE (PATTERN (orig_insn)));
2133
2134                   if (!delta)
2135                     insert_base_initialization (ivts, insn);
2136                   split_iv (ivts, insn, delta);
2137                 }
2138             }
2139           /* Apply variable expansion optimization.  */
2140           if (unrolling && opt_info->insns_with_var_to_expand)
2141             {
2142               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2143               if (ves)
2144                 {
2145                   gcc_assert (GET_CODE (PATTERN (insn))
2146                               == GET_CODE (PATTERN (orig_insn)));
2147                   expand_var_during_unrolling (ves, insn);
2148                 }
2149             }
2150           orig_insn = NEXT_INSN (orig_insn);
2151         }
2152     }
2153
2154   if (!rewrite_original_loop)
2155     return;
2156
2157   /* Initialize the variable expansions in the loop preheader
2158      and take care of combining them at the loop exit.  */
2159   if (opt_info->insns_with_var_to_expand)
2160     {
2161       htab_traverse (opt_info->insns_with_var_to_expand,
2162                      insert_var_expansion_initialization,
2163                      opt_info->loop_preheader);
2164       htab_traverse (opt_info->insns_with_var_to_expand,
2165                      combine_var_copies_in_loop_exit,
2166                      opt_info->loop_exit);
2167     }
2168
2169   /* Rewrite also the original loop body.  Find them as originals of the blocks
2170      in the last copied iteration, i.e. those that have
2171      get_bb_copy (get_bb_original (bb)) == bb.  */
2172   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2173     {
2174       bb = BASIC_BLOCK (i);
2175       orig_bb = get_bb_original (bb);
2176       if (get_bb_copy (orig_bb) != bb)
2177         continue;
2178
2179       delta = determine_split_iv_delta (0, n_copies, unrolling);
2180       for (orig_insn = BB_HEAD (orig_bb);
2181            orig_insn != NEXT_INSN (BB_END (bb));
2182            orig_insn = next)
2183         {
2184           next = NEXT_INSN (orig_insn);
2185
2186           if (!INSN_P (orig_insn))
2187             continue;
2188
2189           ivts_templ.insn = orig_insn;
2190           if (opt_info->insns_to_split)
2191             {
2192               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2193               if (ivts)
2194                 {
2195                   if (!delta)
2196                     insert_base_initialization (ivts, orig_insn);
2197                   split_iv (ivts, orig_insn, delta);
2198                   continue;
2199                 }
2200             }
2201
2202         }
2203     }
2204 }
2205
2206 /*  Release the data structures used for the variable expansion
2207     optimization.  Callbacks for htab_traverse.  */
2208
2209 static int
2210 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2211 {
2212   struct var_to_expand *ve = *slot;
2213
2214   VEC_free (rtx, heap, ve->var_expansions);
2215
2216   /* Continue traversing the hash table.  */
2217   return 1;
2218 }
2219
2220 /* Release OPT_INFO.  */
2221
2222 static void
2223 free_opt_info (struct opt_info *opt_info)
2224 {
2225   if (opt_info->insns_to_split)
2226     htab_delete (opt_info->insns_to_split);
2227   if (opt_info->insns_with_var_to_expand)
2228     {
2229       htab_traverse (opt_info->insns_with_var_to_expand,
2230                      release_var_copies, NULL);
2231       htab_delete (opt_info->insns_with_var_to_expand);
2232     }
2233   free (opt_info);
2234 }