gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (struct loops *, int);
 118 static void peel_loops_completely (struct loops *, int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loops *, struct loop *);
 126 static void peel_loop_completely (struct loops *, struct loop *);
 127 static void unroll_loop_stupid (struct loops *, struct loop *);
 128 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (struct loops *loops, int flags)
 146 {
 147   struct loop *loop, *next;
 148   bool check;
 149
 150   /* First perform complete loop peeling (it is almost surely a win,
 151      and affects parameters for further decision a lot).  */
 152   peel_loops_completely (loops, flags);
 153
 154   /* Now decide rest of unrolling and peeling.  */
 155   decide_unrolling_and_peeling (loops, flags);
 156
 157   loop = loops->tree_root;
 158   while (loop->inner)
 159     loop = loop->inner;
 160
 161   /* Scan the loops, inner ones first.  */
 162   while (loop != loops->tree_root)
 163     {
 164       if (loop->next)
 165         {
 166           next = loop->next;
 167           while (next->inner)
 168             next = next->inner;
 169         }
 170       else
 171         next = loop->outer;
 172
 173       check = true;
 174       /* And perform the appropriate transformations.  */
 175       switch (loop->lpt_decision.decision)
 176         {
 177         case LPT_PEEL_COMPLETELY:
 178           /* Already done.  */
 179           gcc_unreachable ();
 180         case LPT_PEEL_SIMPLE:
 181           peel_loop_simple (loops, loop);
 182           break;
 183         case LPT_UNROLL_CONSTANT:
 184           unroll_loop_constant_iterations (loops, loop);
 185           break;
 186         case LPT_UNROLL_RUNTIME:
 187           unroll_loop_runtime_iterations (loops, loop);
 188           break;
 189         case LPT_UNROLL_STUPID:
 190           unroll_loop_stupid (loops, loop);
 191           break;
 192         case LPT_NONE:
 193           check = false;
 194           break;
 195         default:
 196           gcc_unreachable ();
 197         }
 198       if (check)
 199         {
 200 #ifdef ENABLE_CHECKING
 201           verify_dominators (CDI_DOMINATORS);
 202           verify_loop_structure (loops);
 203 #endif
 204         }
 205       loop = next;
 206     }
 207
 208   iv_analysis_done ();
 209 }
 210
 211 /* Check whether exit of the LOOP is at the end of loop body.  */
 212
 213 static bool
 214 loop_exit_at_end_p (struct loop *loop)
 215 {
 216   struct niter_desc *desc = get_simple_loop_desc (loop);
 217   rtx insn;
 218
 219   if (desc->in_edge->dest != loop->latch)
 220     return false;
 221
 222   /* Check that the latch is empty.  */
 223   FOR_BB_INSNS (loop->latch, insn)
 224     {
 225       if (INSN_P (insn))
 226         return false;
 227     }
 228
 229   return true;
 230 }
 231
 232 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 233 static void
 234 peel_loops_completely (struct loops *loops, int flags)
 235 {
 236   struct loop *loop, *next;
 237
 238   loop = loops->tree_root;
 239   while (loop->inner)
 240     loop = loop->inner;
 241
 242   while (loop != loops->tree_root)
 243     {
 244       if (loop->next)
 245         {
 246           next = loop->next;
 247           while (next->inner)
 248             next = next->inner;
 249         }
 250       else
 251         next = loop->outer;
 252
 253       loop->lpt_decision.decision = LPT_NONE;
 254
 255       if (dump_file)
 256         fprintf (dump_file,
 257                  "\n;; *** Considering loop %d for complete peeling ***\n",
 258                  loop->num);
 259
 260       loop->ninsns = num_loop_insns (loop);
 261
 262       decide_peel_once_rolling (loop, flags);
 263       if (loop->lpt_decision.decision == LPT_NONE)
 264         decide_peel_completely (loop, flags);
 265
 266       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 267         {
 268           peel_loop_completely (loops, loop);
 269 #ifdef ENABLE_CHECKING
 270           verify_dominators (CDI_DOMINATORS);
 271           verify_loop_structure (loops);
 272 #endif
 273         }
 274       loop = next;
 275     }
 276 }
 277
 278 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 279 static void
 280 decide_unrolling_and_peeling (struct loops *loops, int flags)
 281 {
 282   struct loop *loop = loops->tree_root, *next;
 283
 284   while (loop->inner)
 285     loop = loop->inner;
 286
 287   /* Scan the loops, inner ones first.  */
 288   while (loop != loops->tree_root)
 289     {
 290       if (loop->next)
 291         {
 292           next = loop->next;
 293           while (next->inner)
 294             next = next->inner;
 295         }
 296       else
 297         next = loop->outer;
 298
 299       loop->lpt_decision.decision = LPT_NONE;
 300
 301       if (dump_file)
 302         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 303
 304       /* Do not peel cold areas.  */
 305       if (!maybe_hot_bb_p (loop->header))
 306         {
 307           if (dump_file)
 308             fprintf (dump_file, ";; Not considering loop, cold area\n");
 309           loop = next;
 310           continue;
 311         }
 312
 313       /* Can the loop be manipulated?  */
 314       if (!can_duplicate_loop_p (loop))
 315         {
 316           if (dump_file)
 317             fprintf (dump_file,
 318                      ";; Not considering loop, cannot duplicate\n");
 319           loop = next;
 320           continue;
 321         }
 322
 323       /* Skip non-innermost loops.  */
 324       if (loop->inner)
 325         {
 326           if (dump_file)
 327             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 328           loop = next;
 329           continue;
 330         }
 331
 332       loop->ninsns = num_loop_insns (loop);
 333       loop->av_ninsns = average_num_loop_insns (loop);
 334
 335       /* Try transformations one by one in decreasing order of
 336          priority.  */
 337
 338       decide_unroll_constant_iterations (loop, flags);
 339       if (loop->lpt_decision.decision == LPT_NONE)
 340         decide_unroll_runtime_iterations (loop, flags);
 341       if (loop->lpt_decision.decision == LPT_NONE)
 342         decide_unroll_stupid (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_peel_simple (loop, flags);
 345
 346       loop = next;
 347     }
 348 }
 349
 350 /* Decide whether the LOOP is once rolling and suitable for complete
 351    peeling.  */
 352 static void
 353 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 354 {
 355   struct niter_desc *desc;
 356
 357   if (dump_file)
 358     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 359
 360   /* Is the loop small enough?  */
 361   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 362     {
 363       if (dump_file)
 364         fprintf (dump_file, ";; Not considering loop, is too big\n");
 365       return;
 366     }
 367
 368   /* Check for simple loops.  */
 369   desc = get_simple_loop_desc (loop);
 370
 371   /* Check number of iterations.  */
 372   if (!desc->simple_p
 373       || desc->assumptions
 374       || desc->infinite
 375       || !desc->const_iter
 376       || desc->niter != 0)
 377     {
 378       if (dump_file)
 379         fprintf (dump_file,
 380                  ";; Unable to prove that the loop rolls exactly once\n");
 381       return;
 382     }
 383
 384   /* Success.  */
 385   if (dump_file)
 386     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 387   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 388 }
 389
 390 /* Decide whether the LOOP is suitable for complete peeling.  */
 391 static void
 392 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 393 {
 394   unsigned npeel;
 395   struct niter_desc *desc;
 396
 397   if (dump_file)
 398     fprintf (dump_file, "\n;; Considering peeling completely\n");
 399
 400   /* Skip non-innermost loops.  */
 401   if (loop->inner)
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 405       return;
 406     }
 407
 408   /* Do not peel cold areas.  */
 409   if (!maybe_hot_bb_p (loop->header))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file, ";; Not considering loop, cold area\n");
 413       return;
 414     }
 415
 416   /* Can the loop be manipulated?  */
 417   if (!can_duplicate_loop_p (loop))
 418     {
 419       if (dump_file)
 420         fprintf (dump_file,
 421                  ";; Not considering loop, cannot duplicate\n");
 422       return;
 423     }
 424
 425   /* npeel = number of iterations to peel.  */
 426   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 427   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 428     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 429
 430   /* Is the loop small enough?  */
 431   if (!npeel)
 432     {
 433       if (dump_file)
 434         fprintf (dump_file, ";; Not considering loop, is too big\n");
 435       return;
 436     }
 437
 438   /* Check for simple loops.  */
 439   desc = get_simple_loop_desc (loop);
 440
 441   /* Check number of iterations.  */
 442   if (!desc->simple_p
 443       || desc->assumptions
 444       || !desc->const_iter
 445       || desc->infinite)
 446     {
 447       if (dump_file)
 448         fprintf (dump_file,
 449                  ";; Unable to prove that the loop iterates constant times\n");
 450       return;
 451     }
 452
 453   if (desc->niter > npeel - 1)
 454     {
 455       if (dump_file)
 456         {
 457           fprintf (dump_file,
 458                    ";; Not peeling loop completely, rolls too much (");
 459           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 460           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 461         }
 462       return;
 463     }
 464
 465   /* Success.  */
 466   if (dump_file)
 467     fprintf (dump_file, ";; Decided to peel loop completely\n");
 468   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 469 }
 470
 471 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 472    completely.  The transformation done:
 473
 474    for (i = 0; i < 4; i++)
 475      body;
 476
 477    ==>
 478
 479    i = 0;
 480    body; i++;
 481    body; i++;
 482    body; i++;
 483    body; i++;
 484    */
 485 static void
 486 peel_loop_completely (struct loops *loops, struct loop *loop)
 487 {
 488   sbitmap wont_exit;
 489   unsigned HOST_WIDE_INT npeel;
 490   unsigned n_remove_edges, i;
 491   edge *remove_edges, ein;
 492   struct niter_desc *desc = get_simple_loop_desc (loop);
 493   struct opt_info *opt_info = NULL;
 494
 495   npeel = desc->niter;
 496
 497   if (npeel)
 498     {
 499       bool ok;
 500
 501       wont_exit = sbitmap_alloc (npeel + 1);
 502       sbitmap_ones (wont_exit);
 503       RESET_BIT (wont_exit, 0);
 504       if (desc->noloop_assumptions)
 505         RESET_BIT (wont_exit, 1);
 506
 507       remove_edges = XCNEWVEC (edge, npeel);
 508       n_remove_edges = 0;
 509
 510       if (flag_split_ivs_in_unroller)
 511         opt_info = analyze_insns_in_loop (loop);
 512
 513       opt_info_start_duplication (opt_info);
 514       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 515                                           loops, npeel,
 516                                           wont_exit, desc->out_edge,
 517                                           remove_edges, &n_remove_edges,
 518                                           DLTHE_FLAG_UPDATE_FREQ
 519                                           | DLTHE_FLAG_COMPLETTE_PEEL
 520                                           | (opt_info
 521                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 522       gcc_assert (ok);
 523
 524       free (wont_exit);
 525
 526       if (opt_info)
 527         {
 528           apply_opt_in_copies (opt_info, npeel, false, true);
 529           free_opt_info (opt_info);
 530         }
 531
 532       /* Remove the exit edges.  */
 533       for (i = 0; i < n_remove_edges; i++)
 534         remove_path (loops, remove_edges[i]);
 535       free (remove_edges);
 536     }
 537
 538   ein = desc->in_edge;
 539   free_simple_loop_desc (loop);
 540
 541   /* Now remove the unreachable part of the last iteration and cancel
 542      the loop.  */
 543   remove_path (loops, ein);
 544
 545   if (dump_file)
 546     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 547 }
 548
 549 /* Decide whether to unroll LOOP iterating constant number of times
 550    and how much.  */
 551
 552 static void
 553 decide_unroll_constant_iterations (struct loop *loop, int flags)
 554 {
 555   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 556   struct niter_desc *desc;
 557
 558   if (!(flags & UAP_UNROLL))
 559     {
 560       /* We were not asked to, just return back silently.  */
 561       return;
 562     }
 563
 564   if (dump_file)
 565     fprintf (dump_file,
 566              "\n;; Considering unrolling loop with constant "
 567              "number of iterations\n");
 568
 569   /* nunroll = total number of copies of the original loop body in
 570      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 571   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 572   nunroll_by_av
 573     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 574   if (nunroll > nunroll_by_av)
 575     nunroll = nunroll_by_av;
 576   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 577     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 578
 579   /* Skip big loops.  */
 580   if (nunroll <= 1)
 581     {
 582       if (dump_file)
 583         fprintf (dump_file, ";; Not considering loop, is too big\n");
 584       return;
 585     }
 586
 587   /* Check for simple loops.  */
 588   desc = get_simple_loop_desc (loop);
 589
 590   /* Check number of iterations.  */
 591   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 592     {
 593       if (dump_file)
 594         fprintf (dump_file,
 595                  ";; Unable to prove that the loop iterates constant times\n");
 596       return;
 597     }
 598
 599   /* Check whether the loop rolls enough to consider.  */
 600   if (desc->niter < 2 * nunroll)
 601     {
 602       if (dump_file)
 603         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 604       return;
 605     }
 606
 607   /* Success; now compute number of iterations to unroll.  We alter
 608      nunroll so that as few as possible copies of loop body are
 609      necessary, while still not decreasing the number of unrollings
 610      too much (at most by 1).  */
 611   best_copies = 2 * nunroll + 10;
 612
 613   i = 2 * nunroll + 2;
 614   if (i - 1 >= desc->niter)
 615     i = desc->niter - 2;
 616
 617   for (; i >= nunroll - 1; i--)
 618     {
 619       unsigned exit_mod = desc->niter % (i + 1);
 620
 621       if (!loop_exit_at_end_p (loop))
 622         n_copies = exit_mod + i + 1;
 623       else if (exit_mod != (unsigned) i
 624                || desc->noloop_assumptions != NULL_RTX)
 625         n_copies = exit_mod + i + 2;
 626       else
 627         n_copies = i + 1;
 628
 629       if (n_copies < best_copies)
 630         {
 631           best_copies = n_copies;
 632           best_unroll = i;
 633         }
 634     }
 635
 636   if (dump_file)
 637     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 638              best_unroll + 1, best_copies, nunroll);
 639
 640   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 641   loop->lpt_decision.times = best_unroll;
 642
 643   if (dump_file)
 644     fprintf (dump_file,
 645              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 646              loop->lpt_decision.times);
 647 }
 648
 649 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 650    times.  The transformation does this:
 651
 652    for (i = 0; i < 102; i++)
 653      body;
 654
 655    ==>
 656
 657    i = 0;
 658    body; i++;
 659    body; i++;
 660    while (i < 102)
 661      {
 662        body; i++;
 663        body; i++;
 664        body; i++;
 665        body; i++;
 666      }
 667   */
 668 static void
 669 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 670 {
 671   unsigned HOST_WIDE_INT niter;
 672   unsigned exit_mod;
 673   sbitmap wont_exit;
 674   unsigned n_remove_edges, i;
 675   edge *remove_edges;
 676   unsigned max_unroll = loop->lpt_decision.times;
 677   struct niter_desc *desc = get_simple_loop_desc (loop);
 678   bool exit_at_end = loop_exit_at_end_p (loop);
 679   struct opt_info *opt_info = NULL;
 680   bool ok;
 681
 682   niter = desc->niter;
 683
 684   /* Should not get here (such loop should be peeled instead).  */
 685   gcc_assert (niter > max_unroll + 1);
 686
 687   exit_mod = niter % (max_unroll + 1);
 688
 689   wont_exit = sbitmap_alloc (max_unroll + 1);
 690   sbitmap_ones (wont_exit);
 691
 692   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
 693   n_remove_edges = 0;
 694   if (flag_split_ivs_in_unroller
 695       || flag_variable_expansion_in_unroller)
 696     opt_info = analyze_insns_in_loop (loop);
 697
 698   if (!exit_at_end)
 699     {
 700       /* The exit is not at the end of the loop; leave exit test
 701          in the first copy, so that the loops that start with test
 702          of exit condition have continuous body after unrolling.  */
 703
 704       if (dump_file)
 705         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 706
 707       /* Peel exit_mod iterations.  */
 708       RESET_BIT (wont_exit, 0);
 709       if (desc->noloop_assumptions)
 710         RESET_BIT (wont_exit, 1);
 711
 712       if (exit_mod)
 713         {
 714           opt_info_start_duplication (opt_info);
 715           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 716                                               loops, exit_mod,
 717                                               wont_exit, desc->out_edge,
 718                                               remove_edges, &n_remove_edges,
 719                                               DLTHE_FLAG_UPDATE_FREQ
 720                                               | (opt_info && exit_mod > 1
 721                                                  ? DLTHE_RECORD_COPY_NUMBER
 722                                                    : 0));
 723           gcc_assert (ok);
 724
 725           if (opt_info && exit_mod > 1)
 726             apply_opt_in_copies (opt_info, exit_mod, false, false);
 727
 728           desc->noloop_assumptions = NULL_RTX;
 729           desc->niter -= exit_mod;
 730           desc->niter_max -= exit_mod;
 731         }
 732
 733       SET_BIT (wont_exit, 1);
 734     }
 735   else
 736     {
 737       /* Leave exit test in last copy, for the same reason as above if
 738          the loop tests the condition at the end of loop body.  */
 739
 740       if (dump_file)
 741         fprintf (dump_file, ";; Condition on end of loop.\n");
 742
 743       /* We know that niter >= max_unroll + 2; so we do not need to care of
 744          case when we would exit before reaching the loop.  So just peel
 745          exit_mod + 1 iterations.  */
 746       if (exit_mod != max_unroll
 747           || desc->noloop_assumptions)
 748         {
 749           RESET_BIT (wont_exit, 0);
 750           if (desc->noloop_assumptions)
 751             RESET_BIT (wont_exit, 1);
 752
 753           opt_info_start_duplication (opt_info);
 754           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 755                                               loops, exit_mod + 1,
 756                                               wont_exit, desc->out_edge,
 757                                               remove_edges, &n_remove_edges,
 758                                               DLTHE_FLAG_UPDATE_FREQ
 759                                               | (opt_info && exit_mod > 0
 760                                                  ? DLTHE_RECORD_COPY_NUMBER
 761                                                    : 0));
 762           gcc_assert (ok);
 763
 764           if (opt_info && exit_mod > 0)
 765             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 766
 767           desc->niter -= exit_mod + 1;
 768           desc->niter_max -= exit_mod + 1;
 769           desc->noloop_assumptions = NULL_RTX;
 770
 771           SET_BIT (wont_exit, 0);
 772           SET_BIT (wont_exit, 1);
 773         }
 774
 775       RESET_BIT (wont_exit, max_unroll);
 776     }
 777
 778   /* Now unroll the loop.  */
 779
 780   opt_info_start_duplication (opt_info);
 781   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 782                                       loops, max_unroll,
 783                                       wont_exit, desc->out_edge,
 784                                       remove_edges, &n_remove_edges,
 785                                       DLTHE_FLAG_UPDATE_FREQ
 786                                       | (opt_info
 787                                          ? DLTHE_RECORD_COPY_NUMBER
 788                                            : 0));
 789   gcc_assert (ok);
 790
 791   if (opt_info)
 792     {
 793       apply_opt_in_copies (opt_info, max_unroll, true, true);
 794       free_opt_info (opt_info);
 795     }
 796
 797   free (wont_exit);
 798
 799   if (exit_at_end)
 800     {
 801       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 802       /* Find a new in and out edge; they are in the last copy we have made.  */
 803
 804       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 805         {
 806           desc->out_edge = EDGE_SUCC (exit_block, 0);
 807           desc->in_edge = EDGE_SUCC (exit_block, 1);
 808         }
 809       else
 810         {
 811           desc->out_edge = EDGE_SUCC (exit_block, 1);
 812           desc->in_edge = EDGE_SUCC (exit_block, 0);
 813         }
 814     }
 815
 816   desc->niter /= max_unroll + 1;
 817   desc->niter_max /= max_unroll + 1;
 818   desc->niter_expr = GEN_INT (desc->niter);
 819
 820   /* Remove the edges.  */
 821   for (i = 0; i < n_remove_edges; i++)
 822     remove_path (loops, remove_edges[i]);
 823   free (remove_edges);
 824
 825   if (dump_file)
 826     fprintf (dump_file,
 827              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 828              max_unroll, num_loop_insns (loop));
 829 }
 830
 831 /* Decide whether to unroll LOOP iterating runtime computable number of times
 832    and how much.  */
 833 static void
 834 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 835 {
 836   unsigned nunroll, nunroll_by_av, i;
 837   struct niter_desc *desc;
 838
 839   if (!(flags & UAP_UNROLL))
 840     {
 841       /* We were not asked to, just return back silently.  */
 842       return;
 843     }
 844
 845   if (dump_file)
 846     fprintf (dump_file,
 847              "\n;; Considering unrolling loop with runtime "
 848              "computable number of iterations\n");
 849
 850   /* nunroll = total number of copies of the original loop body in
 851      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 852   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 853   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 854   if (nunroll > nunroll_by_av)
 855     nunroll = nunroll_by_av;
 856   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 857     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 858
 859   /* Skip big loops.  */
 860   if (nunroll <= 1)
 861     {
 862       if (dump_file)
 863         fprintf (dump_file, ";; Not considering loop, is too big\n");
 864       return;
 865     }
 866
 867   /* Check for simple loops.  */
 868   desc = get_simple_loop_desc (loop);
 869
 870   /* Check simpleness.  */
 871   if (!desc->simple_p || desc->assumptions)
 872     {
 873       if (dump_file)
 874         fprintf (dump_file,
 875                  ";; Unable to prove that the number of iterations "
 876                  "can be counted in runtime\n");
 877       return;
 878     }
 879
 880   if (desc->const_iter)
 881     {
 882       if (dump_file)
 883         fprintf (dump_file, ";; Loop iterates constant times\n");
 884       return;
 885     }
 886
 887   /* If we have profile feedback, check whether the loop rolls.  */
 888   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 889     {
 890       if (dump_file)
 891         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 892       return;
 893     }
 894
 895   /* Success; now force nunroll to be power of 2, as we are unable to
 896      cope with overflows in computation of number of iterations.  */
 897   for (i = 1; 2 * i <= nunroll; i *= 2)
 898     continue;
 899
 900   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 901   loop->lpt_decision.times = i - 1;
 902
 903   if (dump_file)
 904     fprintf (dump_file,
 905              ";; Decided to unroll the runtime computable "
 906              "times rolling loop, %d times.\n",
 907              loop->lpt_decision.times);
 908 }
 909
 910 /* Unroll LOOP for that we are able to count number of iterations in runtime
 911    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 912    extra care for case n < 0):
 913
 914    for (i = 0; i < n; i++)
 915      body;
 916
 917    ==>
 918
 919    i = 0;
 920    mod = n % 4;
 921
 922    switch (mod)
 923      {
 924        case 3:
 925          body; i++;
 926        case 2:
 927          body; i++;
 928        case 1:
 929          body; i++;
 930        case 0: ;
 931      }
 932
 933    while (i < n)
 934      {
 935        body; i++;
 936        body; i++;
 937        body; i++;
 938        body; i++;
 939      }
 940    */
 941 static void
 942 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 943 {
 944   rtx old_niter, niter, init_code, branch_code, tmp;
 945   unsigned i, j, p;
 946   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 947   unsigned n_dom_bbs;
 948   sbitmap wont_exit;
 949   int may_exit_copy;
 950   unsigned n_peel, n_remove_edges;
 951   edge *remove_edges, e;
 952   bool extra_zero_check, last_may_exit;
 953   unsigned max_unroll = loop->lpt_decision.times;
 954   struct niter_desc *desc = get_simple_loop_desc (loop);
 955   bool exit_at_end = loop_exit_at_end_p (loop);
 956   struct opt_info *opt_info = NULL;
 957   bool ok;
 958
 959   if (flag_split_ivs_in_unroller
 960       || flag_variable_expansion_in_unroller)
 961     opt_info = analyze_insns_in_loop (loop);
 962
 963   /* Remember blocks whose dominators will have to be updated.  */
 964   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 965   n_dom_bbs = 0;
 966
 967   body = get_loop_body (loop);
 968   for (i = 0; i < loop->num_nodes; i++)
 969     {
 970       unsigned nldom;
 971       basic_block *ldom;
 972
 973       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 974       for (j = 0; j < nldom; j++)
 975         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 976           dom_bbs[n_dom_bbs++] = ldom[j];
 977
 978       free (ldom);
 979     }
 980   free (body);
 981
 982   if (!exit_at_end)
 983     {
 984       /* Leave exit in first copy (for explanation why see comment in
 985          unroll_loop_constant_iterations).  */
 986       may_exit_copy = 0;
 987       n_peel = max_unroll - 1;
 988       extra_zero_check = true;
 989       last_may_exit = false;
 990     }
 991   else
 992     {
 993       /* Leave exit in last copy (for explanation why see comment in
 994          unroll_loop_constant_iterations).  */
 995       may_exit_copy = max_unroll;
 996       n_peel = max_unroll;
 997       extra_zero_check = false;
 998       last_may_exit = true;
 999     }
1000
1001   /* Get expression for number of iterations.  */
1002   start_sequence ();
1003   old_niter = niter = gen_reg_rtx (desc->mode);
1004   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1005   if (tmp != niter)
1006     emit_move_insn (niter, tmp);
1007
1008   /* Count modulo by ANDing it with max_unroll; we use the fact that
1009      the number of unrollings is a power of two, and thus this is correct
1010      even if there is overflow in the computation.  */
1011   niter = expand_simple_binop (desc->mode, AND,
1012                                niter,
1013                                GEN_INT (max_unroll),
1014                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1015
1016   init_code = get_insns ();
1017   end_sequence ();
1018
1019   /* Precondition the loop.  */
1020   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1021
1022   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1023   n_remove_edges = 0;
1024
1025   wont_exit = sbitmap_alloc (max_unroll + 2);
1026
1027   /* Peel the first copy of loop body (almost always we must leave exit test
1028      here; the only exception is when we have extra zero check and the number
1029      of iterations is reliable.  Also record the place of (possible) extra
1030      zero check.  */
1031   sbitmap_zero (wont_exit);
1032   if (extra_zero_check
1033       && !desc->noloop_assumptions)
1034     SET_BIT (wont_exit, 1);
1035   ezc_swtch = loop_preheader_edge (loop)->src;
1036   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1037                                       loops, 1,
1038                                       wont_exit, desc->out_edge,
1039                                       remove_edges, &n_remove_edges,
1040                                       DLTHE_FLAG_UPDATE_FREQ);
1041   gcc_assert (ok);
1042
1043   /* Record the place where switch will be built for preconditioning.  */
1044   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1045                                 NULL_RTX);
1046
1047   for (i = 0; i < n_peel; i++)
1048     {
1049       /* Peel the copy.  */
1050       sbitmap_zero (wont_exit);
1051       if (i != n_peel - 1 || !last_may_exit)
1052         SET_BIT (wont_exit, 1);
1053       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1054                                           loops, 1,
1055                                           wont_exit, desc->out_edge,
1056                                           remove_edges, &n_remove_edges,
1057                                           DLTHE_FLAG_UPDATE_FREQ);
1058       gcc_assert (ok);
1059
1060       /* Create item for switch.  */
1061       j = n_peel - i - (extra_zero_check ? 0 : 1);
1062       p = REG_BR_PROB_BASE / (i + 2);
1063
1064       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1065       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1066                                           block_label (preheader), p,
1067                                           NULL_RTX);
1068
1069       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1070       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1071       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1072       e = make_edge (swtch, preheader,
1073                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1074       e->probability = p;
1075     }
1076
1077   if (extra_zero_check)
1078     {
1079       /* Add branch for zero iterations.  */
1080       p = REG_BR_PROB_BASE / (max_unroll + 1);
1081       swtch = ezc_swtch;
1082       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1083       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1084                                           block_label (preheader), p,
1085                                           NULL_RTX);
1086
1087       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1088       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1089       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1090       e = make_edge (swtch, preheader,
1091                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1092       e->probability = p;
1093     }
1094
1095   /* Recount dominators for outer blocks.  */
1096   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1097
1098   /* And unroll loop.  */
1099
1100   sbitmap_ones (wont_exit);
1101   RESET_BIT (wont_exit, may_exit_copy);
1102   opt_info_start_duplication (opt_info);
1103
1104   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1105                                       loops, max_unroll,
1106                                       wont_exit, desc->out_edge,
1107                                       remove_edges, &n_remove_edges,
1108                                       DLTHE_FLAG_UPDATE_FREQ
1109                                       | (opt_info
1110                                          ? DLTHE_RECORD_COPY_NUMBER
1111                                            : 0));
1112   gcc_assert (ok);
1113
1114   if (opt_info)
1115     {
1116       apply_opt_in_copies (opt_info, max_unroll, true, true);
1117       free_opt_info (opt_info);
1118     }
1119
1120   free (wont_exit);
1121
1122   if (exit_at_end)
1123     {
1124       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1125       /* Find a new in and out edge; they are in the last copy we have
1126          made.  */
1127
1128       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1129         {
1130           desc->out_edge = EDGE_SUCC (exit_block, 0);
1131           desc->in_edge = EDGE_SUCC (exit_block, 1);
1132         }
1133       else
1134         {
1135           desc->out_edge = EDGE_SUCC (exit_block, 1);
1136           desc->in_edge = EDGE_SUCC (exit_block, 0);
1137         }
1138     }
1139
1140   /* Remove the edges.  */
1141   for (i = 0; i < n_remove_edges; i++)
1142     remove_path (loops, remove_edges[i]);
1143   free (remove_edges);
1144
1145   /* We must be careful when updating the number of iterations due to
1146      preconditioning and the fact that the value must be valid at entry
1147      of the loop.  After passing through the above code, we see that
1148      the correct new number of iterations is this:  */
1149   gcc_assert (!desc->const_iter);
1150   desc->niter_expr =
1151     simplify_gen_binary (UDIV, desc->mode, old_niter,
1152                          GEN_INT (max_unroll + 1));
1153   desc->niter_max /= max_unroll + 1;
1154   if (exit_at_end)
1155     {
1156       desc->niter_expr =
1157         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1158       desc->noloop_assumptions = NULL_RTX;
1159       desc->niter_max--;
1160     }
1161
1162   if (dump_file)
1163     fprintf (dump_file,
1164              ";; Unrolled loop %d times, counting # of iterations "
1165              "in runtime, %i insns\n",
1166              max_unroll, num_loop_insns (loop));
1167
1168   if (dom_bbs)
1169     free (dom_bbs);
1170 }
1171
1172 /* Decide whether to simply peel LOOP and how much.  */
1173 static void
1174 decide_peel_simple (struct loop *loop, int flags)
1175 {
1176   unsigned npeel;
1177   struct niter_desc *desc;
1178
1179   if (!(flags & UAP_PEEL))
1180     {
1181       /* We were not asked to, just return back silently.  */
1182       return;
1183     }
1184
1185   if (dump_file)
1186     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1187
1188   /* npeel = number of iterations to peel.  */
1189   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1190   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1191     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1192
1193   /* Skip big loops.  */
1194   if (!npeel)
1195     {
1196       if (dump_file)
1197         fprintf (dump_file, ";; Not considering loop, is too big\n");
1198       return;
1199     }
1200
1201   /* Check for simple loops.  */
1202   desc = get_simple_loop_desc (loop);
1203
1204   /* Check number of iterations.  */
1205   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1206     {
1207       if (dump_file)
1208         fprintf (dump_file, ";; Loop iterates constant times\n");
1209       return;
1210     }
1211
1212   /* Do not simply peel loops with branches inside -- it increases number
1213      of mispredicts.  */
1214   if (num_loop_branches (loop) > 1)
1215     {
1216       if (dump_file)
1217         fprintf (dump_file, ";; Not peeling, contains branches\n");
1218       return;
1219     }
1220
1221   if (loop->header->count)
1222     {
1223       unsigned niter = expected_loop_iterations (loop);
1224       if (niter + 1 > npeel)
1225         {
1226           if (dump_file)
1227             {
1228               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1229               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1230                        (HOST_WIDEST_INT) (niter + 1));
1231               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1232                        npeel);
1233             }
1234           return;
1235         }
1236       npeel = niter + 1;
1237     }
1238   else
1239     {
1240       /* For now we have no good heuristics to decide whether loop peeling
1241          will be effective, so disable it.  */
1242       if (dump_file)
1243         fprintf (dump_file,
1244                  ";; Not peeling loop, no evidence it will be profitable\n");
1245       return;
1246     }
1247
1248   /* Success.  */
1249   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1250   loop->lpt_decision.times = npeel;
1251
1252   if (dump_file)
1253     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1254              loop->lpt_decision.times);
1255 }
1256
1257 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1258    while (cond)
1259      body;
1260
1261    ==>
1262
1263    if (!cond) goto end;
1264    body;
1265    if (!cond) goto end;
1266    body;
1267    while (cond)
1268      body;
1269    end: ;
1270    */
1271 static void
1272 peel_loop_simple (struct loops *loops, struct loop *loop)
1273 {
1274   sbitmap wont_exit;
1275   unsigned npeel = loop->lpt_decision.times;
1276   struct niter_desc *desc = get_simple_loop_desc (loop);
1277   struct opt_info *opt_info = NULL;
1278   bool ok;
1279
1280   if (flag_split_ivs_in_unroller && npeel > 1)
1281     opt_info = analyze_insns_in_loop (loop);
1282
1283   wont_exit = sbitmap_alloc (npeel + 1);
1284   sbitmap_zero (wont_exit);
1285
1286   opt_info_start_duplication (opt_info);
1287
1288   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1289                                       loops, npeel, wont_exit,
1290                                       NULL, NULL,
1291                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1292                                       | (opt_info
1293                                          ? DLTHE_RECORD_COPY_NUMBER
1294                                            : 0));
1295   gcc_assert (ok);
1296
1297   free (wont_exit);
1298
1299   if (opt_info)
1300     {
1301       apply_opt_in_copies (opt_info, npeel, false, false);
1302       free_opt_info (opt_info);
1303     }
1304
1305   if (desc->simple_p)
1306     {
1307       if (desc->const_iter)
1308         {
1309           desc->niter -= npeel;
1310           desc->niter_expr = GEN_INT (desc->niter);
1311           desc->noloop_assumptions = NULL_RTX;
1312         }
1313       else
1314         {
1315           /* We cannot just update niter_expr, as its value might be clobbered
1316              inside loop.  We could handle this by counting the number into
1317              temporary just like we do in runtime unrolling, but it does not
1318              seem worthwhile.  */
1319           free_simple_loop_desc (loop);
1320         }
1321     }
1322   if (dump_file)
1323     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1324 }
1325
1326 /* Decide whether to unroll LOOP stupidly and how much.  */
1327 static void
1328 decide_unroll_stupid (struct loop *loop, int flags)
1329 {
1330   unsigned nunroll, nunroll_by_av, i;
1331   struct niter_desc *desc;
1332
1333   if (!(flags & UAP_UNROLL_ALL))
1334     {
1335       /* We were not asked to, just return back silently.  */
1336       return;
1337     }
1338
1339   if (dump_file)
1340     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1341
1342   /* nunroll = total number of copies of the original loop body in
1343      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1344   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1345   nunroll_by_av
1346     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1347   if (nunroll > nunroll_by_av)
1348     nunroll = nunroll_by_av;
1349   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1350     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1351
1352   /* Skip big loops.  */
1353   if (nunroll <= 1)
1354     {
1355       if (dump_file)
1356         fprintf (dump_file, ";; Not considering loop, is too big\n");
1357       return;
1358     }
1359
1360   /* Check for simple loops.  */
1361   desc = get_simple_loop_desc (loop);
1362
1363   /* Check simpleness.  */
1364   if (desc->simple_p && !desc->assumptions)
1365     {
1366       if (dump_file)
1367         fprintf (dump_file, ";; The loop is simple\n");
1368       return;
1369     }
1370
1371   /* Do not unroll loops with branches inside -- it increases number
1372      of mispredicts.  */
1373   if (num_loop_branches (loop) > 1)
1374     {
1375       if (dump_file)
1376         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1377       return;
1378     }
1379
1380   /* If we have profile feedback, check whether the loop rolls.  */
1381   if (loop->header->count
1382       && expected_loop_iterations (loop) < 2 * nunroll)
1383     {
1384       if (dump_file)
1385         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1386       return;
1387     }
1388
1389   /* Success.  Now force nunroll to be power of 2, as it seems that this
1390      improves results (partially because of better alignments, partially
1391      because of some dark magic).  */
1392   for (i = 1; 2 * i <= nunroll; i *= 2)
1393     continue;
1394
1395   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1396   loop->lpt_decision.times = i - 1;
1397
1398   if (dump_file)
1399     fprintf (dump_file,
1400              ";; Decided to unroll the loop stupidly, %d times.\n",
1401              loop->lpt_decision.times);
1402 }
1403
1404 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1405    while (cond)
1406      body;
1407
1408    ==>
1409
1410    while (cond)
1411      {
1412        body;
1413        if (!cond) break;
1414        body;
1415        if (!cond) break;
1416        body;
1417        if (!cond) break;
1418        body;
1419      }
1420    */
1421 static void
1422 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1423 {
1424   sbitmap wont_exit;
1425   unsigned nunroll = loop->lpt_decision.times;
1426   struct niter_desc *desc = get_simple_loop_desc (loop);
1427   struct opt_info *opt_info = NULL;
1428   bool ok;
1429
1430   if (flag_split_ivs_in_unroller
1431       || flag_variable_expansion_in_unroller)
1432     opt_info = analyze_insns_in_loop (loop);
1433
1434
1435   wont_exit = sbitmap_alloc (nunroll + 1);
1436   sbitmap_zero (wont_exit);
1437   opt_info_start_duplication (opt_info);
1438
1439   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1440                                       loops, nunroll, wont_exit,
1441                                       NULL, NULL, NULL,
1442                                       DLTHE_FLAG_UPDATE_FREQ
1443                                       | (opt_info
1444                                          ? DLTHE_RECORD_COPY_NUMBER
1445                                            : 0));
1446   gcc_assert (ok);
1447
1448   if (opt_info)
1449     {
1450       apply_opt_in_copies (opt_info, nunroll, true, true);
1451       free_opt_info (opt_info);
1452     }
1453
1454   free (wont_exit);
1455
1456   if (desc->simple_p)
1457     {
1458       /* We indeed may get here provided that there are nontrivial assumptions
1459          for a loop to be really simple.  We could update the counts, but the
1460          problem is that we are unable to decide which exit will be taken
1461          (not really true in case the number of iterations is constant,
1462          but noone will do anything with this information, so we do not
1463          worry about it).  */
1464       desc->simple_p = false;
1465     }
1466
1467   if (dump_file)
1468     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1469              nunroll, num_loop_insns (loop));
1470 }
1471
1472 /* A hash function for information about insns to split.  */
1473
1474 static hashval_t
1475 si_info_hash (const void *ivts)
1476 {
1477   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1478 }
1479
1480 /* An equality functions for information about insns to split.  */
1481
1482 static int
1483 si_info_eq (const void *ivts1, const void *ivts2)
1484 {
1485   const struct iv_to_split *i1 = ivts1;
1486   const struct iv_to_split *i2 = ivts2;
1487
1488   return i1->insn == i2->insn;
1489 }
1490
1491 /* Return a hash for VES, which is really a "var_to_expand *".  */
1492
1493 static hashval_t
1494 ve_info_hash (const void *ves)
1495 {
1496   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1497 }
1498
1499 /* Return true if IVTS1 and IVTS2 (which are really both of type
1500    "var_to_expand *") refer to the same instruction.  */
1501
1502 static int
1503 ve_info_eq (const void *ivts1, const void *ivts2)
1504 {
1505   const struct var_to_expand *i1 = ivts1;
1506   const struct var_to_expand *i2 = ivts2;
1507
1508   return i1->insn == i2->insn;
1509 }
1510
1511 /* Returns true if REG is referenced in one insn in LOOP.  */
1512
1513 bool
1514 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1515 {
1516   basic_block *body, bb;
1517   unsigned i;
1518   int count_ref = 0;
1519   rtx insn;
1520
1521   body = get_loop_body (loop);
1522   for (i = 0; i < loop->num_nodes; i++)
1523     {
1524       bb = body[i];
1525
1526       FOR_BB_INSNS (bb, insn)
1527       {
1528         if (rtx_referenced_p (reg, insn))
1529           count_ref++;
1530       }
1531     }
1532   return (count_ref  == 1);
1533 }
1534
1535 /* Determine whether INSN contains an accumulator
1536    which can be expanded into separate copies,
1537    one for each copy of the LOOP body.
1538
1539    for (i = 0 ; i < n; i++)
1540      sum += a[i];
1541
1542    ==>
1543
1544    sum += a[i]
1545    ....
1546    i = i+1;
1547    sum1 += a[i]
1548    ....
1549    i = i+1
1550    sum2 += a[i];
1551    ....
1552
1553    Return NULL if INSN contains no opportunity for expansion of accumulator.
1554    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1555    information and return a pointer to it.
1556 */
1557
1558 static struct var_to_expand *
1559 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1560 {
1561   rtx set, dest, src, op1;
1562   struct var_to_expand *ves;
1563   enum machine_mode mode1, mode2;
1564
1565   set = single_set (insn);
1566   if (!set)
1567     return NULL;
1568
1569   dest = SET_DEST (set);
1570   src = SET_SRC (set);
1571
1572   if (GET_CODE (src) != PLUS
1573       && GET_CODE (src) != MINUS
1574       && GET_CODE (src) != MULT)
1575     return NULL;
1576
1577   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1578      in MD.  But if there is no optab to generate the insn, we can not
1579      perform the variable expansion.  This can happen if an MD provides
1580      an insn but not a named pattern to generate it, for example to avoid
1581      producing code that needs additional mode switches like for x87/mmx.
1582
1583      So we check have_insn_for which looks for an optab for the operation
1584      in SRC.  If it doesn't exist, we can't perform the expansion even
1585      though INSN is valid.  */
1586   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1587     return NULL;
1588
1589   if (!XEXP (src, 0))
1590     return NULL;
1591
1592   op1 = XEXP (src, 0);
1593
1594   if (!REG_P (dest)
1595       && !(GET_CODE (dest) == SUBREG
1596            && REG_P (SUBREG_REG (dest))))
1597     return NULL;
1598
1599   if (!rtx_equal_p (dest, op1))
1600     return NULL;
1601
1602   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1603     return NULL;
1604
1605   if (rtx_referenced_p (dest, XEXP (src, 1)))
1606     return NULL;
1607
1608   mode1 = GET_MODE (dest);
1609   mode2 = GET_MODE (XEXP (src, 1));
1610   if ((FLOAT_MODE_P (mode1)
1611        || FLOAT_MODE_P (mode2))
1612       && !flag_unsafe_math_optimizations)
1613     return NULL;
1614
1615   /* Record the accumulator to expand.  */
1616   ves = XNEW (struct var_to_expand);
1617   ves->insn = insn;
1618   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1619   ves->reg = copy_rtx (dest);
1620   ves->op = GET_CODE (src);
1621   ves->expansion_count = 0;
1622   ves->reuse_expansion = 0;
1623   return ves;
1624 }
1625
1626 /* Determine whether there is an induction variable in INSN that
1627    we would like to split during unrolling.
1628
1629    I.e. replace
1630
1631    i = i + 1;
1632    ...
1633    i = i + 1;
1634    ...
1635    i = i + 1;
1636    ...
1637
1638    type chains by
1639
1640    i0 = i + 1
1641    ...
1642    i = i0 + 1
1643    ...
1644    i = i0 + 2
1645    ...
1646
1647    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1648    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1649    pointer to it.  */
1650
1651 static struct iv_to_split *
1652 analyze_iv_to_split_insn (rtx insn)
1653 {
1654   rtx set, dest;
1655   struct rtx_iv iv;
1656   struct iv_to_split *ivts;
1657   bool ok;
1658
1659   /* For now we just split the basic induction variables.  Later this may be
1660      extended for example by selecting also addresses of memory references.  */
1661   set = single_set (insn);
1662   if (!set)
1663     return NULL;
1664
1665   dest = SET_DEST (set);
1666   if (!REG_P (dest))
1667     return NULL;
1668
1669   if (!biv_p (insn, dest))
1670     return NULL;
1671
1672   ok = iv_analyze_result (insn, dest, &iv);
1673   gcc_assert (ok);
1674
1675   if (iv.step == const0_rtx
1676       || iv.mode != iv.extend_mode)
1677     return NULL;
1678
1679   /* Record the insn to split.  */
1680   ivts = XNEW (struct iv_to_split);
1681   ivts->insn = insn;
1682   ivts->base_var = NULL_RTX;
1683   ivts->step = iv.step;
1684   ivts->n_loc = 1;
1685   ivts->loc[0] = 1;
1686
1687   return ivts;
1688 }
1689
1690 /* Determines which of insns in LOOP can be optimized.
1691    Return a OPT_INFO struct with the relevant hash tables filled
1692    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1693    is undefined for the return value.  */
1694
1695 static struct opt_info *
1696 analyze_insns_in_loop (struct loop *loop)
1697 {
1698   basic_block *body, bb;
1699   unsigned i, num_edges = 0;
1700   struct opt_info *opt_info = XCNEW (struct opt_info);
1701   rtx insn;
1702   struct iv_to_split *ivts = NULL;
1703   struct var_to_expand *ves = NULL;
1704   PTR *slot1;
1705   PTR *slot2;
1706   edge *edges = get_loop_exit_edges (loop, &num_edges);
1707   bool can_apply = false;
1708
1709   iv_analysis_loop_init (loop);
1710
1711   body = get_loop_body (loop);
1712
1713   if (flag_split_ivs_in_unroller)
1714     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1715                                             si_info_hash, si_info_eq, free);
1716
1717   /* Record the loop exit bb and loop preheader before the unrolling.  */
1718   if (!loop_preheader_edge (loop)->src)
1719     {
1720       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1721       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1722     }
1723   else
1724     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1725
1726   if (num_edges == 1
1727       && !(edges[0]->flags & EDGE_COMPLEX))
1728     {
1729       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1730       can_apply = true;
1731     }
1732
1733   if (flag_variable_expansion_in_unroller
1734       && can_apply)
1735     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1736                                                       ve_info_hash, ve_info_eq, free);
1737
1738   for (i = 0; i < loop->num_nodes; i++)
1739     {
1740       bb = body[i];
1741       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1742         continue;
1743
1744       FOR_BB_INSNS (bb, insn)
1745       {
1746         if (!INSN_P (insn))
1747           continue;
1748
1749         if (opt_info->insns_to_split)
1750           ivts = analyze_iv_to_split_insn (insn);
1751
1752         if (ivts)
1753           {
1754             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1755             *slot1 = ivts;
1756             continue;
1757           }
1758
1759         if (opt_info->insns_with_var_to_expand)
1760           ves = analyze_insn_to_expand_var (loop, insn);
1761
1762         if (ves)
1763           {
1764             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1765             *slot2 = ves;
1766           }
1767       }
1768     }
1769
1770   free (edges);
1771   free (body);
1772   return opt_info;
1773 }
1774
1775 /* Called just before loop duplication.  Records start of duplicated area
1776    to OPT_INFO.  */
1777
1778 static void
1779 opt_info_start_duplication (struct opt_info *opt_info)
1780 {
1781   if (opt_info)
1782     opt_info->first_new_block = last_basic_block;
1783 }
1784
1785 /* Determine the number of iterations between initialization of the base
1786    variable and the current copy (N_COPY).  N_COPIES is the total number
1787    of newly created copies.  UNROLLING is true if we are unrolling
1788    (not peeling) the loop.  */
1789
1790 static unsigned
1791 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1792 {
1793   if (unrolling)
1794     {
1795       /* If we are unrolling, initialization is done in the original loop
1796          body (number 0).  */
1797       return n_copy;
1798     }
1799   else
1800     {
1801       /* If we are peeling, the copy in that the initialization occurs has
1802          number 1.  The original loop (number 0) is the last.  */
1803       if (n_copy)
1804         return n_copy - 1;
1805       else
1806         return n_copies;
1807     }
1808 }
1809
1810 /* Locate in EXPR the expression corresponding to the location recorded
1811    in IVTS, and return a pointer to the RTX for this location.  */
1812
1813 static rtx *
1814 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1815 {
1816   unsigned i;
1817   rtx *ret = &expr;
1818
1819   for (i = 0; i < ivts->n_loc; i++)
1820     ret = &XEXP (*ret, ivts->loc[i]);
1821
1822   return ret;
1823 }
1824
1825 /* Allocate basic variable for the induction variable chain.  Callback for
1826    htab_traverse.  */
1827
1828 static int
1829 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1830 {
1831   struct iv_to_split *ivts = *slot;
1832   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1833
1834   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1835
1836   return 1;
1837 }
1838
1839 /* Insert initialization of basic variable of IVTS before INSN, taking
1840    the initial value from INSN.  */
1841
1842 static void
1843 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1844 {
1845   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1846   rtx seq;
1847
1848   start_sequence ();
1849   expr = force_operand (expr, ivts->base_var);
1850   if (expr != ivts->base_var)
1851     emit_move_insn (ivts->base_var, expr);
1852   seq = get_insns ();
1853   end_sequence ();
1854
1855   emit_insn_before (seq, insn);
1856 }
1857
1858 /* Replace the use of induction variable described in IVTS in INSN
1859    by base variable + DELTA * step.  */
1860
1861 static void
1862 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1863 {
1864   rtx expr, *loc, seq, incr, var;
1865   enum machine_mode mode = GET_MODE (ivts->base_var);
1866   rtx src, dest, set;
1867
1868   /* Construct base + DELTA * step.  */
1869   if (!delta)
1870     expr = ivts->base_var;
1871   else
1872     {
1873       incr = simplify_gen_binary (MULT, mode,
1874                                   ivts->step, gen_int_mode (delta, mode));
1875       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1876                                   ivts->base_var, incr);
1877     }
1878
1879   /* Figure out where to do the replacement.  */
1880   loc = get_ivts_expr (single_set (insn), ivts);
1881
1882   /* If we can make the replacement right away, we're done.  */
1883   if (validate_change (insn, loc, expr, 0))
1884     return;
1885
1886   /* Otherwise, force EXPR into a register and try again.  */
1887   start_sequence ();
1888   var = gen_reg_rtx (mode);
1889   expr = force_operand (expr, var);
1890   if (expr != var)
1891     emit_move_insn (var, expr);
1892   seq = get_insns ();
1893   end_sequence ();
1894   emit_insn_before (seq, insn);
1895
1896   if (validate_change (insn, loc, var, 0))
1897     return;
1898
1899   /* The last chance.  Try recreating the assignment in insn
1900      completely from scratch.  */
1901   set = single_set (insn);
1902   gcc_assert (set);
1903
1904   start_sequence ();
1905   *loc = var;
1906   src = copy_rtx (SET_SRC (set));
1907   dest = copy_rtx (SET_DEST (set));
1908   src = force_operand (src, dest);
1909   if (src != dest)
1910     emit_move_insn (dest, src);
1911   seq = get_insns ();
1912   end_sequence ();
1913
1914   emit_insn_before (seq, insn);
1915   delete_insn (insn);
1916 }
1917
1918
1919 /* Return one expansion of the accumulator recorded in struct VE.  */
1920
1921 static rtx
1922 get_expansion (struct var_to_expand *ve)
1923 {
1924   rtx reg;
1925
1926   if (ve->reuse_expansion == 0)
1927     reg = ve->reg;
1928   else
1929     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1930
1931   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1932     ve->reuse_expansion = 0;
1933   else
1934     ve->reuse_expansion++;
1935
1936   return reg;
1937 }
1938
1939
1940 /* Given INSN replace the uses of the accumulator recorded in VE
1941    with a new register.  */
1942
1943 static void
1944 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1945 {
1946   rtx new_reg, set;
1947   bool really_new_expansion = false;
1948
1949   set = single_set (insn);
1950   gcc_assert (set);
1951
1952   /* Generate a new register only if the expansion limit has not been
1953      reached.  Else reuse an already existing expansion.  */
1954   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1955     {
1956       really_new_expansion = true;
1957       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1958     }
1959   else
1960     new_reg = get_expansion (ve);
1961
1962   validate_change (insn, &SET_DEST (set), new_reg, 1);
1963   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1964
1965   if (apply_change_group ())
1966     if (really_new_expansion)
1967       {
1968         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1969         ve->expansion_count++;
1970       }
1971 }
1972
1973 /* Initialize the variable expansions in loop preheader.
1974    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1975    basic block where the initialization of the expansions
1976    should take place.  */
1977
1978 static int
1979 insert_var_expansion_initialization (void **slot, void *place_p)
1980 {
1981   struct var_to_expand *ve = *slot;
1982   basic_block place = (basic_block)place_p;
1983   rtx seq, var, zero_init, insn;
1984   unsigned i;
1985
1986   if (VEC_length (rtx, ve->var_expansions) == 0)
1987     return 1;
1988
1989   start_sequence ();
1990   if (ve->op == PLUS || ve->op == MINUS)
1991     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1992       {
1993         zero_init =  CONST0_RTX (GET_MODE (var));
1994         emit_move_insn (var, zero_init);
1995       }
1996   else if (ve->op == MULT)
1997     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1998       {
1999         zero_init =  CONST1_RTX (GET_MODE (var));
2000         emit_move_insn (var, zero_init);
2001       }
2002
2003   seq = get_insns ();
2004   end_sequence ();
2005
2006   insn = BB_HEAD (place);
2007   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2008     insn = NEXT_INSN (insn);
2009
2010   emit_insn_after (seq, insn);
2011   /* Continue traversing the hash table.  */
2012   return 1;
2013 }
2014
2015 /*  Combine the variable expansions at the loop exit.
2016     Callbacks for htab_traverse.  PLACE_P is the loop exit
2017     basic block where the summation of the expansions should
2018     take place.  */
2019
2020 static int
2021 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2022 {
2023   struct var_to_expand *ve = *slot;
2024   basic_block place = (basic_block)place_p;
2025   rtx sum = ve->reg;
2026   rtx expr, seq, var, insn;
2027   unsigned i;
2028
2029   if (VEC_length (rtx, ve->var_expansions) == 0)
2030     return 1;
2031
2032   start_sequence ();
2033   if (ve->op == PLUS || ve->op == MINUS)
2034     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2035       {
2036         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2037                                    var, sum);
2038       }
2039   else if (ve->op == MULT)
2040     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2041       {
2042         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2043                                    var, sum);
2044       }
2045
2046   expr = force_operand (sum, ve->reg);
2047   if (expr != ve->reg)
2048     emit_move_insn (ve->reg, expr);
2049   seq = get_insns ();
2050   end_sequence ();
2051
2052   insn = BB_HEAD (place);
2053   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2054     insn = NEXT_INSN (insn);
2055
2056   emit_insn_after (seq, insn);
2057
2058   /* Continue traversing the hash table.  */
2059   return 1;
2060 }
2061
2062 /* Apply loop optimizations in loop copies using the
2063    data which gathered during the unrolling.  Structure
2064    OPT_INFO record that data.
2065
2066    UNROLLING is true if we unrolled (not peeled) the loop.
2067    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2068    the loop (as it should happen in complete unrolling, but not in ordinary
2069    peeling of the loop).  */
2070
2071 static void
2072 apply_opt_in_copies (struct opt_info *opt_info,
2073                      unsigned n_copies, bool unrolling,
2074                      bool rewrite_original_loop)
2075 {
2076   unsigned i, delta;
2077   basic_block bb, orig_bb;
2078   rtx insn, orig_insn, next;
2079   struct iv_to_split ivts_templ, *ivts;
2080   struct var_to_expand ve_templ, *ves;
2081
2082   /* Sanity check -- we need to put initialization in the original loop
2083      body.  */
2084   gcc_assert (!unrolling || rewrite_original_loop);
2085
2086   /* Allocate the basic variables (i0).  */
2087   if (opt_info->insns_to_split)
2088     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2089
2090   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2091     {
2092       bb = BASIC_BLOCK (i);
2093       orig_bb = get_bb_original (bb);
2094
2095       /* bb->aux holds position in copy sequence initialized by
2096          duplicate_loop_to_header_edge.  */
2097       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2098                                         unrolling);
2099       bb->aux = 0;
2100       orig_insn = BB_HEAD (orig_bb);
2101       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2102         {
2103           next = NEXT_INSN (insn);
2104           if (!INSN_P (insn))
2105             continue;
2106
2107           while (!INSN_P (orig_insn))
2108             orig_insn = NEXT_INSN (orig_insn);
2109
2110           ivts_templ.insn = orig_insn;
2111           ve_templ.insn = orig_insn;
2112
2113           /* Apply splitting iv optimization.  */
2114           if (opt_info->insns_to_split)
2115             {
2116               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2117
2118               if (ivts)
2119                 {
2120                   gcc_assert (GET_CODE (PATTERN (insn))
2121                               == GET_CODE (PATTERN (orig_insn)));
2122
2123                   if (!delta)
2124                     insert_base_initialization (ivts, insn);
2125                   split_iv (ivts, insn, delta);
2126                 }
2127             }
2128           /* Apply variable expansion optimization.  */
2129           if (unrolling && opt_info->insns_with_var_to_expand)
2130             {
2131               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2132               if (ves)
2133                 {
2134                   gcc_assert (GET_CODE (PATTERN (insn))
2135                               == GET_CODE (PATTERN (orig_insn)));
2136                   expand_var_during_unrolling (ves, insn);
2137                 }
2138             }
2139           orig_insn = NEXT_INSN (orig_insn);
2140         }
2141     }
2142
2143   if (!rewrite_original_loop)
2144     return;
2145
2146   /* Initialize the variable expansions in the loop preheader
2147      and take care of combining them at the loop exit.  */
2148   if (opt_info->insns_with_var_to_expand)
2149     {
2150       htab_traverse (opt_info->insns_with_var_to_expand,
2151                      insert_var_expansion_initialization,
2152                      opt_info->loop_preheader);
2153       htab_traverse (opt_info->insns_with_var_to_expand,
2154                      combine_var_copies_in_loop_exit,
2155                      opt_info->loop_exit);
2156     }
2157
2158   /* Rewrite also the original loop body.  Find them as originals of the blocks
2159      in the last copied iteration, i.e. those that have
2160      get_bb_copy (get_bb_original (bb)) == bb.  */
2161   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2162     {
2163       bb = BASIC_BLOCK (i);
2164       orig_bb = get_bb_original (bb);
2165       if (get_bb_copy (orig_bb) != bb)
2166         continue;
2167
2168       delta = determine_split_iv_delta (0, n_copies, unrolling);
2169       for (orig_insn = BB_HEAD (orig_bb);
2170            orig_insn != NEXT_INSN (BB_END (bb));
2171            orig_insn = next)
2172         {
2173           next = NEXT_INSN (orig_insn);
2174
2175           if (!INSN_P (orig_insn))
2176             continue;
2177
2178           ivts_templ.insn = orig_insn;
2179           if (opt_info->insns_to_split)
2180             {
2181               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2182               if (ivts)
2183                 {
2184                   if (!delta)
2185                     insert_base_initialization (ivts, orig_insn);
2186                   split_iv (ivts, orig_insn, delta);
2187                   continue;
2188                 }
2189             }
2190
2191         }
2192     }
2193 }
2194
2195 /*  Release the data structures used for the variable expansion
2196     optimization.  Callbacks for htab_traverse.  */
2197
2198 static int
2199 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2200 {
2201   struct var_to_expand *ve = *slot;
2202
2203   VEC_free (rtx, heap, ve->var_expansions);
2204
2205   /* Continue traversing the hash table.  */
2206   return 1;
2207 }
2208
2209 /* Release OPT_INFO.  */
2210
2211 static void
2212 free_opt_info (struct opt_info *opt_info)
2213 {
2214   if (opt_info->insns_to_split)
2215     htab_delete (opt_info->insns_to_split);
2216   if (opt_info->insns_with_var_to_expand)
2217     {
2218       htab_traverse (opt_info->insns_with_var_to_expand,
2219                      release_var_copies, NULL);
2220       htab_delete (opt_info->insns_with_var_to_expand);
2221     }
2222   free (opt_info);
2223 }