gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "basic-block.h"
  28 #include "cfgloop.h"
  29 #include "cfglayout.h"
  30 #include "params.h"
  31 #include "output.h"
  32 #include "expr.h"
  33 #include "hashtab.h"
  34 #include "recog.h"
  35
  36 /* This pass performs loop unrolling and peeling.  We only perform these
  37    optimizations on innermost loops (with single exception) because
  38    the impact on performance is greatest here, and we want to avoid
  39    unnecessary code size growth.  The gain is caused by greater sequentiality
  40    of code, better code to optimize for further passes and in some cases
  41    by fewer testings of exit conditions.  The main problem is code growth,
  42    that impacts performance negatively due to effect of caches.
  43
  44    What we do:
  45
  46    -- complete peeling of once-rolling loops; this is the above mentioned
  47       exception, as this causes loop to be cancelled completely and
  48       does not cause code growth
  49    -- complete peeling of loops that roll (small) constant times.
  50    -- simple peeling of first iterations of loops that do not roll much
  51       (according to profile feedback)
  52    -- unrolling of loops that roll constant times; this is almost always
  53       win, as we get rid of exit condition tests.
  54    -- unrolling of loops that roll number of times that we can compute
  55       in runtime; we also get rid of exit condition tests here, but there
  56       is the extra expense for calculating the number of iterations
  57    -- simple unrolling of remaining loops; this is performed only if we
  58       are asked to, as the gain is questionable in this case and often
  59       it may even slow down the code
  60    For more detailed descriptions of each of those, see comments at
  61    appropriate function below.
  62
  63    There is a lot of parameters (defined and described in params.def) that
  64    control how much we unroll/peel.
  65
  66    ??? A great problem is that we don't have a good way how to determine
  67    how many times we should unroll the loop; the experiments I have made
  68    showed that this choice may affect performance in order of several %.
  69    */
  70
  71 /* Information about induction variables to split.  */
  72
  73 struct iv_to_split
  74 {
  75   rtx insn;             /* The insn in that the induction variable occurs.  */
  76   rtx base_var;         /* The variable on that the values in the further
  77                            iterations are based.  */
  78   rtx step;             /* Step of the induction variable.  */
  79   unsigned n_loc;
  80   unsigned loc[3];      /* Location where the definition of the induction
  81                            variable occurs in the insn.  For example if
  82                            N_LOC is 2, the expression is located at
  83                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  84 };
  85
  86 struct split_ivs_info
  87 {
  88   htab_t insns_to_split;        /* A hashtable of insns to split.  */
  89   unsigned first_new_block;     /* The first basic block that was
  90                                    duplicated.  */
  91 };
  92
  93 static void decide_unrolling_and_peeling (struct loops *, int);
  94 static void peel_loops_completely (struct loops *, int);
  95 static void decide_peel_simple (struct loop *, int);
  96 static void decide_peel_once_rolling (struct loop *, int);
  97 static void decide_peel_completely (struct loop *, int);
  98 static void decide_unroll_stupid (struct loop *, int);
  99 static void decide_unroll_constant_iterations (struct loop *, int);
 100 static void decide_unroll_runtime_iterations (struct loop *, int);
 101 static void peel_loop_simple (struct loops *, struct loop *);
 102 static void peel_loop_completely (struct loops *, struct loop *);
 103 static void unroll_loop_stupid (struct loops *, struct loop *);
 104 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 105 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 106 static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
 107 static void si_info_start_duplication (struct split_ivs_info *);
 108 static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
 109 static void free_si_info (struct split_ivs_info *);
 110
 111 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 112 void
 113 unroll_and_peel_loops (struct loops *loops, int flags)
 114 {
 115   struct loop *loop, *next;
 116   bool check;
 117
 118   /* First perform complete loop peeling (it is almost surely a win,
 119      and affects parameters for further decision a lot).  */
 120   peel_loops_completely (loops, flags);
 121
 122   /* Now decide rest of unrolling and peeling.  */
 123   decide_unrolling_and_peeling (loops, flags);
 124
 125   loop = loops->tree_root;
 126   while (loop->inner)
 127     loop = loop->inner;
 128
 129   /* Scan the loops, inner ones first.  */
 130   while (loop != loops->tree_root)
 131     {
 132       if (loop->next)
 133         {
 134           next = loop->next;
 135           while (next->inner)
 136             next = next->inner;
 137         }
 138       else
 139         next = loop->outer;
 140
 141       check = true;
 142       /* And perform the appropriate transformations.  */
 143       switch (loop->lpt_decision.decision)
 144         {
 145         case LPT_PEEL_COMPLETELY:
 146           /* Already done.  */
 147           gcc_unreachable ();
 148         case LPT_PEEL_SIMPLE:
 149           peel_loop_simple (loops, loop);
 150           break;
 151         case LPT_UNROLL_CONSTANT:
 152           unroll_loop_constant_iterations (loops, loop);
 153           break;
 154         case LPT_UNROLL_RUNTIME:
 155           unroll_loop_runtime_iterations (loops, loop);
 156           break;
 157         case LPT_UNROLL_STUPID:
 158           unroll_loop_stupid (loops, loop);
 159           break;
 160         case LPT_NONE:
 161           check = false;
 162           break;
 163         default:
 164           gcc_unreachable ();
 165         }
 166       if (check)
 167         {
 168 #ifdef ENABLE_CHECKING
 169           verify_dominators (CDI_DOMINATORS);
 170           verify_loop_structure (loops);
 171 #endif
 172         }
 173       loop = next;
 174     }
 175
 176   iv_analysis_done ();
 177 }
 178
 179 /* Check whether exit of the LOOP is at the end of loop body.  */
 180
 181 static bool
 182 loop_exit_at_end_p (struct loop *loop)
 183 {
 184   struct niter_desc *desc = get_simple_loop_desc (loop);
 185   rtx insn;
 186
 187   if (desc->in_edge->dest != loop->latch)
 188     return false;
 189
 190   /* Check that the latch is empty.  */
 191   FOR_BB_INSNS (loop->latch, insn)
 192     {
 193       if (INSN_P (insn))
 194         return false;
 195     }
 196
 197   return true;
 198 }
 199
 200 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 201 static void
 202 peel_loops_completely (struct loops *loops, int flags)
 203 {
 204   struct loop *loop, *next;
 205
 206   loop = loops->tree_root;
 207   while (loop->inner)
 208     loop = loop->inner;
 209
 210   while (loop != loops->tree_root)
 211     {
 212       if (loop->next)
 213         {
 214           next = loop->next;
 215           while (next->inner)
 216             next = next->inner;
 217         }
 218       else
 219         next = loop->outer;
 220
 221       loop->lpt_decision.decision = LPT_NONE;
 222
 223       if (dump_file)
 224         fprintf (dump_file,
 225                  "\n;; *** Considering loop %d for complete peeling ***\n",
 226                  loop->num);
 227
 228       loop->ninsns = num_loop_insns (loop);
 229
 230       decide_peel_once_rolling (loop, flags);
 231       if (loop->lpt_decision.decision == LPT_NONE)
 232         decide_peel_completely (loop, flags);
 233
 234       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 235         {
 236           peel_loop_completely (loops, loop);
 237 #ifdef ENABLE_CHECKING
 238           verify_dominators (CDI_DOMINATORS);
 239           verify_loop_structure (loops);
 240 #endif
 241         }
 242       loop = next;
 243     }
 244 }
 245
 246 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 247 static void
 248 decide_unrolling_and_peeling (struct loops *loops, int flags)
 249 {
 250   struct loop *loop = loops->tree_root, *next;
 251
 252   while (loop->inner)
 253     loop = loop->inner;
 254
 255   /* Scan the loops, inner ones first.  */
 256   while (loop != loops->tree_root)
 257     {
 258       if (loop->next)
 259         {
 260           next = loop->next;
 261           while (next->inner)
 262             next = next->inner;
 263         }
 264       else
 265         next = loop->outer;
 266
 267       loop->lpt_decision.decision = LPT_NONE;
 268
 269       if (dump_file)
 270         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 271
 272       /* Do not peel cold areas.  */
 273       if (!maybe_hot_bb_p (loop->header))
 274         {
 275           if (dump_file)
 276             fprintf (dump_file, ";; Not considering loop, cold area\n");
 277           loop = next;
 278           continue;
 279         }
 280
 281       /* Can the loop be manipulated?  */
 282       if (!can_duplicate_loop_p (loop))
 283         {
 284           if (dump_file)
 285             fprintf (dump_file,
 286                      ";; Not considering loop, cannot duplicate\n");
 287           loop = next;
 288           continue;
 289         }
 290
 291       /* Skip non-innermost loops.  */
 292       if (loop->inner)
 293         {
 294           if (dump_file)
 295             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 296           loop = next;
 297           continue;
 298         }
 299
 300       loop->ninsns = num_loop_insns (loop);
 301       loop->av_ninsns = average_num_loop_insns (loop);
 302
 303       /* Try transformations one by one in decreasing order of
 304          priority.  */
 305
 306       decide_unroll_constant_iterations (loop, flags);
 307       if (loop->lpt_decision.decision == LPT_NONE)
 308         decide_unroll_runtime_iterations (loop, flags);
 309       if (loop->lpt_decision.decision == LPT_NONE)
 310         decide_unroll_stupid (loop, flags);
 311       if (loop->lpt_decision.decision == LPT_NONE)
 312         decide_peel_simple (loop, flags);
 313
 314       loop = next;
 315     }
 316 }
 317
 318 /* Decide whether the LOOP is once rolling and suitable for complete
 319    peeling.  */
 320 static void
 321 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 322 {
 323   struct niter_desc *desc;
 324
 325   if (dump_file)
 326     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 327
 328   /* Is the loop small enough?  */
 329   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 330     {
 331       if (dump_file)
 332         fprintf (dump_file, ";; Not considering loop, is too big\n");
 333       return;
 334     }
 335
 336   /* Check for simple loops.  */
 337   desc = get_simple_loop_desc (loop);
 338
 339   /* Check number of iterations.  */
 340   if (!desc->simple_p
 341       || desc->assumptions
 342       || !desc->const_iter
 343       || desc->niter != 0)
 344     {
 345       if (dump_file)
 346         fprintf (dump_file,
 347                  ";; Unable to prove that the loop rolls exactly once\n");
 348       return;
 349     }
 350
 351   /* Success.  */
 352   if (dump_file)
 353     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 354   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 355 }
 356
 357 /* Decide whether the LOOP is suitable for complete peeling.  */
 358 static void
 359 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 360 {
 361   unsigned npeel;
 362   struct niter_desc *desc;
 363
 364   if (dump_file)
 365     fprintf (dump_file, "\n;; Considering peeling completely\n");
 366
 367   /* Skip non-innermost loops.  */
 368   if (loop->inner)
 369     {
 370       if (dump_file)
 371         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 372       return;
 373     }
 374
 375   /* Do not peel cold areas.  */
 376   if (!maybe_hot_bb_p (loop->header))
 377     {
 378       if (dump_file)
 379         fprintf (dump_file, ";; Not considering loop, cold area\n");
 380       return;
 381     }
 382
 383   /* Can the loop be manipulated?  */
 384   if (!can_duplicate_loop_p (loop))
 385     {
 386       if (dump_file)
 387         fprintf (dump_file,
 388                  ";; Not considering loop, cannot duplicate\n");
 389       return;
 390     }
 391
 392   /* npeel = number of iterations to peel.  */
 393   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 394   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 395     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 396
 397   /* Is the loop small enough?  */
 398   if (!npeel)
 399     {
 400       if (dump_file)
 401         fprintf (dump_file, ";; Not considering loop, is too big\n");
 402       return;
 403     }
 404
 405   /* Check for simple loops.  */
 406   desc = get_simple_loop_desc (loop);
 407
 408   /* Check number of iterations.  */
 409   if (!desc->simple_p
 410       || desc->assumptions
 411       || !desc->const_iter)
 412     {
 413       if (dump_file)
 414         fprintf (dump_file,
 415                  ";; Unable to prove that the loop iterates constant times\n");
 416       return;
 417     }
 418
 419   if (desc->niter > npeel - 1)
 420     {
 421       if (dump_file)
 422         {
 423           fprintf (dump_file,
 424                    ";; Not peeling loop completely, rolls too much (");
 425           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 426           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 427         }
 428       return;
 429     }
 430
 431   /* Success.  */
 432   if (dump_file)
 433     fprintf (dump_file, ";; Decided to peel loop completely\n");
 434   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 435 }
 436
 437 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 438    completely.  The transformation done:
 439
 440    for (i = 0; i < 4; i++)
 441      body;
 442
 443    ==>
 444
 445    i = 0;
 446    body; i++;
 447    body; i++;
 448    body; i++;
 449    body; i++;
 450    */
 451 static void
 452 peel_loop_completely (struct loops *loops, struct loop *loop)
 453 {
 454   sbitmap wont_exit;
 455   unsigned HOST_WIDE_INT npeel;
 456   unsigned n_remove_edges, i;
 457   edge *remove_edges, ein;
 458   struct niter_desc *desc = get_simple_loop_desc (loop);
 459   struct split_ivs_info *si_info = NULL;
 460
 461   npeel = desc->niter;
 462
 463   if (npeel)
 464     {
 465       wont_exit = sbitmap_alloc (npeel + 1);
 466       sbitmap_ones (wont_exit);
 467       RESET_BIT (wont_exit, 0);
 468       if (desc->noloop_assumptions)
 469         RESET_BIT (wont_exit, 1);
 470
 471       remove_edges = xcalloc (npeel, sizeof (edge));
 472       n_remove_edges = 0;
 473
 474       if (flag_split_ivs_in_unroller)
 475         si_info = analyze_ivs_to_split (loop);
 476
 477       si_info_start_duplication (si_info);
 478       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 479                 loops, npeel,
 480                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 481                 DLTHE_FLAG_UPDATE_FREQ))
 482         abort ();
 483
 484       free (wont_exit);
 485
 486       if (si_info)
 487         {
 488           split_ivs_in_copies (si_info, npeel, false, true);
 489           free_si_info (si_info);
 490         }
 491
 492       /* Remove the exit edges.  */
 493       for (i = 0; i < n_remove_edges; i++)
 494         remove_path (loops, remove_edges[i]);
 495       free (remove_edges);
 496     }
 497
 498   ein = desc->in_edge;
 499   free_simple_loop_desc (loop);
 500
 501   /* Now remove the unreachable part of the last iteration and cancel
 502      the loop.  */
 503   remove_path (loops, ein);
 504
 505   if (dump_file)
 506     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 507 }
 508
 509 /* Decide whether to unroll LOOP iterating constant number of times
 510    and how much.  */
 511
 512 static void
 513 decide_unroll_constant_iterations (struct loop *loop, int flags)
 514 {
 515   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 516   struct niter_desc *desc;
 517
 518   if (!(flags & UAP_UNROLL))
 519     {
 520       /* We were not asked to, just return back silently.  */
 521       return;
 522     }
 523
 524   if (dump_file)
 525     fprintf (dump_file,
 526              "\n;; Considering unrolling loop with constant "
 527              "number of iterations\n");
 528
 529   /* nunroll = total number of copies of the original loop body in
 530      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 531   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 532   nunroll_by_av
 533     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 534   if (nunroll > nunroll_by_av)
 535     nunroll = nunroll_by_av;
 536   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 537     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 538
 539   /* Skip big loops.  */
 540   if (nunroll <= 1)
 541     {
 542       if (dump_file)
 543         fprintf (dump_file, ";; Not considering loop, is too big\n");
 544       return;
 545     }
 546
 547   /* Check for simple loops.  */
 548   desc = get_simple_loop_desc (loop);
 549
 550   /* Check number of iterations.  */
 551   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 552     {
 553       if (dump_file)
 554         fprintf (dump_file,
 555                  ";; Unable to prove that the loop iterates constant times\n");
 556       return;
 557     }
 558
 559   /* Check whether the loop rolls enough to consider.  */
 560   if (desc->niter < 2 * nunroll)
 561     {
 562       if (dump_file)
 563         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 564       return;
 565     }
 566
 567   /* Success; now compute number of iterations to unroll.  We alter
 568      nunroll so that as few as possible copies of loop body are
 569      necessary, while still not decreasing the number of unrollings
 570      too much (at most by 1).  */
 571   best_copies = 2 * nunroll + 10;
 572
 573   i = 2 * nunroll + 2;
 574   if (i - 1 >= desc->niter)
 575     i = desc->niter - 2;
 576
 577   for (; i >= nunroll - 1; i--)
 578     {
 579       unsigned exit_mod = desc->niter % (i + 1);
 580
 581       if (!loop_exit_at_end_p (loop))
 582         n_copies = exit_mod + i + 1;
 583       else if (exit_mod != (unsigned) i
 584                || desc->noloop_assumptions != NULL_RTX)
 585         n_copies = exit_mod + i + 2;
 586       else
 587         n_copies = i + 1;
 588
 589       if (n_copies < best_copies)
 590         {
 591           best_copies = n_copies;
 592           best_unroll = i;
 593         }
 594     }
 595
 596   if (dump_file)
 597     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 598              best_unroll + 1, best_copies, nunroll);
 599
 600   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 601   loop->lpt_decision.times = best_unroll;
 602
 603   if (dump_file)
 604     fprintf (dump_file,
 605              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 606              loop->lpt_decision.times);
 607 }
 608
 609 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 610    times.  The transformation does this:
 611
 612    for (i = 0; i < 102; i++)
 613      body;
 614
 615    ==>
 616
 617    i = 0;
 618    body; i++;
 619    body; i++;
 620    while (i < 102)
 621      {
 622        body; i++;
 623        body; i++;
 624        body; i++;
 625        body; i++;
 626      }
 627   */
 628 static void
 629 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 630 {
 631   unsigned HOST_WIDE_INT niter;
 632   unsigned exit_mod;
 633   sbitmap wont_exit;
 634   unsigned n_remove_edges, i;
 635   edge *remove_edges;
 636   unsigned max_unroll = loop->lpt_decision.times;
 637   struct niter_desc *desc = get_simple_loop_desc (loop);
 638   bool exit_at_end = loop_exit_at_end_p (loop);
 639   struct split_ivs_info *si_info = NULL;
 640
 641   niter = desc->niter;
 642
 643   /* Should not get here (such loop should be peeled instead).  */
 644   gcc_assert (niter > max_unroll + 1);
 645
 646   exit_mod = niter % (max_unroll + 1);
 647
 648   wont_exit = sbitmap_alloc (max_unroll + 1);
 649   sbitmap_ones (wont_exit);
 650
 651   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 652   n_remove_edges = 0;
 653
 654   if (flag_split_ivs_in_unroller)
 655     si_info = analyze_ivs_to_split (loop);
 656
 657   if (!exit_at_end)
 658     {
 659       /* The exit is not at the end of the loop; leave exit test
 660          in the first copy, so that the loops that start with test
 661          of exit condition have continuous body after unrolling.  */
 662
 663       if (dump_file)
 664         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 665
 666       /* Peel exit_mod iterations.  */
 667       RESET_BIT (wont_exit, 0);
 668       if (desc->noloop_assumptions)
 669         RESET_BIT (wont_exit, 1);
 670
 671       if (exit_mod)
 672         {
 673           si_info_start_duplication (si_info);
 674           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 675                                               loops, exit_mod,
 676                                               wont_exit, desc->out_edge,
 677                                               remove_edges, &n_remove_edges,
 678                                               DLTHE_FLAG_UPDATE_FREQ))
 679             abort ();
 680
 681           if (si_info && exit_mod > 1)
 682             split_ivs_in_copies (si_info, exit_mod, false, false);
 683
 684           desc->noloop_assumptions = NULL_RTX;
 685           desc->niter -= exit_mod;
 686           desc->niter_max -= exit_mod;
 687         }
 688
 689       SET_BIT (wont_exit, 1);
 690     }
 691   else
 692     {
 693       /* Leave exit test in last copy, for the same reason as above if
 694          the loop tests the condition at the end of loop body.  */
 695
 696       if (dump_file)
 697         fprintf (dump_file, ";; Condition on end of loop.\n");
 698
 699       /* We know that niter >= max_unroll + 2; so we do not need to care of
 700          case when we would exit before reaching the loop.  So just peel
 701          exit_mod + 1 iterations.  */
 702       if (exit_mod != max_unroll
 703           || desc->noloop_assumptions)
 704         {
 705           RESET_BIT (wont_exit, 0);
 706           if (desc->noloop_assumptions)
 707             RESET_BIT (wont_exit, 1);
 708
 709           si_info_start_duplication (si_info);
 710           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 711                 loops, exit_mod + 1,
 712                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 713                 DLTHE_FLAG_UPDATE_FREQ))
 714             abort ();
 715
 716           if (si_info && exit_mod > 0)
 717             split_ivs_in_copies (si_info, exit_mod + 1, false, false);
 718
 719           desc->niter -= exit_mod + 1;
 720           desc->niter_max -= exit_mod + 1;
 721           desc->noloop_assumptions = NULL_RTX;
 722
 723           SET_BIT (wont_exit, 0);
 724           SET_BIT (wont_exit, 1);
 725         }
 726
 727       RESET_BIT (wont_exit, max_unroll);
 728     }
 729
 730   /* Now unroll the loop.  */
 731   si_info_start_duplication (si_info);
 732   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 733                 loops, max_unroll,
 734                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 735                 DLTHE_FLAG_UPDATE_FREQ))
 736     abort ();
 737
 738   if (si_info)
 739     {
 740       split_ivs_in_copies (si_info, max_unroll, true, true);
 741       free_si_info (si_info);
 742     }
 743
 744   free (wont_exit);
 745
 746   if (exit_at_end)
 747     {
 748       basic_block exit_block = desc->in_edge->src->rbi->copy;
 749       /* Find a new in and out edge; they are in the last copy we have made.  */
 750
 751       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 752         {
 753           desc->out_edge = EDGE_SUCC (exit_block, 0);
 754           desc->in_edge = EDGE_SUCC (exit_block, 1);
 755         }
 756       else
 757         {
 758           desc->out_edge = EDGE_SUCC (exit_block, 1);
 759           desc->in_edge = EDGE_SUCC (exit_block, 0);
 760         }
 761     }
 762
 763   desc->niter /= max_unroll + 1;
 764   desc->niter_max /= max_unroll + 1;
 765   desc->niter_expr = GEN_INT (desc->niter);
 766
 767   /* Remove the edges.  */
 768   for (i = 0; i < n_remove_edges; i++)
 769     remove_path (loops, remove_edges[i]);
 770   free (remove_edges);
 771
 772   if (dump_file)
 773     fprintf (dump_file,
 774              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 775              max_unroll, num_loop_insns (loop));
 776 }
 777
 778 /* Decide whether to unroll LOOP iterating runtime computable number of times
 779    and how much.  */
 780 static void
 781 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 782 {
 783   unsigned nunroll, nunroll_by_av, i;
 784   struct niter_desc *desc;
 785
 786   if (!(flags & UAP_UNROLL))
 787     {
 788       /* We were not asked to, just return back silently.  */
 789       return;
 790     }
 791
 792   if (dump_file)
 793     fprintf (dump_file,
 794              "\n;; Considering unrolling loop with runtime "
 795              "computable number of iterations\n");
 796
 797   /* nunroll = total number of copies of the original loop body in
 798      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 799   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 800   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 801   if (nunroll > nunroll_by_av)
 802     nunroll = nunroll_by_av;
 803   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 804     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 805
 806   /* Skip big loops.  */
 807   if (nunroll <= 1)
 808     {
 809       if (dump_file)
 810         fprintf (dump_file, ";; Not considering loop, is too big\n");
 811       return;
 812     }
 813
 814   /* Check for simple loops.  */
 815   desc = get_simple_loop_desc (loop);
 816
 817   /* Check simpleness.  */
 818   if (!desc->simple_p || desc->assumptions)
 819     {
 820       if (dump_file)
 821         fprintf (dump_file,
 822                  ";; Unable to prove that the number of iterations "
 823                  "can be counted in runtime\n");
 824       return;
 825     }
 826
 827   if (desc->const_iter)
 828     {
 829       if (dump_file)
 830         fprintf (dump_file, ";; Loop iterates constant times\n");
 831       return;
 832     }
 833
 834   /* If we have profile feedback, check whether the loop rolls.  */
 835   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 836     {
 837       if (dump_file)
 838         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 839       return;
 840     }
 841
 842   /* Success; now force nunroll to be power of 2, as we are unable to
 843      cope with overflows in computation of number of iterations.  */
 844   for (i = 1; 2 * i <= nunroll; i *= 2)
 845     continue;
 846
 847   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 848   loop->lpt_decision.times = i - 1;
 849
 850   if (dump_file)
 851     fprintf (dump_file,
 852              ";; Decided to unroll the runtime computable "
 853              "times rolling loop, %d times.\n",
 854              loop->lpt_decision.times);
 855 }
 856
 857 /* Unroll LOOP for that we are able to count number of iterations in runtime
 858    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 859    extra care for case n < 0):
 860
 861    for (i = 0; i < n; i++)
 862      body;
 863
 864    ==>
 865
 866    i = 0;
 867    mod = n % 4;
 868
 869    switch (mod)
 870      {
 871        case 3:
 872          body; i++;
 873        case 2:
 874          body; i++;
 875        case 1:
 876          body; i++;
 877        case 0: ;
 878      }
 879
 880    while (i < n)
 881      {
 882        body; i++;
 883        body; i++;
 884        body; i++;
 885        body; i++;
 886      }
 887    */
 888 static void
 889 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 890 {
 891   rtx old_niter, niter, init_code, branch_code, tmp;
 892   unsigned i, j, p;
 893   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 894   unsigned n_dom_bbs;
 895   sbitmap wont_exit;
 896   int may_exit_copy;
 897   unsigned n_peel, n_remove_edges;
 898   edge *remove_edges, e;
 899   bool extra_zero_check, last_may_exit;
 900   unsigned max_unroll = loop->lpt_decision.times;
 901   struct niter_desc *desc = get_simple_loop_desc (loop);
 902   bool exit_at_end = loop_exit_at_end_p (loop);
 903   struct split_ivs_info *si_info = NULL;
 904
 905   if (flag_split_ivs_in_unroller)
 906     si_info = analyze_ivs_to_split (loop);
 907
 908   /* Remember blocks whose dominators will have to be updated.  */
 909   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 910   n_dom_bbs = 0;
 911
 912   body = get_loop_body (loop);
 913   for (i = 0; i < loop->num_nodes; i++)
 914     {
 915       unsigned nldom;
 916       basic_block *ldom;
 917
 918       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 919       for (j = 0; j < nldom; j++)
 920         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 921           dom_bbs[n_dom_bbs++] = ldom[j];
 922
 923       free (ldom);
 924     }
 925   free (body);
 926
 927   if (!exit_at_end)
 928     {
 929       /* Leave exit in first copy (for explanation why see comment in
 930          unroll_loop_constant_iterations).  */
 931       may_exit_copy = 0;
 932       n_peel = max_unroll - 1;
 933       extra_zero_check = true;
 934       last_may_exit = false;
 935     }
 936   else
 937     {
 938       /* Leave exit in last copy (for explanation why see comment in
 939          unroll_loop_constant_iterations).  */
 940       may_exit_copy = max_unroll;
 941       n_peel = max_unroll;
 942       extra_zero_check = false;
 943       last_may_exit = true;
 944     }
 945
 946   /* Get expression for number of iterations.  */
 947   start_sequence ();
 948   old_niter = niter = gen_reg_rtx (desc->mode);
 949   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 950   if (tmp != niter)
 951     emit_move_insn (niter, tmp);
 952
 953   /* Count modulo by ANDing it with max_unroll; we use the fact that
 954      the number of unrollings is a power of two, and thus this is correct
 955      even if there is overflow in the computation.  */
 956   niter = expand_simple_binop (desc->mode, AND,
 957                                niter,
 958                                GEN_INT (max_unroll),
 959                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 960
 961   init_code = get_insns ();
 962   end_sequence ();
 963
 964   /* Precondition the loop.  */
 965   loop_split_edge_with (loop_preheader_edge (loop), init_code);
 966
 967   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
 968   n_remove_edges = 0;
 969
 970   wont_exit = sbitmap_alloc (max_unroll + 2);
 971
 972   /* Peel the first copy of loop body (almost always we must leave exit test
 973      here; the only exception is when we have extra zero check and the number
 974      of iterations is reliable.  Also record the place of (possible) extra
 975      zero check.  */
 976   sbitmap_zero (wont_exit);
 977   if (extra_zero_check
 978       && !desc->noloop_assumptions)
 979     SET_BIT (wont_exit, 1);
 980   ezc_swtch = loop_preheader_edge (loop)->src;
 981   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 982                 loops, 1,
 983                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 984                 DLTHE_FLAG_UPDATE_FREQ))
 985     abort ();
 986
 987   /* Record the place where switch will be built for preconditioning.  */
 988   swtch = loop_split_edge_with (loop_preheader_edge (loop),
 989                                 NULL_RTX);
 990
 991   for (i = 0; i < n_peel; i++)
 992     {
 993       /* Peel the copy.  */
 994       sbitmap_zero (wont_exit);
 995       if (i != n_peel - 1 || !last_may_exit)
 996         SET_BIT (wont_exit, 1);
 997       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 998                 loops, 1,
 999                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1000                 DLTHE_FLAG_UPDATE_FREQ))
1001         abort ();
1002
1003       /* Create item for switch.  */
1004       j = n_peel - i - (extra_zero_check ? 0 : 1);
1005       p = REG_BR_PROB_BASE / (i + 2);
1006
1007       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1008       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1009                                           block_label (preheader), p, NULL_RTX);
1010
1011       swtch = loop_split_edge_with (EDGE_PRED (swtch, 0), branch_code);
1012       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1013       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1014       e = make_edge (swtch, preheader,
1015                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1016       e->probability = p;
1017     }
1018
1019   if (extra_zero_check)
1020     {
1021       /* Add branch for zero iterations.  */
1022       p = REG_BR_PROB_BASE / (max_unroll + 1);
1023       swtch = ezc_swtch;
1024       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1025       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1026                                           block_label (preheader), p, NULL_RTX);
1027
1028       swtch = loop_split_edge_with (EDGE_SUCC (swtch, 0), branch_code);
1029       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1030       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1031       e = make_edge (swtch, preheader,
1032                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1033       e->probability = p;
1034     }
1035
1036   /* Recount dominators for outer blocks.  */
1037   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1038
1039   /* And unroll loop.  */
1040
1041   sbitmap_ones (wont_exit);
1042   RESET_BIT (wont_exit, may_exit_copy);
1043
1044   si_info_start_duplication (si_info);
1045   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1046                 loops, max_unroll,
1047                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1048                 DLTHE_FLAG_UPDATE_FREQ))
1049     abort ();
1050
1051   if (si_info)
1052     {
1053       split_ivs_in_copies (si_info, max_unroll, true, true);
1054       free_si_info (si_info);
1055     }
1056
1057   free (wont_exit);
1058
1059   if (exit_at_end)
1060     {
1061       basic_block exit_block = desc->in_edge->src->rbi->copy;
1062       /* Find a new in and out edge; they are in the last copy we have made.  */
1063
1064       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1065         {
1066           desc->out_edge = EDGE_SUCC (exit_block, 0);
1067           desc->in_edge = EDGE_SUCC (exit_block, 1);
1068         }
1069       else
1070         {
1071           desc->out_edge = EDGE_SUCC (exit_block, 1);
1072           desc->in_edge = EDGE_SUCC (exit_block, 0);
1073         }
1074     }
1075
1076   /* Remove the edges.  */
1077   for (i = 0; i < n_remove_edges; i++)
1078     remove_path (loops, remove_edges[i]);
1079   free (remove_edges);
1080
1081   /* We must be careful when updating the number of iterations due to
1082      preconditioning and the fact that the value must be valid at entry
1083      of the loop.  After passing through the above code, we see that
1084      the correct new number of iterations is this:  */
1085   gcc_assert (!desc->const_iter);
1086   desc->niter_expr =
1087     simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1088   desc->niter_max /= max_unroll + 1;
1089   if (exit_at_end)
1090     {
1091       desc->niter_expr =
1092         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1093       desc->noloop_assumptions = NULL_RTX;
1094       desc->niter_max--;
1095     }
1096
1097   if (dump_file)
1098     fprintf (dump_file,
1099              ";; Unrolled loop %d times, counting # of iterations "
1100              "in runtime, %i insns\n",
1101              max_unroll, num_loop_insns (loop));
1102 }
1103
1104 /* Decide whether to simply peel LOOP and how much.  */
1105 static void
1106 decide_peel_simple (struct loop *loop, int flags)
1107 {
1108   unsigned npeel;
1109   struct niter_desc *desc;
1110
1111   if (!(flags & UAP_PEEL))
1112     {
1113       /* We were not asked to, just return back silently.  */
1114       return;
1115     }
1116
1117   if (dump_file)
1118     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1119
1120   /* npeel = number of iterations to peel.  */
1121   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1122   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1123     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1124
1125   /* Skip big loops.  */
1126   if (!npeel)
1127     {
1128       if (dump_file)
1129         fprintf (dump_file, ";; Not considering loop, is too big\n");
1130       return;
1131     }
1132
1133   /* Check for simple loops.  */
1134   desc = get_simple_loop_desc (loop);
1135
1136   /* Check number of iterations.  */
1137   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1138     {
1139       if (dump_file)
1140         fprintf (dump_file, ";; Loop iterates constant times\n");
1141       return;
1142     }
1143
1144   /* Do not simply peel loops with branches inside -- it increases number
1145      of mispredicts.  */
1146   if (num_loop_branches (loop) > 1)
1147     {
1148       if (dump_file)
1149         fprintf (dump_file, ";; Not peeling, contains branches\n");
1150       return;
1151     }
1152
1153   if (loop->header->count)
1154     {
1155       unsigned niter = expected_loop_iterations (loop);
1156       if (niter + 1 > npeel)
1157         {
1158           if (dump_file)
1159             {
1160               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1161               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1162                        (HOST_WIDEST_INT) (niter + 1));
1163               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1164                        npeel);
1165             }
1166           return;
1167         }
1168       npeel = niter + 1;
1169     }
1170   else
1171     {
1172       /* For now we have no good heuristics to decide whether loop peeling
1173          will be effective, so disable it.  */
1174       if (dump_file)
1175         fprintf (dump_file,
1176                  ";; Not peeling loop, no evidence it will be profitable\n");
1177       return;
1178     }
1179
1180   /* Success.  */
1181   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1182   loop->lpt_decision.times = npeel;
1183
1184   if (dump_file)
1185     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1186              loop->lpt_decision.times);
1187 }
1188
1189 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1190    while (cond)
1191      body;
1192
1193    ==>
1194
1195    if (!cond) goto end;
1196    body;
1197    if (!cond) goto end;
1198    body;
1199    while (cond)
1200      body;
1201    end: ;
1202    */
1203 static void
1204 peel_loop_simple (struct loops *loops, struct loop *loop)
1205 {
1206   sbitmap wont_exit;
1207   unsigned npeel = loop->lpt_decision.times;
1208   struct niter_desc *desc = get_simple_loop_desc (loop);
1209   struct split_ivs_info *si_info = NULL;
1210
1211   if (flag_split_ivs_in_unroller && npeel > 1)
1212     si_info = analyze_ivs_to_split (loop);
1213
1214   wont_exit = sbitmap_alloc (npeel + 1);
1215   sbitmap_zero (wont_exit);
1216
1217   si_info_start_duplication (si_info);
1218   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1219                 loops, npeel, wont_exit, NULL, NULL, NULL,
1220                 DLTHE_FLAG_UPDATE_FREQ))
1221     abort ();
1222
1223   free (wont_exit);
1224
1225   if (si_info)
1226     {
1227       split_ivs_in_copies (si_info, npeel, false, false);
1228       free_si_info (si_info);
1229     }
1230
1231   if (desc->simple_p)
1232     {
1233       if (desc->const_iter)
1234         {
1235           desc->niter -= npeel;
1236           desc->niter_expr = GEN_INT (desc->niter);
1237           desc->noloop_assumptions = NULL_RTX;
1238         }
1239       else
1240         {
1241           /* We cannot just update niter_expr, as its value might be clobbered
1242              inside loop.  We could handle this by counting the number into
1243              temporary just like we do in runtime unrolling, but it does not
1244              seem worthwhile.  */
1245           free_simple_loop_desc (loop);
1246         }
1247     }
1248   if (dump_file)
1249     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1250 }
1251
1252 /* Decide whether to unroll LOOP stupidly and how much.  */
1253 static void
1254 decide_unroll_stupid (struct loop *loop, int flags)
1255 {
1256   unsigned nunroll, nunroll_by_av, i;
1257   struct niter_desc *desc;
1258
1259   if (!(flags & UAP_UNROLL_ALL))
1260     {
1261       /* We were not asked to, just return back silently.  */
1262       return;
1263     }
1264
1265   if (dump_file)
1266     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1267
1268   /* nunroll = total number of copies of the original loop body in
1269      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1270   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1271   nunroll_by_av
1272     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1273   if (nunroll > nunroll_by_av)
1274     nunroll = nunroll_by_av;
1275   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1276     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1277
1278   /* Skip big loops.  */
1279   if (nunroll <= 1)
1280     {
1281       if (dump_file)
1282         fprintf (dump_file, ";; Not considering loop, is too big\n");
1283       return;
1284     }
1285
1286   /* Check for simple loops.  */
1287   desc = get_simple_loop_desc (loop);
1288
1289   /* Check simpleness.  */
1290   if (desc->simple_p && !desc->assumptions)
1291     {
1292       if (dump_file)
1293         fprintf (dump_file, ";; The loop is simple\n");
1294       return;
1295     }
1296
1297   /* Do not unroll loops with branches inside -- it increases number
1298      of mispredicts.  */
1299   if (num_loop_branches (loop) > 1)
1300     {
1301       if (dump_file)
1302         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1303       return;
1304     }
1305
1306   /* If we have profile feedback, check whether the loop rolls.  */
1307   if (loop->header->count
1308       && expected_loop_iterations (loop) < 2 * nunroll)
1309     {
1310       if (dump_file)
1311         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1312       return;
1313     }
1314
1315   /* Success.  Now force nunroll to be power of 2, as it seems that this
1316      improves results (partially because of better alignments, partially
1317      because of some dark magic).  */
1318   for (i = 1; 2 * i <= nunroll; i *= 2)
1319     continue;
1320
1321   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1322   loop->lpt_decision.times = i - 1;
1323
1324   if (dump_file)
1325     fprintf (dump_file,
1326              ";; Decided to unroll the loop stupidly, %d times.\n",
1327              loop->lpt_decision.times);
1328 }
1329
1330 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1331    while (cond)
1332      body;
1333
1334    ==>
1335
1336    while (cond)
1337      {
1338        body;
1339        if (!cond) break;
1340        body;
1341        if (!cond) break;
1342        body;
1343        if (!cond) break;
1344        body;
1345      }
1346    */
1347 static void
1348 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1349 {
1350   sbitmap wont_exit;
1351   unsigned nunroll = loop->lpt_decision.times;
1352   struct niter_desc *desc = get_simple_loop_desc (loop);
1353   struct split_ivs_info *si_info = NULL;
1354
1355   if (flag_split_ivs_in_unroller)
1356     si_info = analyze_ivs_to_split (loop);
1357
1358   wont_exit = sbitmap_alloc (nunroll + 1);
1359   sbitmap_zero (wont_exit);
1360
1361   si_info_start_duplication (si_info);
1362   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1363                 loops, nunroll, wont_exit, NULL, NULL, NULL,
1364                 DLTHE_FLAG_UPDATE_FREQ))
1365     abort ();
1366
1367   if (si_info)
1368     {
1369       split_ivs_in_copies (si_info, nunroll, true, true);
1370       free_si_info (si_info);
1371     }
1372
1373   free (wont_exit);
1374
1375   if (desc->simple_p)
1376     {
1377       /* We indeed may get here provided that there are nontrivial assumptions
1378          for a loop to be really simple.  We could update the counts, but the
1379          problem is that we are unable to decide which exit will be taken
1380          (not really true in case the number of iterations is constant,
1381          but noone will do anything with this information, so we do not
1382          worry about it).  */
1383       desc->simple_p = false;
1384     }
1385
1386   if (dump_file)
1387     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1388              nunroll, num_loop_insns (loop));
1389 }
1390
1391 /* A hash function for information about insns to split.  */
1392
1393 static hashval_t
1394 si_info_hash (const void *ivts)
1395 {
1396   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1397 }
1398
1399 /* An equality functions for information about insns to split.  */
1400
1401 static int
1402 si_info_eq (const void *ivts1, const void *ivts2)
1403 {
1404   const struct iv_to_split *i1 = ivts1;
1405   const struct iv_to_split *i2 = ivts2;
1406
1407   return i1->insn == i2->insn;
1408 }
1409
1410 /* Determine whether there is an induction variable in INSN that
1411    we would like to split during unrolling.  Return NULL if INSN
1412    contains no interesting IVs.  Otherwise, allocate an IV_TO_SPLIT
1413    structure, fill it with the relevant information and return a
1414    pointer to it.  */
1415
1416 static struct iv_to_split *
1417 analyze_iv_to_split_insn (rtx insn)
1418 {
1419   rtx set, dest;
1420   struct rtx_iv iv;
1421   struct iv_to_split *ivts;
1422
1423   /* For now we just split the basic induction variables.  Later this may be
1424      extended for example by selecting also addresses of memory references.  */
1425   set = single_set (insn);
1426   if (!set)
1427     return NULL;
1428
1429   dest = SET_DEST (set);
1430   if (!REG_P (dest))
1431     return NULL;
1432
1433   if (!biv_p (insn, dest))
1434     return NULL;
1435
1436   if (!iv_analyze (insn, dest, &iv))
1437     abort ();
1438
1439   if (iv.step == const0_rtx
1440       || iv.mode != iv.extend_mode)
1441     return NULL;
1442
1443   /* Record the insn to split.  */
1444   ivts = xmalloc (sizeof (struct iv_to_split));
1445   ivts->insn = insn;
1446   ivts->base_var = NULL_RTX;
1447   ivts->step = iv.step;
1448   ivts->n_loc = 1;
1449   ivts->loc[0] = 1;
1450
1451   return ivts;
1452 }
1453
1454 /* Determines which of induction variables in LOOP to split.
1455    Return a SPLIT_IVS_INFO struct with the hash table filled
1456    with all insns to split IVs in.  The FIRST_NEW_BLOCK field
1457    is undefined for the return value.  */
1458
1459 static struct split_ivs_info *
1460 analyze_ivs_to_split (struct loop *loop)
1461 {
1462   basic_block *body, bb;
1463   unsigned i;
1464   struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
1465   rtx insn;
1466   struct iv_to_split *ivts;
1467   PTR *slot;
1468
1469   si_info->insns_to_split = htab_create (5 * loop->num_nodes,
1470                                          si_info_hash, si_info_eq, free);
1471
1472   iv_analysis_loop_init (loop);
1473
1474   body = get_loop_body (loop);
1475   for (i = 0; i < loop->num_nodes; i++)
1476     {
1477       bb = body[i];
1478       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1479         continue;
1480
1481       FOR_BB_INSNS (bb, insn)
1482         {
1483           if (!INSN_P (insn))
1484             continue;
1485
1486           ivts = analyze_iv_to_split_insn (insn);
1487
1488           if (!ivts)
1489             continue;
1490
1491           slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
1492           *slot = ivts;
1493         }
1494     }
1495
1496   free (body);
1497
1498   return si_info;
1499 }
1500
1501 /* Called just before loop duplication.  Records start of duplicated area
1502    to SI_INFO.  */
1503
1504 static void
1505 si_info_start_duplication (struct split_ivs_info *si_info)
1506 {
1507   if (si_info)
1508     si_info->first_new_block = last_basic_block;
1509 }
1510
1511 /* Determine the number of iterations between initialization of the base
1512    variable and the current copy (N_COPY).  N_COPIES is the total number
1513    of newly created copies.  UNROLLING is true if we are unrolling
1514    (not peeling) the loop.  */
1515
1516 static unsigned
1517 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1518 {
1519   if (unrolling)
1520     {
1521       /* If we are unrolling, initialization is done in the original loop
1522          body (number 0).  */
1523       return n_copy;
1524     }
1525   else
1526     {
1527       /* If we are peeling, the copy in that the initialization occurs has
1528          number 1.  The original loop (number 0) is the last.  */
1529       if (n_copy)
1530         return n_copy - 1;
1531       else
1532         return n_copies;
1533     }
1534 }
1535
1536 /* Locate in EXPR the expression corresponding to the location recorded
1537    in IVTS, and return a pointer to the RTX for this location.  */
1538
1539 static rtx *
1540 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1541 {
1542   unsigned i;
1543   rtx *ret = &expr;
1544
1545   for (i = 0; i < ivts->n_loc; i++)
1546     ret = &XEXP (*ret, ivts->loc[i]);
1547
1548   return ret;
1549 }
1550
1551 /* Allocate basic variable for the induction variable chain.  Callback for
1552    htab_traverse.  */
1553
1554 static int
1555 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1556 {
1557   struct iv_to_split *ivts = *slot;
1558   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1559
1560   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1561
1562   return 1;
1563 }
1564
1565 /* Insert initialization of basic variable of IVTS before INSN, taking
1566    the initial value from INSN.  */
1567
1568 static void
1569 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1570 {
1571   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1572   rtx seq;
1573
1574   start_sequence ();
1575   expr = force_operand (expr, ivts->base_var);
1576   if (expr != ivts->base_var)
1577     emit_move_insn (ivts->base_var, expr);
1578   seq = get_insns ();
1579   end_sequence ();
1580
1581   emit_insn_before (seq, insn);
1582 }
1583
1584 /* Replace the use of induction variable described in IVTS in INSN
1585    by base variable + DELTA * step.  */
1586
1587 static void
1588 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1589 {
1590   rtx expr, *loc, seq, incr, var;
1591   enum machine_mode mode = GET_MODE (ivts->base_var);
1592   rtx src, dest, set;
1593
1594   /* Construct base + DELTA * step.  */
1595   if (!delta)
1596     expr = ivts->base_var;
1597   else
1598     {
1599       incr = simplify_gen_binary (MULT, mode,
1600                                   ivts->step, gen_int_mode (delta, mode));
1601       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1602                                   ivts->base_var, incr);
1603     }
1604
1605   /* Figure out where to do the replacement.  */
1606   loc = get_ivts_expr (single_set (insn), ivts);
1607
1608   /* If we can make the replacement right away, we're done.  */
1609   if (validate_change (insn, loc, expr, 0))
1610     return;
1611
1612   /* Otherwise, force EXPR into a register and try again.  */
1613   start_sequence ();
1614   var = gen_reg_rtx (mode);
1615   expr = force_operand (expr, var);
1616   if (expr != var)
1617     emit_move_insn (var, expr);
1618   seq = get_insns ();
1619   end_sequence ();
1620   emit_insn_before (seq, insn);
1621
1622   if (validate_change (insn, loc, var, 0))
1623     return;
1624
1625   /* The last chance.  Try recreating the assignment in insn
1626      completely from scratch.  */
1627   set = single_set (insn);
1628   gcc_assert (set);
1629
1630   start_sequence ();
1631   *loc = var;
1632   src = copy_rtx (SET_SRC (set));
1633   dest = copy_rtx (SET_DEST (set));
1634   src = force_operand (src, dest);
1635   if (src != dest)
1636     emit_move_insn (dest, src);
1637   seq = get_insns ();
1638   end_sequence ();
1639
1640   emit_insn_before (seq, insn);
1641   delete_insn (insn);
1642 }
1643
1644 /* Splits induction variables (that are marked in SI_INFO) in copies of loop.
1645    I.e. replace
1646
1647    i = i + 1;
1648    ...
1649    i = i + 1;
1650    ...
1651    i = i + 1;
1652    ...
1653
1654    type chains by
1655
1656    i0 = i + 1
1657    ...
1658    i = i0 + 1
1659    ...
1660    i = i0 + 2
1661    ...
1662
1663    UNROLLING is true if we unrolled (not peeled) the loop.
1664    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1665    the loop (as it should happen in complete unrolling, but not in ordinary
1666    peeling of the loop).  */
1667
1668 static void
1669 split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
1670                      bool unrolling, bool rewrite_original_loop)
1671 {
1672   unsigned i, delta;
1673   basic_block bb, orig_bb;
1674   rtx insn, orig_insn, next;
1675   struct iv_to_split ivts_templ, *ivts;
1676
1677   /* Sanity check -- we need to put initialization in the original loop
1678      body.  */
1679   gcc_assert (!unrolling || rewrite_original_loop);
1680
1681   /* Allocate the basic variables (i0).  */
1682   htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
1683
1684   for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
1685     {
1686       bb = BASIC_BLOCK (i);
1687       orig_bb = bb->rbi->original;
1688
1689       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
1690                                         unrolling);
1691       orig_insn = BB_HEAD (orig_bb);
1692       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
1693         {
1694           next = NEXT_INSN (insn);
1695           if (!INSN_P (insn))
1696             continue;
1697
1698           while (!INSN_P (orig_insn))
1699             orig_insn = NEXT_INSN (orig_insn);
1700
1701           ivts_templ.insn = orig_insn;
1702           ivts = htab_find (si_info->insns_to_split, &ivts_templ);
1703           if (ivts)
1704             {
1705
1706 #ifdef ENABLE_CHECKING
1707               if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
1708                 abort ();
1709 #endif
1710
1711               if (!delta)
1712                 insert_base_initialization (ivts, insn);
1713               split_iv (ivts, insn, delta);
1714             }
1715           orig_insn = NEXT_INSN (orig_insn);
1716         }
1717     }
1718
1719   if (!rewrite_original_loop)
1720     return;
1721
1722   /* Rewrite also the original loop body.  Find them as originals of the blocks
1723      in the last copied iteration, i.e. those that have
1724      bb->rbi->original->copy == bb.  */
1725   for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
1726     {
1727       bb = BASIC_BLOCK (i);
1728       orig_bb = bb->rbi->original;
1729       if (orig_bb->rbi->copy != bb)
1730         continue;
1731
1732       delta = determine_split_iv_delta (0, n_copies, unrolling);
1733       for (orig_insn = BB_HEAD (orig_bb);
1734            orig_insn != NEXT_INSN (BB_END (bb));
1735            orig_insn = next)
1736         {
1737           next = NEXT_INSN (orig_insn);
1738
1739           if (!INSN_P (orig_insn))
1740             continue;
1741
1742           ivts_templ.insn = orig_insn;
1743           ivts = htab_find (si_info->insns_to_split, &ivts_templ);
1744           if (!ivts)
1745             continue;
1746
1747           if (!delta)
1748             insert_base_initialization (ivts, orig_insn);
1749           split_iv (ivts, orig_insn, delta);
1750         }
1751     }
1752 }
1753
1754 /* Release SI_INFO.  */
1755
1756 static void
1757 free_si_info (struct split_ivs_info *si_info)
1758 {
1759   htab_delete (si_info->insns_to_split);
1760   free (si_info);
1761 }