gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (struct loops *, int);
 118 static void peel_loops_completely (struct loops *, int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loops *, struct loop *);
 126 static void peel_loop_completely (struct loops *, struct loop *);
 127 static void unroll_loop_stupid (struct loops *, struct loop *);
 128 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (struct loops *loops, int flags)
 146 {
 147   struct loop *loop, *next;
 148   bool check;
 149
 150   /* First perform complete loop peeling (it is almost surely a win,
 151      and affects parameters for further decision a lot).  */
 152   peel_loops_completely (loops, flags);
 153
 154   /* Now decide rest of unrolling and peeling.  */
 155   decide_unrolling_and_peeling (loops, flags);
 156
 157   loop = loops->tree_root;
 158   while (loop->inner)
 159     loop = loop->inner;
 160
 161   /* Scan the loops, inner ones first.  */
 162   while (loop != loops->tree_root)
 163     {
 164       if (loop->next)
 165         {
 166           next = loop->next;
 167           while (next->inner)
 168             next = next->inner;
 169         }
 170       else
 171         next = loop->outer;
 172
 173       check = true;
 174       /* And perform the appropriate transformations.  */
 175       switch (loop->lpt_decision.decision)
 176         {
 177         case LPT_PEEL_COMPLETELY:
 178           /* Already done.  */
 179           gcc_unreachable ();
 180         case LPT_PEEL_SIMPLE:
 181           peel_loop_simple (loops, loop);
 182           break;
 183         case LPT_UNROLL_CONSTANT:
 184           unroll_loop_constant_iterations (loops, loop);
 185           break;
 186         case LPT_UNROLL_RUNTIME:
 187           unroll_loop_runtime_iterations (loops, loop);
 188           break;
 189         case LPT_UNROLL_STUPID:
 190           unroll_loop_stupid (loops, loop);
 191           break;
 192         case LPT_NONE:
 193           check = false;
 194           break;
 195         default:
 196           gcc_unreachable ();
 197         }
 198       if (check)
 199         {
 200 #ifdef ENABLE_CHECKING
 201           verify_dominators (CDI_DOMINATORS);
 202           verify_loop_structure (loops);
 203 #endif
 204         }
 205       loop = next;
 206     }
 207
 208   iv_analysis_done ();
 209 }
 210
 211 /* Check whether exit of the LOOP is at the end of loop body.  */
 212
 213 static bool
 214 loop_exit_at_end_p (struct loop *loop)
 215 {
 216   struct niter_desc *desc = get_simple_loop_desc (loop);
 217   rtx insn;
 218
 219   if (desc->in_edge->dest != loop->latch)
 220     return false;
 221
 222   /* Check that the latch is empty.  */
 223   FOR_BB_INSNS (loop->latch, insn)
 224     {
 225       if (INSN_P (insn))
 226         return false;
 227     }
 228
 229   return true;
 230 }
 231
 232 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 233 static void
 234 peel_loops_completely (struct loops *loops, int flags)
 235 {
 236   struct loop *loop, *next;
 237
 238   loop = loops->tree_root;
 239   while (loop->inner)
 240     loop = loop->inner;
 241
 242   while (loop != loops->tree_root)
 243     {
 244       if (loop->next)
 245         {
 246           next = loop->next;
 247           while (next->inner)
 248             next = next->inner;
 249         }
 250       else
 251         next = loop->outer;
 252
 253       loop->lpt_decision.decision = LPT_NONE;
 254
 255       if (dump_file)
 256         fprintf (dump_file,
 257                  "\n;; *** Considering loop %d for complete peeling ***\n",
 258                  loop->num);
 259
 260       loop->ninsns = num_loop_insns (loop);
 261
 262       decide_peel_once_rolling (loop, flags);
 263       if (loop->lpt_decision.decision == LPT_NONE)
 264         decide_peel_completely (loop, flags);
 265
 266       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 267         {
 268           peel_loop_completely (loops, loop);
 269 #ifdef ENABLE_CHECKING
 270           verify_dominators (CDI_DOMINATORS);
 271           verify_loop_structure (loops);
 272 #endif
 273         }
 274       loop = next;
 275     }
 276 }
 277
 278 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 279 static void
 280 decide_unrolling_and_peeling (struct loops *loops, int flags)
 281 {
 282   struct loop *loop = loops->tree_root, *next;
 283
 284   while (loop->inner)
 285     loop = loop->inner;
 286
 287   /* Scan the loops, inner ones first.  */
 288   while (loop != loops->tree_root)
 289     {
 290       if (loop->next)
 291         {
 292           next = loop->next;
 293           while (next->inner)
 294             next = next->inner;
 295         }
 296       else
 297         next = loop->outer;
 298
 299       loop->lpt_decision.decision = LPT_NONE;
 300
 301       if (dump_file)
 302         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 303
 304       /* Do not peel cold areas.  */
 305       if (!maybe_hot_bb_p (loop->header))
 306         {
 307           if (dump_file)
 308             fprintf (dump_file, ";; Not considering loop, cold area\n");
 309           loop = next;
 310           continue;
 311         }
 312
 313       /* Can the loop be manipulated?  */
 314       if (!can_duplicate_loop_p (loop))
 315         {
 316           if (dump_file)
 317             fprintf (dump_file,
 318                      ";; Not considering loop, cannot duplicate\n");
 319           loop = next;
 320           continue;
 321         }
 322
 323       /* Skip non-innermost loops.  */
 324       if (loop->inner)
 325         {
 326           if (dump_file)
 327             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 328           loop = next;
 329           continue;
 330         }
 331
 332       loop->ninsns = num_loop_insns (loop);
 333       loop->av_ninsns = average_num_loop_insns (loop);
 334
 335       /* Try transformations one by one in decreasing order of
 336          priority.  */
 337
 338       decide_unroll_constant_iterations (loop, flags);
 339       if (loop->lpt_decision.decision == LPT_NONE)
 340         decide_unroll_runtime_iterations (loop, flags);
 341       if (loop->lpt_decision.decision == LPT_NONE)
 342         decide_unroll_stupid (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_peel_simple (loop, flags);
 345
 346       loop = next;
 347     }
 348 }
 349
 350 /* Decide whether the LOOP is once rolling and suitable for complete
 351    peeling.  */
 352 static void
 353 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 354 {
 355   struct niter_desc *desc;
 356
 357   if (dump_file)
 358     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 359
 360   /* Is the loop small enough?  */
 361   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 362     {
 363       if (dump_file)
 364         fprintf (dump_file, ";; Not considering loop, is too big\n");
 365       return;
 366     }
 367
 368   /* Check for simple loops.  */
 369   desc = get_simple_loop_desc (loop);
 370
 371   /* Check number of iterations.  */
 372   if (!desc->simple_p
 373       || desc->assumptions
 374       || desc->infinite
 375       || !desc->const_iter
 376       || desc->niter != 0)
 377     {
 378       if (dump_file)
 379         fprintf (dump_file,
 380                  ";; Unable to prove that the loop rolls exactly once\n");
 381       return;
 382     }
 383
 384   /* Success.  */
 385   if (dump_file)
 386     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 387   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 388 }
 389
 390 /* Decide whether the LOOP is suitable for complete peeling.  */
 391 static void
 392 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 393 {
 394   unsigned npeel;
 395   struct niter_desc *desc;
 396
 397   if (dump_file)
 398     fprintf (dump_file, "\n;; Considering peeling completely\n");
 399
 400   /* Skip non-innermost loops.  */
 401   if (loop->inner)
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 405       return;
 406     }
 407
 408   /* Do not peel cold areas.  */
 409   if (!maybe_hot_bb_p (loop->header))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file, ";; Not considering loop, cold area\n");
 413       return;
 414     }
 415
 416   /* Can the loop be manipulated?  */
 417   if (!can_duplicate_loop_p (loop))
 418     {
 419       if (dump_file)
 420         fprintf (dump_file,
 421                  ";; Not considering loop, cannot duplicate\n");
 422       return;
 423     }
 424
 425   /* npeel = number of iterations to peel.  */
 426   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 427   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 428     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 429
 430   /* Is the loop small enough?  */
 431   if (!npeel)
 432     {
 433       if (dump_file)
 434         fprintf (dump_file, ";; Not considering loop, is too big\n");
 435       return;
 436     }
 437
 438   /* Check for simple loops.  */
 439   desc = get_simple_loop_desc (loop);
 440
 441   /* Check number of iterations.  */
 442   if (!desc->simple_p
 443       || desc->assumptions
 444       || !desc->const_iter
 445       || desc->infinite)
 446     {
 447       if (dump_file)
 448         fprintf (dump_file,
 449                  ";; Unable to prove that the loop iterates constant times\n");
 450       return;
 451     }
 452
 453   if (desc->niter > npeel - 1)
 454     {
 455       if (dump_file)
 456         {
 457           fprintf (dump_file,
 458                    ";; Not peeling loop completely, rolls too much (");
 459           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 460           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 461         }
 462       return;
 463     }
 464
 465   /* Success.  */
 466   if (dump_file)
 467     fprintf (dump_file, ";; Decided to peel loop completely\n");
 468   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 469 }
 470
 471 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 472    completely.  The transformation done:
 473
 474    for (i = 0; i < 4; i++)
 475      body;
 476
 477    ==>
 478
 479    i = 0;
 480    body; i++;
 481    body; i++;
 482    body; i++;
 483    body; i++;
 484    */
 485 static void
 486 peel_loop_completely (struct loops *loops, struct loop *loop)
 487 {
 488   sbitmap wont_exit;
 489   unsigned HOST_WIDE_INT npeel;
 490   unsigned n_remove_edges, i;
 491   edge *remove_edges, ein;
 492   struct niter_desc *desc = get_simple_loop_desc (loop);
 493   struct opt_info *opt_info = NULL;
 494
 495   npeel = desc->niter;
 496
 497   if (npeel)
 498     {
 499       bool ok;
 500
 501       wont_exit = sbitmap_alloc (npeel + 1);
 502       sbitmap_ones (wont_exit);
 503       RESET_BIT (wont_exit, 0);
 504       if (desc->noloop_assumptions)
 505         RESET_BIT (wont_exit, 1);
 506
 507       remove_edges = XCNEWVEC (edge, npeel);
 508       n_remove_edges = 0;
 509
 510       if (flag_split_ivs_in_unroller)
 511         opt_info = analyze_insns_in_loop (loop);
 512
 513       opt_info_start_duplication (opt_info);
 514       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 515                                           loops, npeel,
 516                                           wont_exit, desc->out_edge,
 517                                           remove_edges, &n_remove_edges,
 518                                           DLTHE_FLAG_UPDATE_FREQ
 519                                           | DLTHE_FLAG_COMPLETTE_PEEL
 520                                           | (opt_info
 521                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 522       gcc_assert (ok);
 523
 524       free (wont_exit);
 525
 526       if (opt_info)
 527         {
 528           apply_opt_in_copies (opt_info, npeel, false, true);
 529           free_opt_info (opt_info);
 530         }
 531
 532       /* Remove the exit edges.  */
 533       for (i = 0; i < n_remove_edges; i++)
 534         remove_path (loops, remove_edges[i]);
 535       free (remove_edges);
 536     }
 537
 538   ein = desc->in_edge;
 539   free_simple_loop_desc (loop);
 540
 541   /* Now remove the unreachable part of the last iteration and cancel
 542      the loop.  */
 543   remove_path (loops, ein);
 544
 545   if (dump_file)
 546     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 547 }
 548
 549 /* Decide whether to unroll LOOP iterating constant number of times
 550    and how much.  */
 551
 552 static void
 553 decide_unroll_constant_iterations (struct loop *loop, int flags)
 554 {
 555   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 556   struct niter_desc *desc;
 557
 558   if (!(flags & UAP_UNROLL))
 559     {
 560       /* We were not asked to, just return back silently.  */
 561       return;
 562     }
 563
 564   if (dump_file)
 565     fprintf (dump_file,
 566              "\n;; Considering unrolling loop with constant "
 567              "number of iterations\n");
 568
 569   /* nunroll = total number of copies of the original loop body in
 570      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 571   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 572   nunroll_by_av
 573     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 574   if (nunroll > nunroll_by_av)
 575     nunroll = nunroll_by_av;
 576   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 577     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 578
 579   /* Skip big loops.  */
 580   if (nunroll <= 1)
 581     {
 582       if (dump_file)
 583         fprintf (dump_file, ";; Not considering loop, is too big\n");
 584       return;
 585     }
 586
 587   /* Check for simple loops.  */
 588   desc = get_simple_loop_desc (loop);
 589
 590   /* Check number of iterations.  */
 591   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 592     {
 593       if (dump_file)
 594         fprintf (dump_file,
 595                  ";; Unable to prove that the loop iterates constant times\n");
 596       return;
 597     }
 598
 599   /* Check whether the loop rolls enough to consider.  */
 600   if (desc->niter < 2 * nunroll)
 601     {
 602       if (dump_file)
 603         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 604       return;
 605     }
 606
 607   /* Success; now compute number of iterations to unroll.  We alter
 608      nunroll so that as few as possible copies of loop body are
 609      necessary, while still not decreasing the number of unrollings
 610      too much (at most by 1).  */
 611   best_copies = 2 * nunroll + 10;
 612
 613   i = 2 * nunroll + 2;
 614   if (i - 1 >= desc->niter)
 615     i = desc->niter - 2;
 616
 617   for (; i >= nunroll - 1; i--)
 618     {
 619       unsigned exit_mod = desc->niter % (i + 1);
 620
 621       if (!loop_exit_at_end_p (loop))
 622         n_copies = exit_mod + i + 1;
 623       else if (exit_mod != (unsigned) i
 624                || desc->noloop_assumptions != NULL_RTX)
 625         n_copies = exit_mod + i + 2;
 626       else
 627         n_copies = i + 1;
 628
 629       if (n_copies < best_copies)
 630         {
 631           best_copies = n_copies;
 632           best_unroll = i;
 633         }
 634     }
 635
 636   if (dump_file)
 637     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 638              best_unroll + 1, best_copies, nunroll);
 639
 640   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 641   loop->lpt_decision.times = best_unroll;
 642
 643   if (dump_file)
 644     fprintf (dump_file,
 645              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 646              loop->lpt_decision.times);
 647 }
 648
 649 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 650    times.  The transformation does this:
 651
 652    for (i = 0; i < 102; i++)
 653      body;
 654
 655    ==>
 656
 657    i = 0;
 658    body; i++;
 659    body; i++;
 660    while (i < 102)
 661      {
 662        body; i++;
 663        body; i++;
 664        body; i++;
 665        body; i++;
 666      }
 667   */
 668 static void
 669 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 670 {
 671   unsigned HOST_WIDE_INT niter;
 672   unsigned exit_mod;
 673   sbitmap wont_exit;
 674   unsigned n_remove_edges, i;
 675   edge *remove_edges;
 676   unsigned max_unroll = loop->lpt_decision.times;
 677   struct niter_desc *desc = get_simple_loop_desc (loop);
 678   bool exit_at_end = loop_exit_at_end_p (loop);
 679   struct opt_info *opt_info = NULL;
 680   bool ok;
 681
 682   niter = desc->niter;
 683
 684   /* Should not get here (such loop should be peeled instead).  */
 685   gcc_assert (niter > max_unroll + 1);
 686
 687   exit_mod = niter % (max_unroll + 1);
 688
 689   wont_exit = sbitmap_alloc (max_unroll + 1);
 690   sbitmap_ones (wont_exit);
 691
 692   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
 693   n_remove_edges = 0;
 694   if (flag_split_ivs_in_unroller
 695       || flag_variable_expansion_in_unroller)
 696     opt_info = analyze_insns_in_loop (loop);
 697
 698   if (!exit_at_end)
 699     {
 700       /* The exit is not at the end of the loop; leave exit test
 701          in the first copy, so that the loops that start with test
 702          of exit condition have continuous body after unrolling.  */
 703
 704       if (dump_file)
 705         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 706
 707       /* Peel exit_mod iterations.  */
 708       RESET_BIT (wont_exit, 0);
 709       if (desc->noloop_assumptions)
 710         RESET_BIT (wont_exit, 1);
 711
 712       if (exit_mod)
 713         {
 714           opt_info_start_duplication (opt_info);
 715           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 716                                               loops, exit_mod,
 717                                               wont_exit, desc->out_edge,
 718                                               remove_edges, &n_remove_edges,
 719                                               DLTHE_FLAG_UPDATE_FREQ
 720                                               | (opt_info && exit_mod > 1
 721                                                  ? DLTHE_RECORD_COPY_NUMBER
 722                                                    : 0));
 723           gcc_assert (ok);
 724
 725           if (opt_info && exit_mod > 1)
 726             apply_opt_in_copies (opt_info, exit_mod, false, false);
 727
 728           desc->noloop_assumptions = NULL_RTX;
 729           desc->niter -= exit_mod;
 730           desc->niter_max -= exit_mod;
 731         }
 732
 733       SET_BIT (wont_exit, 1);
 734     }
 735   else
 736     {
 737       /* Leave exit test in last copy, for the same reason as above if
 738          the loop tests the condition at the end of loop body.  */
 739
 740       if (dump_file)
 741         fprintf (dump_file, ";; Condition on end of loop.\n");
 742
 743       /* We know that niter >= max_unroll + 2; so we do not need to care of
 744          case when we would exit before reaching the loop.  So just peel
 745          exit_mod + 1 iterations.  */
 746       if (exit_mod != max_unroll
 747           || desc->noloop_assumptions)
 748         {
 749           RESET_BIT (wont_exit, 0);
 750           if (desc->noloop_assumptions)
 751             RESET_BIT (wont_exit, 1);
 752
 753           opt_info_start_duplication (opt_info);
 754           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 755                                               loops, exit_mod + 1,
 756                                               wont_exit, desc->out_edge,
 757                                               remove_edges, &n_remove_edges,
 758                                               DLTHE_FLAG_UPDATE_FREQ
 759                                               | (opt_info && exit_mod > 0
 760                                                  ? DLTHE_RECORD_COPY_NUMBER
 761                                                    : 0));
 762           gcc_assert (ok);
 763
 764           if (opt_info && exit_mod > 0)
 765             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 766
 767           desc->niter -= exit_mod + 1;
 768           desc->niter_max -= exit_mod + 1;
 769           desc->noloop_assumptions = NULL_RTX;
 770
 771           SET_BIT (wont_exit, 0);
 772           SET_BIT (wont_exit, 1);
 773         }
 774
 775       RESET_BIT (wont_exit, max_unroll);
 776     }
 777
 778   /* Now unroll the loop.  */
 779
 780   opt_info_start_duplication (opt_info);
 781   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 782                                       loops, max_unroll,
 783                                       wont_exit, desc->out_edge,
 784                                       remove_edges, &n_remove_edges,
 785                                       DLTHE_FLAG_UPDATE_FREQ
 786                                       | (opt_info
 787                                          ? DLTHE_RECORD_COPY_NUMBER
 788                                            : 0));
 789   gcc_assert (ok);
 790
 791   if (opt_info)
 792     {
 793       apply_opt_in_copies (opt_info, max_unroll, true, true);
 794       free_opt_info (opt_info);
 795     }
 796
 797   free (wont_exit);
 798
 799   if (exit_at_end)
 800     {
 801       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 802       /* Find a new in and out edge; they are in the last copy we have made.  */
 803
 804       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 805         {
 806           desc->out_edge = EDGE_SUCC (exit_block, 0);
 807           desc->in_edge = EDGE_SUCC (exit_block, 1);
 808         }
 809       else
 810         {
 811           desc->out_edge = EDGE_SUCC (exit_block, 1);
 812           desc->in_edge = EDGE_SUCC (exit_block, 0);
 813         }
 814     }
 815
 816   desc->niter /= max_unroll + 1;
 817   desc->niter_max /= max_unroll + 1;
 818   desc->niter_expr = GEN_INT (desc->niter);
 819
 820   /* Remove the edges.  */
 821   for (i = 0; i < n_remove_edges; i++)
 822     remove_path (loops, remove_edges[i]);
 823   free (remove_edges);
 824
 825   if (dump_file)
 826     fprintf (dump_file,
 827              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 828              max_unroll, num_loop_insns (loop));
 829 }
 830
 831 /* Decide whether to unroll LOOP iterating runtime computable number of times
 832    and how much.  */
 833 static void
 834 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 835 {
 836   unsigned nunroll, nunroll_by_av, i;
 837   struct niter_desc *desc;
 838
 839   if (!(flags & UAP_UNROLL))
 840     {
 841       /* We were not asked to, just return back silently.  */
 842       return;
 843     }
 844
 845   if (dump_file)
 846     fprintf (dump_file,
 847              "\n;; Considering unrolling loop with runtime "
 848              "computable number of iterations\n");
 849
 850   /* nunroll = total number of copies of the original loop body in
 851      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 852   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 853   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 854   if (nunroll > nunroll_by_av)
 855     nunroll = nunroll_by_av;
 856   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 857     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 858
 859   /* Skip big loops.  */
 860   if (nunroll <= 1)
 861     {
 862       if (dump_file)
 863         fprintf (dump_file, ";; Not considering loop, is too big\n");
 864       return;
 865     }
 866
 867   /* Check for simple loops.  */
 868   desc = get_simple_loop_desc (loop);
 869
 870   /* Check simpleness.  */
 871   if (!desc->simple_p || desc->assumptions)
 872     {
 873       if (dump_file)
 874         fprintf (dump_file,
 875                  ";; Unable to prove that the number of iterations "
 876                  "can be counted in runtime\n");
 877       return;
 878     }
 879
 880   if (desc->const_iter)
 881     {
 882       if (dump_file)
 883         fprintf (dump_file, ";; Loop iterates constant times\n");
 884       return;
 885     }
 886
 887   /* If we have profile feedback, check whether the loop rolls.  */
 888   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 889     {
 890       if (dump_file)
 891         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 892       return;
 893     }
 894
 895   /* Success; now force nunroll to be power of 2, as we are unable to
 896      cope with overflows in computation of number of iterations.  */
 897   for (i = 1; 2 * i <= nunroll; i *= 2)
 898     continue;
 899
 900   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 901   loop->lpt_decision.times = i - 1;
 902
 903   if (dump_file)
 904     fprintf (dump_file,
 905              ";; Decided to unroll the runtime computable "
 906              "times rolling loop, %d times.\n",
 907              loop->lpt_decision.times);
 908 }
 909
 910 /* Unroll LOOP for that we are able to count number of iterations in runtime
 911    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 912    extra care for case n < 0):
 913
 914    for (i = 0; i < n; i++)
 915      body;
 916
 917    ==>
 918
 919    i = 0;
 920    mod = n % 4;
 921
 922    switch (mod)
 923      {
 924        case 3:
 925          body; i++;
 926        case 2:
 927          body; i++;
 928        case 1:
 929          body; i++;
 930        case 0: ;
 931      }
 932
 933    while (i < n)
 934      {
 935        body; i++;
 936        body; i++;
 937        body; i++;
 938        body; i++;
 939      }
 940    */
 941 static void
 942 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 943 {
 944   rtx old_niter, niter, init_code, branch_code, tmp;
 945   unsigned i, j, p;
 946   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 947   unsigned n_dom_bbs;
 948   sbitmap wont_exit;
 949   int may_exit_copy;
 950   unsigned n_peel, n_remove_edges;
 951   edge *remove_edges, e;
 952   bool extra_zero_check, last_may_exit;
 953   unsigned max_unroll = loop->lpt_decision.times;
 954   struct niter_desc *desc = get_simple_loop_desc (loop);
 955   bool exit_at_end = loop_exit_at_end_p (loop);
 956   struct opt_info *opt_info = NULL;
 957   bool ok;
 958
 959   if (flag_split_ivs_in_unroller
 960       || flag_variable_expansion_in_unroller)
 961     opt_info = analyze_insns_in_loop (loop);
 962
 963   /* Remember blocks whose dominators will have to be updated.  */
 964   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 965   n_dom_bbs = 0;
 966
 967   body = get_loop_body (loop);
 968   for (i = 0; i < loop->num_nodes; i++)
 969     {
 970       unsigned nldom;
 971       basic_block *ldom;
 972
 973       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 974       for (j = 0; j < nldom; j++)
 975         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 976           dom_bbs[n_dom_bbs++] = ldom[j];
 977
 978       free (ldom);
 979     }
 980   free (body);
 981
 982   if (!exit_at_end)
 983     {
 984       /* Leave exit in first copy (for explanation why see comment in
 985          unroll_loop_constant_iterations).  */
 986       may_exit_copy = 0;
 987       n_peel = max_unroll - 1;
 988       extra_zero_check = true;
 989       last_may_exit = false;
 990     }
 991   else
 992     {
 993       /* Leave exit in last copy (for explanation why see comment in
 994          unroll_loop_constant_iterations).  */
 995       may_exit_copy = max_unroll;
 996       n_peel = max_unroll;
 997       extra_zero_check = false;
 998       last_may_exit = true;
 999     }
1000
1001   /* Get expression for number of iterations.  */
1002   start_sequence ();
1003   old_niter = niter = gen_reg_rtx (desc->mode);
1004   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1005   if (tmp != niter)
1006     emit_move_insn (niter, tmp);
1007
1008   /* Count modulo by ANDing it with max_unroll; we use the fact that
1009      the number of unrollings is a power of two, and thus this is correct
1010      even if there is overflow in the computation.  */
1011   niter = expand_simple_binop (desc->mode, AND,
1012                                niter,
1013                                GEN_INT (max_unroll),
1014                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1015
1016   init_code = get_insns ();
1017   end_sequence ();
1018
1019   /* Precondition the loop.  */
1020   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1021
1022   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1023   n_remove_edges = 0;
1024
1025   wont_exit = sbitmap_alloc (max_unroll + 2);
1026
1027   /* Peel the first copy of loop body (almost always we must leave exit test
1028      here; the only exception is when we have extra zero check and the number
1029      of iterations is reliable.  Also record the place of (possible) extra
1030      zero check.  */
1031   sbitmap_zero (wont_exit);
1032   if (extra_zero_check
1033       && !desc->noloop_assumptions)
1034     SET_BIT (wont_exit, 1);
1035   ezc_swtch = loop_preheader_edge (loop)->src;
1036   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1037                                       loops, 1,
1038                                       wont_exit, desc->out_edge,
1039                                       remove_edges, &n_remove_edges,
1040                                       DLTHE_FLAG_UPDATE_FREQ);
1041   gcc_assert (ok);
1042
1043   /* Record the place where switch will be built for preconditioning.  */
1044   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1045                                 NULL_RTX);
1046
1047   for (i = 0; i < n_peel; i++)
1048     {
1049       /* Peel the copy.  */
1050       sbitmap_zero (wont_exit);
1051       if (i != n_peel - 1 || !last_may_exit)
1052         SET_BIT (wont_exit, 1);
1053       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1054                                           loops, 1,
1055                                           wont_exit, desc->out_edge,
1056                                           remove_edges, &n_remove_edges,
1057                                           DLTHE_FLAG_UPDATE_FREQ);
1058       gcc_assert (ok);
1059
1060       /* Create item for switch.  */
1061       j = n_peel - i - (extra_zero_check ? 0 : 1);
1062       p = REG_BR_PROB_BASE / (i + 2);
1063
1064       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1065       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1066                                           block_label (preheader), p,
1067                                           NULL_RTX);
1068
1069       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1070       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1071       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1072       e = make_edge (swtch, preheader,
1073                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1074       e->probability = p;
1075     }
1076
1077   if (extra_zero_check)
1078     {
1079       /* Add branch for zero iterations.  */
1080       p = REG_BR_PROB_BASE / (max_unroll + 1);
1081       swtch = ezc_swtch;
1082       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1083       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1084                                           block_label (preheader), p,
1085                                           NULL_RTX);
1086
1087       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1088       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1089       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1090       e = make_edge (swtch, preheader,
1091                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1092       e->probability = p;
1093     }
1094
1095   /* Recount dominators for outer blocks.  */
1096   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1097
1098   /* And unroll loop.  */
1099
1100   sbitmap_ones (wont_exit);
1101   RESET_BIT (wont_exit, may_exit_copy);
1102   opt_info_start_duplication (opt_info);
1103
1104   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1105                                       loops, max_unroll,
1106                                       wont_exit, desc->out_edge,
1107                                       remove_edges, &n_remove_edges,
1108                                       DLTHE_FLAG_UPDATE_FREQ
1109                                       | (opt_info
1110                                          ? DLTHE_RECORD_COPY_NUMBER
1111                                            : 0));
1112   gcc_assert (ok);
1113
1114   if (opt_info)
1115     {
1116       apply_opt_in_copies (opt_info, max_unroll, true, true);
1117       free_opt_info (opt_info);
1118     }
1119
1120   free (wont_exit);
1121
1122   if (exit_at_end)
1123     {
1124       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1125       /* Find a new in and out edge; they are in the last copy we have
1126          made.  */
1127
1128       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1129         {
1130           desc->out_edge = EDGE_SUCC (exit_block, 0);
1131           desc->in_edge = EDGE_SUCC (exit_block, 1);
1132         }
1133       else
1134         {
1135           desc->out_edge = EDGE_SUCC (exit_block, 1);
1136           desc->in_edge = EDGE_SUCC (exit_block, 0);
1137         }
1138     }
1139
1140   /* Remove the edges.  */
1141   for (i = 0; i < n_remove_edges; i++)
1142     remove_path (loops, remove_edges[i]);
1143   free (remove_edges);
1144
1145   /* We must be careful when updating the number of iterations due to
1146      preconditioning and the fact that the value must be valid at entry
1147      of the loop.  After passing through the above code, we see that
1148      the correct new number of iterations is this:  */
1149   gcc_assert (!desc->const_iter);
1150   desc->niter_expr =
1151     simplify_gen_binary (UDIV, desc->mode, old_niter,
1152                          GEN_INT (max_unroll + 1));
1153   desc->niter_max /= max_unroll + 1;
1154   if (exit_at_end)
1155     {
1156       desc->niter_expr =
1157         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1158       desc->noloop_assumptions = NULL_RTX;
1159       desc->niter_max--;
1160     }
1161
1162   if (dump_file)
1163     fprintf (dump_file,
1164              ";; Unrolled loop %d times, counting # of iterations "
1165              "in runtime, %i insns\n",
1166              max_unroll, num_loop_insns (loop));
1167
1168   if (dom_bbs)
1169     free (dom_bbs);
1170 }
1171
1172 /* Decide whether to simply peel LOOP and how much.  */
1173 static void
1174 decide_peel_simple (struct loop *loop, int flags)
1175 {
1176   unsigned npeel;
1177   struct niter_desc *desc;
1178
1179   if (!(flags & UAP_PEEL))
1180     {
1181       /* We were not asked to, just return back silently.  */
1182       return;
1183     }
1184
1185   if (dump_file)
1186     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1187
1188   /* npeel = number of iterations to peel.  */
1189   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1190   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1191     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1192
1193   /* Skip big loops.  */
1194   if (!npeel)
1195     {
1196       if (dump_file)
1197         fprintf (dump_file, ";; Not considering loop, is too big\n");
1198       return;
1199     }
1200
1201   /* Check for simple loops.  */
1202   desc = get_simple_loop_desc (loop);
1203
1204   /* Check number of iterations.  */
1205   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1206     {
1207       if (dump_file)
1208         fprintf (dump_file, ";; Loop iterates constant times\n");
1209       return;
1210     }
1211
1212   /* Do not simply peel loops with branches inside -- it increases number
1213      of mispredicts.  */
1214   if (num_loop_branches (loop) > 1)
1215     {
1216       if (dump_file)
1217         fprintf (dump_file, ";; Not peeling, contains branches\n");
1218       return;
1219     }
1220
1221   if (loop->header->count)
1222     {
1223       unsigned niter = expected_loop_iterations (loop);
1224       if (niter + 1 > npeel)
1225         {
1226           if (dump_file)
1227             {
1228               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1229               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1230                        (HOST_WIDEST_INT) (niter + 1));
1231               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1232                        npeel);
1233             }
1234           return;
1235         }
1236       npeel = niter + 1;
1237     }
1238   else
1239     {
1240       /* For now we have no good heuristics to decide whether loop peeling
1241          will be effective, so disable it.  */
1242       if (dump_file)
1243         fprintf (dump_file,
1244                  ";; Not peeling loop, no evidence it will be profitable\n");
1245       return;
1246     }
1247
1248   /* Success.  */
1249   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1250   loop->lpt_decision.times = npeel;
1251
1252   if (dump_file)
1253     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1254              loop->lpt_decision.times);
1255 }
1256
1257 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1258    while (cond)
1259      body;
1260
1261    ==>
1262
1263    if (!cond) goto end;
1264    body;
1265    if (!cond) goto end;
1266    body;
1267    while (cond)
1268      body;
1269    end: ;
1270    */
1271 static void
1272 peel_loop_simple (struct loops *loops, struct loop *loop)
1273 {
1274   sbitmap wont_exit;
1275   unsigned npeel = loop->lpt_decision.times;
1276   struct niter_desc *desc = get_simple_loop_desc (loop);
1277   struct opt_info *opt_info = NULL;
1278   bool ok;
1279
1280   if (flag_split_ivs_in_unroller && npeel > 1)
1281     opt_info = analyze_insns_in_loop (loop);
1282
1283   wont_exit = sbitmap_alloc (npeel + 1);
1284   sbitmap_zero (wont_exit);
1285
1286   opt_info_start_duplication (opt_info);
1287
1288   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1289                                       loops, npeel, wont_exit,
1290                                       NULL, NULL,
1291                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1292                                       | (opt_info
1293                                          ? DLTHE_RECORD_COPY_NUMBER
1294                                            : 0));
1295   gcc_assert (ok);
1296
1297   free (wont_exit);
1298
1299   if (opt_info)
1300     {
1301       apply_opt_in_copies (opt_info, npeel, false, false);
1302       free_opt_info (opt_info);
1303     }
1304
1305   if (desc->simple_p)
1306     {
1307       if (desc->const_iter)
1308         {
1309           desc->niter -= npeel;
1310           desc->niter_expr = GEN_INT (desc->niter);
1311           desc->noloop_assumptions = NULL_RTX;
1312         }
1313       else
1314         {
1315           /* We cannot just update niter_expr, as its value might be clobbered
1316              inside loop.  We could handle this by counting the number into
1317              temporary just like we do in runtime unrolling, but it does not
1318              seem worthwhile.  */
1319           free_simple_loop_desc (loop);
1320         }
1321     }
1322   if (dump_file)
1323     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1324 }
1325
1326 /* Decide whether to unroll LOOP stupidly and how much.  */
1327 static void
1328 decide_unroll_stupid (struct loop *loop, int flags)
1329 {
1330   unsigned nunroll, nunroll_by_av, i;
1331   struct niter_desc *desc;
1332
1333   if (!(flags & UAP_UNROLL_ALL))
1334     {
1335       /* We were not asked to, just return back silently.  */
1336       return;
1337     }
1338
1339   if (dump_file)
1340     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1341
1342   /* nunroll = total number of copies of the original loop body in
1343      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1344   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1345   nunroll_by_av
1346     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1347   if (nunroll > nunroll_by_av)
1348     nunroll = nunroll_by_av;
1349   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1350     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1351
1352   /* Skip big loops.  */
1353   if (nunroll <= 1)
1354     {
1355       if (dump_file)
1356         fprintf (dump_file, ";; Not considering loop, is too big\n");
1357       return;
1358     }
1359
1360   /* Check for simple loops.  */
1361   desc = get_simple_loop_desc (loop);
1362
1363   /* Check simpleness.  */
1364   if (desc->simple_p && !desc->assumptions)
1365     {
1366       if (dump_file)
1367         fprintf (dump_file, ";; The loop is simple\n");
1368       return;
1369     }
1370
1371   /* Do not unroll loops with branches inside -- it increases number
1372      of mispredicts.  */
1373   if (num_loop_branches (loop) > 1)
1374     {
1375       if (dump_file)
1376         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1377       return;
1378     }
1379
1380   /* If we have profile feedback, check whether the loop rolls.  */
1381   if (loop->header->count
1382       && expected_loop_iterations (loop) < 2 * nunroll)
1383     {
1384       if (dump_file)
1385         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1386       return;
1387     }
1388
1389   /* Success.  Now force nunroll to be power of 2, as it seems that this
1390      improves results (partially because of better alignments, partially
1391      because of some dark magic).  */
1392   for (i = 1; 2 * i <= nunroll; i *= 2)
1393     continue;
1394
1395   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1396   loop->lpt_decision.times = i - 1;
1397
1398   if (dump_file)
1399     fprintf (dump_file,
1400              ";; Decided to unroll the loop stupidly, %d times.\n",
1401              loop->lpt_decision.times);
1402 }
1403
1404 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1405    while (cond)
1406      body;
1407
1408    ==>
1409
1410    while (cond)
1411      {
1412        body;
1413        if (!cond) break;
1414        body;
1415        if (!cond) break;
1416        body;
1417        if (!cond) break;
1418        body;
1419      }
1420    */
1421 static void
1422 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1423 {
1424   sbitmap wont_exit;
1425   unsigned nunroll = loop->lpt_decision.times;
1426   struct niter_desc *desc = get_simple_loop_desc (loop);
1427   struct opt_info *opt_info = NULL;
1428   bool ok;
1429
1430   if (flag_split_ivs_in_unroller
1431       || flag_variable_expansion_in_unroller)
1432     opt_info = analyze_insns_in_loop (loop);
1433
1434
1435   wont_exit = sbitmap_alloc (nunroll + 1);
1436   sbitmap_zero (wont_exit);
1437   opt_info_start_duplication (opt_info);
1438
1439   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1440                                       loops, nunroll, wont_exit,
1441                                       NULL, NULL, NULL,
1442                                       DLTHE_FLAG_UPDATE_FREQ
1443                                       | (opt_info
1444                                          ? DLTHE_RECORD_COPY_NUMBER
1445                                            : 0));
1446   gcc_assert (ok);
1447
1448   if (opt_info)
1449     {
1450       apply_opt_in_copies (opt_info, nunroll, true, true);
1451       free_opt_info (opt_info);
1452     }
1453
1454   free (wont_exit);
1455
1456   if (desc->simple_p)
1457     {
1458       /* We indeed may get here provided that there are nontrivial assumptions
1459          for a loop to be really simple.  We could update the counts, but the
1460          problem is that we are unable to decide which exit will be taken
1461          (not really true in case the number of iterations is constant,
1462          but noone will do anything with this information, so we do not
1463          worry about it).  */
1464       desc->simple_p = false;
1465     }
1466
1467   if (dump_file)
1468     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1469              nunroll, num_loop_insns (loop));
1470 }
1471
1472 /* A hash function for information about insns to split.  */
1473
1474 static hashval_t
1475 si_info_hash (const void *ivts)
1476 {
1477   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1478 }
1479
1480 /* An equality functions for information about insns to split.  */
1481
1482 static int
1483 si_info_eq (const void *ivts1, const void *ivts2)
1484 {
1485   const struct iv_to_split *i1 = ivts1;
1486   const struct iv_to_split *i2 = ivts2;
1487
1488   return i1->insn == i2->insn;
1489 }
1490
1491 /* Return a hash for VES, which is really a "var_to_expand *".  */
1492
1493 static hashval_t
1494 ve_info_hash (const void *ves)
1495 {
1496   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1497 }
1498
1499 /* Return true if IVTS1 and IVTS2 (which are really both of type
1500    "var_to_expand *") refer to the same instruction.  */
1501
1502 static int
1503 ve_info_eq (const void *ivts1, const void *ivts2)
1504 {
1505   const struct var_to_expand *i1 = ivts1;
1506   const struct var_to_expand *i2 = ivts2;
1507
1508   return i1->insn == i2->insn;
1509 }
1510
1511 /* Returns true if REG is referenced in one insn in LOOP.  */
1512
1513 bool
1514 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1515 {
1516   basic_block *body, bb;
1517   unsigned i;
1518   int count_ref = 0;
1519   rtx insn;
1520
1521   body = get_loop_body (loop);
1522   for (i = 0; i < loop->num_nodes; i++)
1523     {
1524       bb = body[i];
1525
1526       FOR_BB_INSNS (bb, insn)
1527       {
1528         if (rtx_referenced_p (reg, insn))
1529           count_ref++;
1530       }
1531     }
1532   return (count_ref  == 1);
1533 }
1534
1535 /* Determine whether INSN contains an accumulator
1536    which can be expanded into separate copies,
1537    one for each copy of the LOOP body.
1538
1539    for (i = 0 ; i < n; i++)
1540      sum += a[i];
1541
1542    ==>
1543
1544    sum += a[i]
1545    ....
1546    i = i+1;
1547    sum1 += a[i]
1548    ....
1549    i = i+1
1550    sum2 += a[i];
1551    ....
1552
1553    Return NULL if INSN contains no opportunity for expansion of accumulator.
1554    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1555    information and return a pointer to it.
1556 */
1557
1558 static struct var_to_expand *
1559 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1560 {
1561   rtx set, dest, src, op1;
1562   struct var_to_expand *ves;
1563   enum machine_mode mode1, mode2;
1564
1565   set = single_set (insn);
1566   if (!set)
1567     return NULL;
1568
1569   dest = SET_DEST (set);
1570   src = SET_SRC (set);
1571
1572   if (GET_CODE (src) != PLUS
1573       && GET_CODE (src) != MINUS
1574       && GET_CODE (src) != MULT)
1575     return NULL;
1576
1577   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1578      in MD.  But if there is no optab to generate the insn, we can not
1579      perform the variable expansion.  This can happen if an MD provides
1580      an insn but not a named pattern to generate it, for example to avoid
1581      producing code that needs additional mode switches like for x87/mmx.
1582
1583      So we check have_insn_for which looks for an optab for the operation
1584      in SRC.  If it doesn't exist, we can't perform the expansion even
1585      though INSN is valid.  */
1586   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1587     return NULL;
1588
1589   if (!XEXP (src, 0))
1590     return NULL;
1591
1592   op1 = XEXP (src, 0);
1593
1594   if (!REG_P (dest)
1595       && !(GET_CODE (dest) == SUBREG
1596            && REG_P (SUBREG_REG (dest))))
1597     return NULL;
1598
1599   if (!rtx_equal_p (dest, op1))
1600     return NULL;
1601
1602   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1603     return NULL;
1604
1605   if (rtx_referenced_p (dest, XEXP (src, 1)))
1606     return NULL;
1607
1608   mode1 = GET_MODE (dest);
1609   mode2 = GET_MODE (XEXP (src, 1));
1610   if ((FLOAT_MODE_P (mode1)
1611        || FLOAT_MODE_P (mode2))
1612       && !flag_unsafe_math_optimizations)
1613     return NULL;
1614
1615   /* Record the accumulator to expand.  */
1616   ves = XNEW (struct var_to_expand);
1617   ves->insn = insn;
1618   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1619   ves->reg = copy_rtx (dest);
1620   ves->op = GET_CODE (src);
1621   ves->expansion_count = 0;
1622   ves->reuse_expansion = 0;
1623   return ves;
1624 }
1625
1626 /* Determine whether there is an induction variable in INSN that
1627    we would like to split during unrolling.
1628
1629    I.e. replace
1630
1631    i = i + 1;
1632    ...
1633    i = i + 1;
1634    ...
1635    i = i + 1;
1636    ...
1637
1638    type chains by
1639
1640    i0 = i + 1
1641    ...
1642    i = i0 + 1
1643    ...
1644    i = i0 + 2
1645    ...
1646
1647    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1648    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1649    pointer to it.  */
1650
1651 static struct iv_to_split *
1652 analyze_iv_to_split_insn (rtx insn)
1653 {
1654   rtx set, dest;
1655   struct rtx_iv iv;
1656   struct iv_to_split *ivts;
1657   bool ok;
1658
1659   /* For now we just split the basic induction variables.  Later this may be
1660      extended for example by selecting also addresses of memory references.  */
1661   set = single_set (insn);
1662   if (!set)
1663     return NULL;
1664
1665   dest = SET_DEST (set);
1666   if (!REG_P (dest))
1667     return NULL;
1668
1669   if (!biv_p (insn, dest))
1670     return NULL;
1671
1672   ok = iv_analyze_result (insn, dest, &iv);
1673
1674   /* This used to be an assert under the assumption that if biv_p returns
1675      true that iv_analyze_result must also return true.  However, that
1676      assumption is not strictly correct as evidenced by pr25569.
1677
1678      Returning NULL when iv_analyze_result returns false is safe and
1679      avoids the problems in pr25569 until the iv_analyze_* routines
1680      can be fixed, which is apparently hard and time consuming
1681      according to their author.  */
1682   if (! ok)
1683     return NULL;
1684
1685   if (iv.step == const0_rtx
1686       || iv.mode != iv.extend_mode)
1687     return NULL;
1688
1689   /* Record the insn to split.  */
1690   ivts = XNEW (struct iv_to_split);
1691   ivts->insn = insn;
1692   ivts->base_var = NULL_RTX;
1693   ivts->step = iv.step;
1694   ivts->n_loc = 1;
1695   ivts->loc[0] = 1;
1696
1697   return ivts;
1698 }
1699
1700 /* Determines which of insns in LOOP can be optimized.
1701    Return a OPT_INFO struct with the relevant hash tables filled
1702    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1703    is undefined for the return value.  */
1704
1705 static struct opt_info *
1706 analyze_insns_in_loop (struct loop *loop)
1707 {
1708   basic_block *body, bb;
1709   unsigned i, num_edges = 0;
1710   struct opt_info *opt_info = XCNEW (struct opt_info);
1711   rtx insn;
1712   struct iv_to_split *ivts = NULL;
1713   struct var_to_expand *ves = NULL;
1714   PTR *slot1;
1715   PTR *slot2;
1716   edge *edges = get_loop_exit_edges (loop, &num_edges);
1717   bool can_apply = false;
1718
1719   iv_analysis_loop_init (loop);
1720
1721   body = get_loop_body (loop);
1722
1723   if (flag_split_ivs_in_unroller)
1724     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1725                                             si_info_hash, si_info_eq, free);
1726
1727   /* Record the loop exit bb and loop preheader before the unrolling.  */
1728   if (!loop_preheader_edge (loop)->src)
1729     {
1730       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1731       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1732     }
1733   else
1734     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1735
1736   if (num_edges == 1
1737       && !(edges[0]->flags & EDGE_COMPLEX))
1738     {
1739       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1740       can_apply = true;
1741     }
1742
1743   if (flag_variable_expansion_in_unroller
1744       && can_apply)
1745     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1746                                                       ve_info_hash, ve_info_eq, free);
1747
1748   for (i = 0; i < loop->num_nodes; i++)
1749     {
1750       bb = body[i];
1751       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1752         continue;
1753
1754       FOR_BB_INSNS (bb, insn)
1755       {
1756         if (!INSN_P (insn))
1757           continue;
1758
1759         if (opt_info->insns_to_split)
1760           ivts = analyze_iv_to_split_insn (insn);
1761
1762         if (ivts)
1763           {
1764             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1765             *slot1 = ivts;
1766             continue;
1767           }
1768
1769         if (opt_info->insns_with_var_to_expand)
1770           ves = analyze_insn_to_expand_var (loop, insn);
1771
1772         if (ves)
1773           {
1774             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1775             *slot2 = ves;
1776           }
1777       }
1778     }
1779
1780   free (edges);
1781   free (body);
1782   return opt_info;
1783 }
1784
1785 /* Called just before loop duplication.  Records start of duplicated area
1786    to OPT_INFO.  */
1787
1788 static void
1789 opt_info_start_duplication (struct opt_info *opt_info)
1790 {
1791   if (opt_info)
1792     opt_info->first_new_block = last_basic_block;
1793 }
1794
1795 /* Determine the number of iterations between initialization of the base
1796    variable and the current copy (N_COPY).  N_COPIES is the total number
1797    of newly created copies.  UNROLLING is true if we are unrolling
1798    (not peeling) the loop.  */
1799
1800 static unsigned
1801 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1802 {
1803   if (unrolling)
1804     {
1805       /* If we are unrolling, initialization is done in the original loop
1806          body (number 0).  */
1807       return n_copy;
1808     }
1809   else
1810     {
1811       /* If we are peeling, the copy in that the initialization occurs has
1812          number 1.  The original loop (number 0) is the last.  */
1813       if (n_copy)
1814         return n_copy - 1;
1815       else
1816         return n_copies;
1817     }
1818 }
1819
1820 /* Locate in EXPR the expression corresponding to the location recorded
1821    in IVTS, and return a pointer to the RTX for this location.  */
1822
1823 static rtx *
1824 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1825 {
1826   unsigned i;
1827   rtx *ret = &expr;
1828
1829   for (i = 0; i < ivts->n_loc; i++)
1830     ret = &XEXP (*ret, ivts->loc[i]);
1831
1832   return ret;
1833 }
1834
1835 /* Allocate basic variable for the induction variable chain.  Callback for
1836    htab_traverse.  */
1837
1838 static int
1839 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1840 {
1841   struct iv_to_split *ivts = *slot;
1842   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1843
1844   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1845
1846   return 1;
1847 }
1848
1849 /* Insert initialization of basic variable of IVTS before INSN, taking
1850    the initial value from INSN.  */
1851
1852 static void
1853 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1854 {
1855   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1856   rtx seq;
1857
1858   start_sequence ();
1859   expr = force_operand (expr, ivts->base_var);
1860   if (expr != ivts->base_var)
1861     emit_move_insn (ivts->base_var, expr);
1862   seq = get_insns ();
1863   end_sequence ();
1864
1865   emit_insn_before (seq, insn);
1866 }
1867
1868 /* Replace the use of induction variable described in IVTS in INSN
1869    by base variable + DELTA * step.  */
1870
1871 static void
1872 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1873 {
1874   rtx expr, *loc, seq, incr, var;
1875   enum machine_mode mode = GET_MODE (ivts->base_var);
1876   rtx src, dest, set;
1877
1878   /* Construct base + DELTA * step.  */
1879   if (!delta)
1880     expr = ivts->base_var;
1881   else
1882     {
1883       incr = simplify_gen_binary (MULT, mode,
1884                                   ivts->step, gen_int_mode (delta, mode));
1885       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1886                                   ivts->base_var, incr);
1887     }
1888
1889   /* Figure out where to do the replacement.  */
1890   loc = get_ivts_expr (single_set (insn), ivts);
1891
1892   /* If we can make the replacement right away, we're done.  */
1893   if (validate_change (insn, loc, expr, 0))
1894     return;
1895
1896   /* Otherwise, force EXPR into a register and try again.  */
1897   start_sequence ();
1898   var = gen_reg_rtx (mode);
1899   expr = force_operand (expr, var);
1900   if (expr != var)
1901     emit_move_insn (var, expr);
1902   seq = get_insns ();
1903   end_sequence ();
1904   emit_insn_before (seq, insn);
1905
1906   if (validate_change (insn, loc, var, 0))
1907     return;
1908
1909   /* The last chance.  Try recreating the assignment in insn
1910      completely from scratch.  */
1911   set = single_set (insn);
1912   gcc_assert (set);
1913
1914   start_sequence ();
1915   *loc = var;
1916   src = copy_rtx (SET_SRC (set));
1917   dest = copy_rtx (SET_DEST (set));
1918   src = force_operand (src, dest);
1919   if (src != dest)
1920     emit_move_insn (dest, src);
1921   seq = get_insns ();
1922   end_sequence ();
1923
1924   emit_insn_before (seq, insn);
1925   delete_insn (insn);
1926 }
1927
1928
1929 /* Return one expansion of the accumulator recorded in struct VE.  */
1930
1931 static rtx
1932 get_expansion (struct var_to_expand *ve)
1933 {
1934   rtx reg;
1935
1936   if (ve->reuse_expansion == 0)
1937     reg = ve->reg;
1938   else
1939     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1940
1941   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1942     ve->reuse_expansion = 0;
1943   else
1944     ve->reuse_expansion++;
1945
1946   return reg;
1947 }
1948
1949
1950 /* Given INSN replace the uses of the accumulator recorded in VE
1951    with a new register.  */
1952
1953 static void
1954 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1955 {
1956   rtx new_reg, set;
1957   bool really_new_expansion = false;
1958
1959   set = single_set (insn);
1960   gcc_assert (set);
1961
1962   /* Generate a new register only if the expansion limit has not been
1963      reached.  Else reuse an already existing expansion.  */
1964   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1965     {
1966       really_new_expansion = true;
1967       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1968     }
1969   else
1970     new_reg = get_expansion (ve);
1971
1972   validate_change (insn, &SET_DEST (set), new_reg, 1);
1973   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1974
1975   if (apply_change_group ())
1976     if (really_new_expansion)
1977       {
1978         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1979         ve->expansion_count++;
1980       }
1981 }
1982
1983 /* Initialize the variable expansions in loop preheader.
1984    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1985    basic block where the initialization of the expansions
1986    should take place.  */
1987
1988 static int
1989 insert_var_expansion_initialization (void **slot, void *place_p)
1990 {
1991   struct var_to_expand *ve = *slot;
1992   basic_block place = (basic_block)place_p;
1993   rtx seq, var, zero_init, insn;
1994   unsigned i;
1995
1996   if (VEC_length (rtx, ve->var_expansions) == 0)
1997     return 1;
1998
1999   start_sequence ();
2000   if (ve->op == PLUS || ve->op == MINUS)
2001     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2002       {
2003         zero_init =  CONST0_RTX (GET_MODE (var));
2004         emit_move_insn (var, zero_init);
2005       }
2006   else if (ve->op == MULT)
2007     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2008       {
2009         zero_init =  CONST1_RTX (GET_MODE (var));
2010         emit_move_insn (var, zero_init);
2011       }
2012
2013   seq = get_insns ();
2014   end_sequence ();
2015
2016   insn = BB_HEAD (place);
2017   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2018     insn = NEXT_INSN (insn);
2019
2020   emit_insn_after (seq, insn);
2021   /* Continue traversing the hash table.  */
2022   return 1;
2023 }
2024
2025 /*  Combine the variable expansions at the loop exit.
2026     Callbacks for htab_traverse.  PLACE_P is the loop exit
2027     basic block where the summation of the expansions should
2028     take place.  */
2029
2030 static int
2031 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2032 {
2033   struct var_to_expand *ve = *slot;
2034   basic_block place = (basic_block)place_p;
2035   rtx sum = ve->reg;
2036   rtx expr, seq, var, insn;
2037   unsigned i;
2038
2039   if (VEC_length (rtx, ve->var_expansions) == 0)
2040     return 1;
2041
2042   start_sequence ();
2043   if (ve->op == PLUS || ve->op == MINUS)
2044     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2045       {
2046         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2047                                    var, sum);
2048       }
2049   else if (ve->op == MULT)
2050     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2051       {
2052         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2053                                    var, sum);
2054       }
2055
2056   expr = force_operand (sum, ve->reg);
2057   if (expr != ve->reg)
2058     emit_move_insn (ve->reg, expr);
2059   seq = get_insns ();
2060   end_sequence ();
2061
2062   insn = BB_HEAD (place);
2063   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2064     insn = NEXT_INSN (insn);
2065
2066   emit_insn_after (seq, insn);
2067
2068   /* Continue traversing the hash table.  */
2069   return 1;
2070 }
2071
2072 /* Apply loop optimizations in loop copies using the
2073    data which gathered during the unrolling.  Structure
2074    OPT_INFO record that data.
2075
2076    UNROLLING is true if we unrolled (not peeled) the loop.
2077    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2078    the loop (as it should happen in complete unrolling, but not in ordinary
2079    peeling of the loop).  */
2080
2081 static void
2082 apply_opt_in_copies (struct opt_info *opt_info,
2083                      unsigned n_copies, bool unrolling,
2084                      bool rewrite_original_loop)
2085 {
2086   unsigned i, delta;
2087   basic_block bb, orig_bb;
2088   rtx insn, orig_insn, next;
2089   struct iv_to_split ivts_templ, *ivts;
2090   struct var_to_expand ve_templ, *ves;
2091
2092   /* Sanity check -- we need to put initialization in the original loop
2093      body.  */
2094   gcc_assert (!unrolling || rewrite_original_loop);
2095
2096   /* Allocate the basic variables (i0).  */
2097   if (opt_info->insns_to_split)
2098     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2099
2100   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2101     {
2102       bb = BASIC_BLOCK (i);
2103       orig_bb = get_bb_original (bb);
2104
2105       /* bb->aux holds position in copy sequence initialized by
2106          duplicate_loop_to_header_edge.  */
2107       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2108                                         unrolling);
2109       bb->aux = 0;
2110       orig_insn = BB_HEAD (orig_bb);
2111       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2112         {
2113           next = NEXT_INSN (insn);
2114           if (!INSN_P (insn))
2115             continue;
2116
2117           while (!INSN_P (orig_insn))
2118             orig_insn = NEXT_INSN (orig_insn);
2119
2120           ivts_templ.insn = orig_insn;
2121           ve_templ.insn = orig_insn;
2122
2123           /* Apply splitting iv optimization.  */
2124           if (opt_info->insns_to_split)
2125             {
2126               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2127
2128               if (ivts)
2129                 {
2130                   gcc_assert (GET_CODE (PATTERN (insn))
2131                               == GET_CODE (PATTERN (orig_insn)));
2132
2133                   if (!delta)
2134                     insert_base_initialization (ivts, insn);
2135                   split_iv (ivts, insn, delta);
2136                 }
2137             }
2138           /* Apply variable expansion optimization.  */
2139           if (unrolling && opt_info->insns_with_var_to_expand)
2140             {
2141               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2142               if (ves)
2143                 {
2144                   gcc_assert (GET_CODE (PATTERN (insn))
2145                               == GET_CODE (PATTERN (orig_insn)));
2146                   expand_var_during_unrolling (ves, insn);
2147                 }
2148             }
2149           orig_insn = NEXT_INSN (orig_insn);
2150         }
2151     }
2152
2153   if (!rewrite_original_loop)
2154     return;
2155
2156   /* Initialize the variable expansions in the loop preheader
2157      and take care of combining them at the loop exit.  */
2158   if (opt_info->insns_with_var_to_expand)
2159     {
2160       htab_traverse (opt_info->insns_with_var_to_expand,
2161                      insert_var_expansion_initialization,
2162                      opt_info->loop_preheader);
2163       htab_traverse (opt_info->insns_with_var_to_expand,
2164                      combine_var_copies_in_loop_exit,
2165                      opt_info->loop_exit);
2166     }
2167
2168   /* Rewrite also the original loop body.  Find them as originals of the blocks
2169      in the last copied iteration, i.e. those that have
2170      get_bb_copy (get_bb_original (bb)) == bb.  */
2171   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2172     {
2173       bb = BASIC_BLOCK (i);
2174       orig_bb = get_bb_original (bb);
2175       if (get_bb_copy (orig_bb) != bb)
2176         continue;
2177
2178       delta = determine_split_iv_delta (0, n_copies, unrolling);
2179       for (orig_insn = BB_HEAD (orig_bb);
2180            orig_insn != NEXT_INSN (BB_END (bb));
2181            orig_insn = next)
2182         {
2183           next = NEXT_INSN (orig_insn);
2184
2185           if (!INSN_P (orig_insn))
2186             continue;
2187
2188           ivts_templ.insn = orig_insn;
2189           if (opt_info->insns_to_split)
2190             {
2191               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2192               if (ivts)
2193                 {
2194                   if (!delta)
2195                     insert_base_initialization (ivts, orig_insn);
2196                   split_iv (ivts, orig_insn, delta);
2197                   continue;
2198                 }
2199             }
2200
2201         }
2202     }
2203 }
2204
2205 /*  Release the data structures used for the variable expansion
2206     optimization.  Callbacks for htab_traverse.  */
2207
2208 static int
2209 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2210 {
2211   struct var_to_expand *ve = *slot;
2212
2213   VEC_free (rtx, heap, ve->var_expansions);
2214
2215   /* Continue traversing the hash table.  */
2216   return 1;
2217 }
2218
2219 /* Release OPT_INFO.  */
2220
2221 static void
2222 free_opt_info (struct opt_info *opt_info)
2223 {
2224   if (opt_info->insns_to_split)
2225     htab_delete (opt_info->insns_to_split);
2226   if (opt_info->insns_with_var_to_expand)
2227     {
2228       htab_traverse (opt_info->insns_with_var_to_expand,
2229                      release_var_copies, NULL);
2230       htab_delete (opt_info->insns_with_var_to_expand);
2231     }
2232   free (opt_info);
2233 }