gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see the comments at
  62    the appropriate functions below.
  63
  64    There are a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance by several percent.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (int);
 118 static void peel_loops_completely (int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loop *);
 126 static void peel_loop_completely (struct loop *);
 127 static void unroll_loop_stupid (struct loop *);
 128 static void unroll_loop_constant_iterations (struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (int flags)
 146 {
 147   struct loop *loop, *next;
 148   bool check;
 149
 150   /* First perform complete loop peeling (it is almost surely a win,
 151      and affects parameters for further decision a lot).  */
 152   peel_loops_completely (flags);
 153
 154   /* Now decide rest of unrolling and peeling.  */
 155   decide_unrolling_and_peeling (flags);
 156
 157   loop = current_loops->tree_root;
 158   while (loop->inner)
 159     loop = loop->inner;
 160
 161   /* Scan the loops, inner ones first.  */
 162   while (loop != current_loops->tree_root)
 163     {
 164       if (loop->next)
 165         {
 166           next = loop->next;
 167           while (next->inner)
 168             next = next->inner;
 169         }
 170       else
 171         next = loop->outer;
 172
 173       check = true;
 174       /* And perform the appropriate transformations.  */
 175       switch (loop->lpt_decision.decision)
 176         {
 177         case LPT_PEEL_COMPLETELY:
 178           /* Already done.  */
 179           gcc_unreachable ();
 180         case LPT_PEEL_SIMPLE:
 181           peel_loop_simple (loop);
 182           break;
 183         case LPT_UNROLL_CONSTANT:
 184           unroll_loop_constant_iterations (loop);
 185           break;
 186         case LPT_UNROLL_RUNTIME:
 187           unroll_loop_runtime_iterations (loop);
 188           break;
 189         case LPT_UNROLL_STUPID:
 190           unroll_loop_stupid (loop);
 191           break;
 192         case LPT_NONE:
 193           check = false;
 194           break;
 195         default:
 196           gcc_unreachable ();
 197         }
 198       if (check)
 199         {
 200 #ifdef ENABLE_CHECKING
 201           verify_dominators (CDI_DOMINATORS);
 202           verify_loop_structure ();
 203 #endif
 204         }
 205       loop = next;
 206     }
 207
 208   iv_analysis_done ();
 209 }
 210
 211 /* Check whether exit of the LOOP is at the end of loop body.  */
 212
 213 static bool
 214 loop_exit_at_end_p (struct loop *loop)
 215 {
 216   struct niter_desc *desc = get_simple_loop_desc (loop);
 217   rtx insn;
 218
 219   if (desc->in_edge->dest != loop->latch)
 220     return false;
 221
 222   /* Check that the latch is empty.  */
 223   FOR_BB_INSNS (loop->latch, insn)
 224     {
 225       if (INSN_P (insn))
 226         return false;
 227     }
 228
 229   return true;
 230 }
 231
 232 /* Depending on FLAGS, check whether to peel loops completely and do so.  */
 233 static void
 234 peel_loops_completely (int flags)
 235 {
 236   struct loop *loop;
 237   unsigned i;
 238
 239   /* Scan the loops, the inner ones first.  */
 240   for (i = current_loops->num - 1; i > 0; i--)
 241     {
 242       loop = current_loops->parray[i];
 243       if (!loop)
 244         continue;
 245
 246       loop->lpt_decision.decision = LPT_NONE;
 247
 248       if (dump_file)
 249         fprintf (dump_file,
 250                  "\n;; *** Considering loop %d for complete peeling ***\n",
 251                  loop->num);
 252
 253       loop->ninsns = num_loop_insns (loop);
 254
 255       decide_peel_once_rolling (loop, flags);
 256       if (loop->lpt_decision.decision == LPT_NONE)
 257         decide_peel_completely (loop, flags);
 258
 259       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 260         {
 261           peel_loop_completely (loop);
 262 #ifdef ENABLE_CHECKING
 263           verify_dominators (CDI_DOMINATORS);
 264           verify_loop_structure ();
 265 #endif
 266         }
 267     }
 268 }
 269
 270 /* Decide whether unroll or peel loops (depending on FLAGS) and how much.  */
 271 static void
 272 decide_unrolling_and_peeling (int flags)
 273 {
 274   struct loop *loop = current_loops->tree_root, *next;
 275
 276   while (loop->inner)
 277     loop = loop->inner;
 278
 279   /* Scan the loops, inner ones first.  */
 280   while (loop != current_loops->tree_root)
 281     {
 282       if (loop->next)
 283         {
 284           next = loop->next;
 285           while (next->inner)
 286             next = next->inner;
 287         }
 288       else
 289         next = loop->outer;
 290
 291       loop->lpt_decision.decision = LPT_NONE;
 292
 293       if (dump_file)
 294         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 295
 296       /* Do not peel cold areas.  */
 297       if (!maybe_hot_bb_p (loop->header))
 298         {
 299           if (dump_file)
 300             fprintf (dump_file, ";; Not considering loop, cold area\n");
 301           loop = next;
 302           continue;
 303         }
 304
 305       /* Can the loop be manipulated?  */
 306       if (!can_duplicate_loop_p (loop))
 307         {
 308           if (dump_file)
 309             fprintf (dump_file,
 310                      ";; Not considering loop, cannot duplicate\n");
 311           loop = next;
 312           continue;
 313         }
 314
 315       /* Skip non-innermost loops.  */
 316       if (loop->inner)
 317         {
 318           if (dump_file)
 319             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 320           loop = next;
 321           continue;
 322         }
 323
 324       loop->ninsns = num_loop_insns (loop);
 325       loop->av_ninsns = average_num_loop_insns (loop);
 326
 327       /* Try transformations one by one in decreasing order of
 328          priority.  */
 329
 330       decide_unroll_constant_iterations (loop, flags);
 331       if (loop->lpt_decision.decision == LPT_NONE)
 332         decide_unroll_runtime_iterations (loop, flags);
 333       if (loop->lpt_decision.decision == LPT_NONE)
 334         decide_unroll_stupid (loop, flags);
 335       if (loop->lpt_decision.decision == LPT_NONE)
 336         decide_peel_simple (loop, flags);
 337
 338       loop = next;
 339     }
 340 }
 341
 342 /* Decide whether the LOOP is once rolling and suitable for complete
 343    peeling.  */
 344 static void
 345 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 346 {
 347   struct niter_desc *desc;
 348
 349   if (dump_file)
 350     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 351
 352   /* Is the loop small enough?  */
 353   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 354     {
 355       if (dump_file)
 356         fprintf (dump_file, ";; Not considering loop, is too big\n");
 357       return;
 358     }
 359
 360   /* Check for simple loops.  */
 361   desc = get_simple_loop_desc (loop);
 362
 363   /* Check number of iterations.  */
 364   if (!desc->simple_p
 365       || desc->assumptions
 366       || desc->infinite
 367       || !desc->const_iter
 368       || desc->niter != 0)
 369     {
 370       if (dump_file)
 371         fprintf (dump_file,
 372                  ";; Unable to prove that the loop rolls exactly once\n");
 373       return;
 374     }
 375
 376   /* Success.  */
 377   if (dump_file)
 378     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 379   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 380 }
 381
 382 /* Decide whether the LOOP is suitable for complete peeling.  */
 383 static void
 384 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 385 {
 386   unsigned npeel;
 387   struct niter_desc *desc;
 388
 389   if (dump_file)
 390     fprintf (dump_file, "\n;; Considering peeling completely\n");
 391
 392   /* Skip non-innermost loops.  */
 393   if (loop->inner)
 394     {
 395       if (dump_file)
 396         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 397       return;
 398     }
 399
 400   /* Do not peel cold areas.  */
 401   if (!maybe_hot_bb_p (loop->header))
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not considering loop, cold area\n");
 405       return;
 406     }
 407
 408   /* Can the loop be manipulated?  */
 409   if (!can_duplicate_loop_p (loop))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file,
 413                  ";; Not considering loop, cannot duplicate\n");
 414       return;
 415     }
 416
 417   /* npeel = number of iterations to peel.  */
 418   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 419   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 420     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 421
 422   /* Is the loop small enough?  */
 423   if (!npeel)
 424     {
 425       if (dump_file)
 426         fprintf (dump_file, ";; Not considering loop, is too big\n");
 427       return;
 428     }
 429
 430   /* Check for simple loops.  */
 431   desc = get_simple_loop_desc (loop);
 432
 433   /* Check number of iterations.  */
 434   if (!desc->simple_p
 435       || desc->assumptions
 436       || !desc->const_iter
 437       || desc->infinite)
 438     {
 439       if (dump_file)
 440         fprintf (dump_file,
 441                  ";; Unable to prove that the loop iterates constant times\n");
 442       return;
 443     }
 444
 445   if (desc->niter > npeel - 1)
 446     {
 447       if (dump_file)
 448         {
 449           fprintf (dump_file,
 450                    ";; Not peeling loop completely, rolls too much (");
 451           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 452           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 453         }
 454       return;
 455     }
 456
 457   /* Success.  */
 458   if (dump_file)
 459     fprintf (dump_file, ";; Decided to peel loop completely\n");
 460   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 461 }
 462
 463 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 464    completely.  The transformation done:
 465
 466    for (i = 0; i < 4; i++)
 467      body;
 468
 469    ==>
 470
 471    i = 0;
 472    body; i++;
 473    body; i++;
 474    body; i++;
 475    body; i++;
 476    */
 477 static void
 478 peel_loop_completely (struct loop *loop)
 479 {
 480   sbitmap wont_exit;
 481   unsigned HOST_WIDE_INT npeel;
 482   unsigned n_remove_edges, i;
 483   edge *remove_edges, ein;
 484   struct niter_desc *desc = get_simple_loop_desc (loop);
 485   struct opt_info *opt_info = NULL;
 486
 487   npeel = desc->niter;
 488
 489   if (npeel)
 490     {
 491       bool ok;
 492
 493       wont_exit = sbitmap_alloc (npeel + 1);
 494       sbitmap_ones (wont_exit);
 495       RESET_BIT (wont_exit, 0);
 496       if (desc->noloop_assumptions)
 497         RESET_BIT (wont_exit, 1);
 498
 499       remove_edges = XCNEWVEC (edge, npeel);
 500       n_remove_edges = 0;
 501
 502       if (flag_split_ivs_in_unroller)
 503         opt_info = analyze_insns_in_loop (loop);
 504
 505       opt_info_start_duplication (opt_info);
 506       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 507                                           npeel,
 508                                           wont_exit, desc->out_edge,
 509                                           remove_edges, &n_remove_edges,
 510                                           DLTHE_FLAG_UPDATE_FREQ
 511                                           | DLTHE_FLAG_COMPLETTE_PEEL
 512                                           | (opt_info
 513                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 514       gcc_assert (ok);
 515
 516       free (wont_exit);
 517
 518       if (opt_info)
 519         {
 520           apply_opt_in_copies (opt_info, npeel, false, true);
 521           free_opt_info (opt_info);
 522         }
 523
 524       /* Remove the exit edges.  */
 525       for (i = 0; i < n_remove_edges; i++)
 526         remove_path (remove_edges[i]);
 527       free (remove_edges);
 528     }
 529
 530   ein = desc->in_edge;
 531   free_simple_loop_desc (loop);
 532
 533   /* Now remove the unreachable part of the last iteration and cancel
 534      the loop.  */
 535   remove_path (ein);
 536
 537   if (dump_file)
 538     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 539 }
 540
 541 /* Decide whether to unroll LOOP iterating constant number of times
 542    and how much.  */
 543
 544 static void
 545 decide_unroll_constant_iterations (struct loop *loop, int flags)
 546 {
 547   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 548   struct niter_desc *desc;
 549
 550   if (!(flags & UAP_UNROLL))
 551     {
 552       /* We were not asked to, just return back silently.  */
 553       return;
 554     }
 555
 556   if (dump_file)
 557     fprintf (dump_file,
 558              "\n;; Considering unrolling loop with constant "
 559              "number of iterations\n");
 560
 561   /* nunroll = total number of copies of the original loop body in
 562      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 563   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 564   nunroll_by_av
 565     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 566   if (nunroll > nunroll_by_av)
 567     nunroll = nunroll_by_av;
 568   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 569     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 570
 571   /* Skip big loops.  */
 572   if (nunroll <= 1)
 573     {
 574       if (dump_file)
 575         fprintf (dump_file, ";; Not considering loop, is too big\n");
 576       return;
 577     }
 578
 579   /* Check for simple loops.  */
 580   desc = get_simple_loop_desc (loop);
 581
 582   /* Check number of iterations.  */
 583   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 584     {
 585       if (dump_file)
 586         fprintf (dump_file,
 587                  ";; Unable to prove that the loop iterates constant times\n");
 588       return;
 589     }
 590
 591   /* Check whether the loop rolls enough to consider.  */
 592   if (desc->niter < 2 * nunroll)
 593     {
 594       if (dump_file)
 595         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 596       return;
 597     }
 598
 599   /* Success; now compute number of iterations to unroll.  We alter
 600      nunroll so that as few as possible copies of loop body are
 601      necessary, while still not decreasing the number of unrollings
 602      too much (at most by 1).  */
 603   best_copies = 2 * nunroll + 10;
 604
 605   i = 2 * nunroll + 2;
 606   if (i - 1 >= desc->niter)
 607     i = desc->niter - 2;
 608
 609   for (; i >= nunroll - 1; i--)
 610     {
 611       unsigned exit_mod = desc->niter % (i + 1);
 612
 613       if (!loop_exit_at_end_p (loop))
 614         n_copies = exit_mod + i + 1;
 615       else if (exit_mod != (unsigned) i
 616                || desc->noloop_assumptions != NULL_RTX)
 617         n_copies = exit_mod + i + 2;
 618       else
 619         n_copies = i + 1;
 620
 621       if (n_copies < best_copies)
 622         {
 623           best_copies = n_copies;
 624           best_unroll = i;
 625         }
 626     }
 627
 628   if (dump_file)
 629     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 630              best_unroll + 1, best_copies, nunroll);
 631
 632   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 633   loop->lpt_decision.times = best_unroll;
 634
 635   if (dump_file)
 636     fprintf (dump_file,
 637              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 638              loop->lpt_decision.times);
 639 }
 640
 641 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 642    times.  The transformation does this:
 643
 644    for (i = 0; i < 102; i++)
 645      body;
 646
 647    ==>
 648
 649    i = 0;
 650    body; i++;
 651    body; i++;
 652    while (i < 102)
 653      {
 654        body; i++;
 655        body; i++;
 656        body; i++;
 657        body; i++;
 658      }
 659   */
 660 static void
 661 unroll_loop_constant_iterations (struct loop *loop)
 662 {
 663   unsigned HOST_WIDE_INT niter;
 664   unsigned exit_mod;
 665   sbitmap wont_exit;
 666   unsigned n_remove_edges, i;
 667   edge *remove_edges;
 668   unsigned max_unroll = loop->lpt_decision.times;
 669   struct niter_desc *desc = get_simple_loop_desc (loop);
 670   bool exit_at_end = loop_exit_at_end_p (loop);
 671   struct opt_info *opt_info = NULL;
 672   bool ok;
 673
 674   niter = desc->niter;
 675
 676   /* Should not get here (such loop should be peeled instead).  */
 677   gcc_assert (niter > max_unroll + 1);
 678
 679   exit_mod = niter % (max_unroll + 1);
 680
 681   wont_exit = sbitmap_alloc (max_unroll + 1);
 682   sbitmap_ones (wont_exit);
 683
 684   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
 685   n_remove_edges = 0;
 686   if (flag_split_ivs_in_unroller
 687       || flag_variable_expansion_in_unroller)
 688     opt_info = analyze_insns_in_loop (loop);
 689
 690   if (!exit_at_end)
 691     {
 692       /* The exit is not at the end of the loop; leave exit test
 693          in the first copy, so that the loops that start with test
 694          of exit condition have continuous body after unrolling.  */
 695
 696       if (dump_file)
 697         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 698
 699       /* Peel exit_mod iterations.  */
 700       RESET_BIT (wont_exit, 0);
 701       if (desc->noloop_assumptions)
 702         RESET_BIT (wont_exit, 1);
 703
 704       if (exit_mod)
 705         {
 706           opt_info_start_duplication (opt_info);
 707           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 708                                               exit_mod,
 709                                               wont_exit, desc->out_edge,
 710                                               remove_edges, &n_remove_edges,
 711                                               DLTHE_FLAG_UPDATE_FREQ
 712                                               | (opt_info && exit_mod > 1
 713                                                  ? DLTHE_RECORD_COPY_NUMBER
 714                                                    : 0));
 715           gcc_assert (ok);
 716
 717           if (opt_info && exit_mod > 1)
 718             apply_opt_in_copies (opt_info, exit_mod, false, false);
 719
 720           desc->noloop_assumptions = NULL_RTX;
 721           desc->niter -= exit_mod;
 722           desc->niter_max -= exit_mod;
 723         }
 724
 725       SET_BIT (wont_exit, 1);
 726     }
 727   else
 728     {
 729       /* Leave exit test in last copy, for the same reason as above if
 730          the loop tests the condition at the end of loop body.  */
 731
 732       if (dump_file)
 733         fprintf (dump_file, ";; Condition on end of loop.\n");
 734
 735       /* We know that niter >= max_unroll + 2; so we do not need to care of
 736          case when we would exit before reaching the loop.  So just peel
 737          exit_mod + 1 iterations.  */
 738       if (exit_mod != max_unroll
 739           || desc->noloop_assumptions)
 740         {
 741           RESET_BIT (wont_exit, 0);
 742           if (desc->noloop_assumptions)
 743             RESET_BIT (wont_exit, 1);
 744
 745           opt_info_start_duplication (opt_info);
 746           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 747                                               exit_mod + 1,
 748                                               wont_exit, desc->out_edge,
 749                                               remove_edges, &n_remove_edges,
 750                                               DLTHE_FLAG_UPDATE_FREQ
 751                                               | (opt_info && exit_mod > 0
 752                                                  ? DLTHE_RECORD_COPY_NUMBER
 753                                                    : 0));
 754           gcc_assert (ok);
 755
 756           if (opt_info && exit_mod > 0)
 757             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 758
 759           desc->niter -= exit_mod + 1;
 760           desc->niter_max -= exit_mod + 1;
 761           desc->noloop_assumptions = NULL_RTX;
 762
 763           SET_BIT (wont_exit, 0);
 764           SET_BIT (wont_exit, 1);
 765         }
 766
 767       RESET_BIT (wont_exit, max_unroll);
 768     }
 769
 770   /* Now unroll the loop.  */
 771
 772   opt_info_start_duplication (opt_info);
 773   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 774                                       max_unroll,
 775                                       wont_exit, desc->out_edge,
 776                                       remove_edges, &n_remove_edges,
 777                                       DLTHE_FLAG_UPDATE_FREQ
 778                                       | (opt_info
 779                                          ? DLTHE_RECORD_COPY_NUMBER
 780                                            : 0));
 781   gcc_assert (ok);
 782
 783   if (opt_info)
 784     {
 785       apply_opt_in_copies (opt_info, max_unroll, true, true);
 786       free_opt_info (opt_info);
 787     }
 788
 789   free (wont_exit);
 790
 791   if (exit_at_end)
 792     {
 793       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 794       /* Find a new in and out edge; they are in the last copy we have made.  */
 795
 796       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 797         {
 798           desc->out_edge = EDGE_SUCC (exit_block, 0);
 799           desc->in_edge = EDGE_SUCC (exit_block, 1);
 800         }
 801       else
 802         {
 803           desc->out_edge = EDGE_SUCC (exit_block, 1);
 804           desc->in_edge = EDGE_SUCC (exit_block, 0);
 805         }
 806     }
 807
 808   desc->niter /= max_unroll + 1;
 809   desc->niter_max /= max_unroll + 1;
 810   desc->niter_expr = GEN_INT (desc->niter);
 811
 812   /* Remove the edges.  */
 813   for (i = 0; i < n_remove_edges; i++)
 814     remove_path (remove_edges[i]);
 815   free (remove_edges);
 816
 817   if (dump_file)
 818     fprintf (dump_file,
 819              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 820              max_unroll, num_loop_insns (loop));
 821 }
 822
 823 /* Decide whether to unroll LOOP iterating runtime computable number of times
 824    and how much.  */
 825 static void
 826 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 827 {
 828   unsigned nunroll, nunroll_by_av, i;
 829   struct niter_desc *desc;
 830
 831   if (!(flags & UAP_UNROLL))
 832     {
 833       /* We were not asked to, just return back silently.  */
 834       return;
 835     }
 836
 837   if (dump_file)
 838     fprintf (dump_file,
 839              "\n;; Considering unrolling loop with runtime "
 840              "computable number of iterations\n");
 841
 842   /* nunroll = total number of copies of the original loop body in
 843      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 844   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 845   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 846   if (nunroll > nunroll_by_av)
 847     nunroll = nunroll_by_av;
 848   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 849     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 850
 851   /* Skip big loops.  */
 852   if (nunroll <= 1)
 853     {
 854       if (dump_file)
 855         fprintf (dump_file, ";; Not considering loop, is too big\n");
 856       return;
 857     }
 858
 859   /* Check for simple loops.  */
 860   desc = get_simple_loop_desc (loop);
 861
 862   /* Check simpleness.  */
 863   if (!desc->simple_p || desc->assumptions)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file,
 867                  ";; Unable to prove that the number of iterations "
 868                  "can be counted in runtime\n");
 869       return;
 870     }
 871
 872   if (desc->const_iter)
 873     {
 874       if (dump_file)
 875         fprintf (dump_file, ";; Loop iterates constant times\n");
 876       return;
 877     }
 878
 879   /* If we have profile feedback, check whether the loop rolls.  */
 880   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 881     {
 882       if (dump_file)
 883         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 884       return;
 885     }
 886
 887   /* Success; now force nunroll to be power of 2, as we are unable to
 888      cope with overflows in computation of number of iterations.  */
 889   for (i = 1; 2 * i <= nunroll; i *= 2)
 890     continue;
 891
 892   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 893   loop->lpt_decision.times = i - 1;
 894
 895   if (dump_file)
 896     fprintf (dump_file,
 897              ";; Decided to unroll the runtime computable "
 898              "times rolling loop, %d times.\n",
 899              loop->lpt_decision.times);
 900 }
 901
 902 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 903    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 904    and NULL is returned instead.  */
 905
 906 basic_block
 907 split_edge_and_insert (edge e, rtx insns)
 908 {
 909   basic_block bb;
 910
 911   if (!insns)
 912     return NULL;
 913   bb = split_edge (e);
 914   emit_insn_after (insns, BB_END (bb));
 915   bb->flags |= BB_SUPERBLOCK;
 916   return bb;
 917 }
 918
 919 /* Unroll LOOP for that we are able to count number of iterations in runtime
 920    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 921    extra care for case n < 0):
 922
 923    for (i = 0; i < n; i++)
 924      body;
 925
 926    ==>
 927
 928    i = 0;
 929    mod = n % 4;
 930
 931    switch (mod)
 932      {
 933        case 3:
 934          body; i++;
 935        case 2:
 936          body; i++;
 937        case 1:
 938          body; i++;
 939        case 0: ;
 940      }
 941
 942    while (i < n)
 943      {
 944        body; i++;
 945        body; i++;
 946        body; i++;
 947        body; i++;
 948      }
 949    */
 950 static void
 951 unroll_loop_runtime_iterations (struct loop *loop)
 952 {
 953   rtx old_niter, niter, init_code, branch_code, tmp;
 954   unsigned i, j, p;
 955   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 956   unsigned n_dom_bbs;
 957   sbitmap wont_exit;
 958   int may_exit_copy;
 959   unsigned n_peel, n_remove_edges;
 960   edge *remove_edges, e;
 961   bool extra_zero_check, last_may_exit;
 962   unsigned max_unroll = loop->lpt_decision.times;
 963   struct niter_desc *desc = get_simple_loop_desc (loop);
 964   bool exit_at_end = loop_exit_at_end_p (loop);
 965   struct opt_info *opt_info = NULL;
 966   bool ok;
 967
 968   if (flag_split_ivs_in_unroller
 969       || flag_variable_expansion_in_unroller)
 970     opt_info = analyze_insns_in_loop (loop);
 971
 972   /* Remember blocks whose dominators will have to be updated.  */
 973   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 974   n_dom_bbs = 0;
 975
 976   body = get_loop_body (loop);
 977   for (i = 0; i < loop->num_nodes; i++)
 978     {
 979       unsigned nldom;
 980       basic_block *ldom;
 981
 982       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 983       for (j = 0; j < nldom; j++)
 984         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 985           dom_bbs[n_dom_bbs++] = ldom[j];
 986
 987       free (ldom);
 988     }
 989   free (body);
 990
 991   if (!exit_at_end)
 992     {
 993       /* Leave exit in first copy (for explanation why see comment in
 994          unroll_loop_constant_iterations).  */
 995       may_exit_copy = 0;
 996       n_peel = max_unroll - 1;
 997       extra_zero_check = true;
 998       last_may_exit = false;
 999     }
1000   else
1001     {
1002       /* Leave exit in last copy (for explanation why see comment in
1003          unroll_loop_constant_iterations).  */
1004       may_exit_copy = max_unroll;
1005       n_peel = max_unroll;
1006       extra_zero_check = false;
1007       last_may_exit = true;
1008     }
1009
1010   /* Get expression for number of iterations.  */
1011   start_sequence ();
1012   old_niter = niter = gen_reg_rtx (desc->mode);
1013   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1014   if (tmp != niter)
1015     emit_move_insn (niter, tmp);
1016
1017   /* Count modulo by ANDing it with max_unroll; we use the fact that
1018      the number of unrollings is a power of two, and thus this is correct
1019      even if there is overflow in the computation.  */
1020   niter = expand_simple_binop (desc->mode, AND,
1021                                niter,
1022                                GEN_INT (max_unroll),
1023                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1024
1025   init_code = get_insns ();
1026   end_sequence ();
1027
1028   /* Precondition the loop.  */
1029   split_edge_and_insert (loop_preheader_edge (loop), init_code);
1030
1031   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1032   n_remove_edges = 0;
1033
1034   wont_exit = sbitmap_alloc (max_unroll + 2);
1035
1036   /* Peel the first copy of loop body (almost always we must leave exit test
1037      here; the only exception is when we have extra zero check and the number
1038      of iterations is reliable.  Also record the place of (possible) extra
1039      zero check.  */
1040   sbitmap_zero (wont_exit);
1041   if (extra_zero_check
1042       && !desc->noloop_assumptions)
1043     SET_BIT (wont_exit, 1);
1044   ezc_swtch = loop_preheader_edge (loop)->src;
1045   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1046                                       1, wont_exit, desc->out_edge,
1047                                       remove_edges, &n_remove_edges,
1048                                       DLTHE_FLAG_UPDATE_FREQ);
1049   gcc_assert (ok);
1050
1051   /* Record the place where switch will be built for preconditioning.  */
1052   swtch = split_edge (loop_preheader_edge (loop));
1053
1054   for (i = 0; i < n_peel; i++)
1055     {
1056       /* Peel the copy.  */
1057       sbitmap_zero (wont_exit);
1058       if (i != n_peel - 1 || !last_may_exit)
1059         SET_BIT (wont_exit, 1);
1060       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1061                                           1, wont_exit, desc->out_edge,
1062                                           remove_edges, &n_remove_edges,
1063                                           DLTHE_FLAG_UPDATE_FREQ);
1064       gcc_assert (ok);
1065
1066       /* Create item for switch.  */
1067       j = n_peel - i - (extra_zero_check ? 0 : 1);
1068       p = REG_BR_PROB_BASE / (i + 2);
1069
1070       preheader = split_edge (loop_preheader_edge (loop));
1071       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1072                                           block_label (preheader), p,
1073                                           NULL_RTX);
1074
1075       /* We rely on the fact that the compare and jump cannot be optimized out,
1076          and hence the cfg we create is correct.  */
1077       gcc_assert (branch_code != NULL_RTX);
1078
1079       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1080       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1081       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1082       e = make_edge (swtch, preheader,
1083                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1084       e->probability = p;
1085     }
1086
1087   if (extra_zero_check)
1088     {
1089       /* Add branch for zero iterations.  */
1090       p = REG_BR_PROB_BASE / (max_unroll + 1);
1091       swtch = ezc_swtch;
1092       preheader = split_edge (loop_preheader_edge (loop));
1093       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1094                                           block_label (preheader), p,
1095                                           NULL_RTX);
1096       gcc_assert (branch_code != NULL_RTX);
1097
1098       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1099       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1100       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1101       e = make_edge (swtch, preheader,
1102                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1103       e->probability = p;
1104     }
1105
1106   /* Recount dominators for outer blocks.  */
1107   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1108
1109   /* And unroll loop.  */
1110
1111   sbitmap_ones (wont_exit);
1112   RESET_BIT (wont_exit, may_exit_copy);
1113   opt_info_start_duplication (opt_info);
1114
1115   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1116                                       max_unroll,
1117                                       wont_exit, desc->out_edge,
1118                                       remove_edges, &n_remove_edges,
1119                                       DLTHE_FLAG_UPDATE_FREQ
1120                                       | (opt_info
1121                                          ? DLTHE_RECORD_COPY_NUMBER
1122                                            : 0));
1123   gcc_assert (ok);
1124
1125   if (opt_info)
1126     {
1127       apply_opt_in_copies (opt_info, max_unroll, true, true);
1128       free_opt_info (opt_info);
1129     }
1130
1131   free (wont_exit);
1132
1133   if (exit_at_end)
1134     {
1135       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1136       /* Find a new in and out edge; they are in the last copy we have
1137          made.  */
1138
1139       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1140         {
1141           desc->out_edge = EDGE_SUCC (exit_block, 0);
1142           desc->in_edge = EDGE_SUCC (exit_block, 1);
1143         }
1144       else
1145         {
1146           desc->out_edge = EDGE_SUCC (exit_block, 1);
1147           desc->in_edge = EDGE_SUCC (exit_block, 0);
1148         }
1149     }
1150
1151   /* Remove the edges.  */
1152   for (i = 0; i < n_remove_edges; i++)
1153     remove_path (remove_edges[i]);
1154   free (remove_edges);
1155
1156   /* We must be careful when updating the number of iterations due to
1157      preconditioning and the fact that the value must be valid at entry
1158      of the loop.  After passing through the above code, we see that
1159      the correct new number of iterations is this:  */
1160   gcc_assert (!desc->const_iter);
1161   desc->niter_expr =
1162     simplify_gen_binary (UDIV, desc->mode, old_niter,
1163                          GEN_INT (max_unroll + 1));
1164   desc->niter_max /= max_unroll + 1;
1165   if (exit_at_end)
1166     {
1167       desc->niter_expr =
1168         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1169       desc->noloop_assumptions = NULL_RTX;
1170       desc->niter_max--;
1171     }
1172
1173   if (dump_file)
1174     fprintf (dump_file,
1175              ";; Unrolled loop %d times, counting # of iterations "
1176              "in runtime, %i insns\n",
1177              max_unroll, num_loop_insns (loop));
1178
1179   if (dom_bbs)
1180     free (dom_bbs);
1181 }
1182
1183 /* Decide whether to simply peel LOOP and how much.  */
1184 static void
1185 decide_peel_simple (struct loop *loop, int flags)
1186 {
1187   unsigned npeel;
1188   struct niter_desc *desc;
1189
1190   if (!(flags & UAP_PEEL))
1191     {
1192       /* We were not asked to, just return back silently.  */
1193       return;
1194     }
1195
1196   if (dump_file)
1197     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1198
1199   /* npeel = number of iterations to peel.  */
1200   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1201   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1202     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1203
1204   /* Skip big loops.  */
1205   if (!npeel)
1206     {
1207       if (dump_file)
1208         fprintf (dump_file, ";; Not considering loop, is too big\n");
1209       return;
1210     }
1211
1212   /* Check for simple loops.  */
1213   desc = get_simple_loop_desc (loop);
1214
1215   /* Check number of iterations.  */
1216   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1217     {
1218       if (dump_file)
1219         fprintf (dump_file, ";; Loop iterates constant times\n");
1220       return;
1221     }
1222
1223   /* Do not simply peel loops with branches inside -- it increases number
1224      of mispredicts.  */
1225   if (num_loop_branches (loop) > 1)
1226     {
1227       if (dump_file)
1228         fprintf (dump_file, ";; Not peeling, contains branches\n");
1229       return;
1230     }
1231
1232   if (loop->header->count)
1233     {
1234       unsigned niter = expected_loop_iterations (loop);
1235       if (niter + 1 > npeel)
1236         {
1237           if (dump_file)
1238             {
1239               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1240               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1241                        (HOST_WIDEST_INT) (niter + 1));
1242               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1243                        npeel);
1244             }
1245           return;
1246         }
1247       npeel = niter + 1;
1248     }
1249   else
1250     {
1251       /* For now we have no good heuristics to decide whether loop peeling
1252          will be effective, so disable it.  */
1253       if (dump_file)
1254         fprintf (dump_file,
1255                  ";; Not peeling loop, no evidence it will be profitable\n");
1256       return;
1257     }
1258
1259   /* Success.  */
1260   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1261   loop->lpt_decision.times = npeel;
1262
1263   if (dump_file)
1264     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1265              loop->lpt_decision.times);
1266 }
1267
1268 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1269    while (cond)
1270      body;
1271
1272    ==>
1273
1274    if (!cond) goto end;
1275    body;
1276    if (!cond) goto end;
1277    body;
1278    while (cond)
1279      body;
1280    end: ;
1281    */
1282 static void
1283 peel_loop_simple (struct loop *loop)
1284 {
1285   sbitmap wont_exit;
1286   unsigned npeel = loop->lpt_decision.times;
1287   struct niter_desc *desc = get_simple_loop_desc (loop);
1288   struct opt_info *opt_info = NULL;
1289   bool ok;
1290
1291   if (flag_split_ivs_in_unroller && npeel > 1)
1292     opt_info = analyze_insns_in_loop (loop);
1293
1294   wont_exit = sbitmap_alloc (npeel + 1);
1295   sbitmap_zero (wont_exit);
1296
1297   opt_info_start_duplication (opt_info);
1298
1299   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1300                                       npeel, wont_exit,
1301                                       NULL, NULL,
1302                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1303                                       | (opt_info
1304                                          ? DLTHE_RECORD_COPY_NUMBER
1305                                            : 0));
1306   gcc_assert (ok);
1307
1308   free (wont_exit);
1309
1310   if (opt_info)
1311     {
1312       apply_opt_in_copies (opt_info, npeel, false, false);
1313       free_opt_info (opt_info);
1314     }
1315
1316   if (desc->simple_p)
1317     {
1318       if (desc->const_iter)
1319         {
1320           desc->niter -= npeel;
1321           desc->niter_expr = GEN_INT (desc->niter);
1322           desc->noloop_assumptions = NULL_RTX;
1323         }
1324       else
1325         {
1326           /* We cannot just update niter_expr, as its value might be clobbered
1327              inside loop.  We could handle this by counting the number into
1328              temporary just like we do in runtime unrolling, but it does not
1329              seem worthwhile.  */
1330           free_simple_loop_desc (loop);
1331         }
1332     }
1333   if (dump_file)
1334     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1335 }
1336
1337 /* Decide whether to unroll LOOP stupidly and how much.  */
1338 static void
1339 decide_unroll_stupid (struct loop *loop, int flags)
1340 {
1341   unsigned nunroll, nunroll_by_av, i;
1342   struct niter_desc *desc;
1343
1344   if (!(flags & UAP_UNROLL_ALL))
1345     {
1346       /* We were not asked to, just return back silently.  */
1347       return;
1348     }
1349
1350   if (dump_file)
1351     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1352
1353   /* nunroll = total number of copies of the original loop body in
1354      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1355   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1356   nunroll_by_av
1357     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1358   if (nunroll > nunroll_by_av)
1359     nunroll = nunroll_by_av;
1360   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1361     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1362
1363   /* Skip big loops.  */
1364   if (nunroll <= 1)
1365     {
1366       if (dump_file)
1367         fprintf (dump_file, ";; Not considering loop, is too big\n");
1368       return;
1369     }
1370
1371   /* Check for simple loops.  */
1372   desc = get_simple_loop_desc (loop);
1373
1374   /* Check simpleness.  */
1375   if (desc->simple_p && !desc->assumptions)
1376     {
1377       if (dump_file)
1378         fprintf (dump_file, ";; The loop is simple\n");
1379       return;
1380     }
1381
1382   /* Do not unroll loops with branches inside -- it increases number
1383      of mispredicts.  */
1384   if (num_loop_branches (loop) > 1)
1385     {
1386       if (dump_file)
1387         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1388       return;
1389     }
1390
1391   /* If we have profile feedback, check whether the loop rolls.  */
1392   if (loop->header->count
1393       && expected_loop_iterations (loop) < 2 * nunroll)
1394     {
1395       if (dump_file)
1396         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1397       return;
1398     }
1399
1400   /* Success.  Now force nunroll to be power of 2, as it seems that this
1401      improves results (partially because of better alignments, partially
1402      because of some dark magic).  */
1403   for (i = 1; 2 * i <= nunroll; i *= 2)
1404     continue;
1405
1406   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1407   loop->lpt_decision.times = i - 1;
1408
1409   if (dump_file)
1410     fprintf (dump_file,
1411              ";; Decided to unroll the loop stupidly, %d times.\n",
1412              loop->lpt_decision.times);
1413 }
1414
1415 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1416    while (cond)
1417      body;
1418
1419    ==>
1420
1421    while (cond)
1422      {
1423        body;
1424        if (!cond) break;
1425        body;
1426        if (!cond) break;
1427        body;
1428        if (!cond) break;
1429        body;
1430      }
1431    */
1432 static void
1433 unroll_loop_stupid (struct loop *loop)
1434 {
1435   sbitmap wont_exit;
1436   unsigned nunroll = loop->lpt_decision.times;
1437   struct niter_desc *desc = get_simple_loop_desc (loop);
1438   struct opt_info *opt_info = NULL;
1439   bool ok;
1440
1441   if (flag_split_ivs_in_unroller
1442       || flag_variable_expansion_in_unroller)
1443     opt_info = analyze_insns_in_loop (loop);
1444
1445
1446   wont_exit = sbitmap_alloc (nunroll + 1);
1447   sbitmap_zero (wont_exit);
1448   opt_info_start_duplication (opt_info);
1449
1450   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1451                                       nunroll, wont_exit,
1452                                       NULL, NULL, NULL,
1453                                       DLTHE_FLAG_UPDATE_FREQ
1454                                       | (opt_info
1455                                          ? DLTHE_RECORD_COPY_NUMBER
1456                                            : 0));
1457   gcc_assert (ok);
1458
1459   if (opt_info)
1460     {
1461       apply_opt_in_copies (opt_info, nunroll, true, true);
1462       free_opt_info (opt_info);
1463     }
1464
1465   free (wont_exit);
1466
1467   if (desc->simple_p)
1468     {
1469       /* We indeed may get here provided that there are nontrivial assumptions
1470          for a loop to be really simple.  We could update the counts, but the
1471          problem is that we are unable to decide which exit will be taken
1472          (not really true in case the number of iterations is constant,
1473          but noone will do anything with this information, so we do not
1474          worry about it).  */
1475       desc->simple_p = false;
1476     }
1477
1478   if (dump_file)
1479     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1480              nunroll, num_loop_insns (loop));
1481 }
1482
1483 /* A hash function for information about insns to split.  */
1484
1485 static hashval_t
1486 si_info_hash (const void *ivts)
1487 {
1488   return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1489 }
1490
1491 /* An equality functions for information about insns to split.  */
1492
1493 static int
1494 si_info_eq (const void *ivts1, const void *ivts2)
1495 {
1496   const struct iv_to_split *i1 = ivts1;
1497   const struct iv_to_split *i2 = ivts2;
1498
1499   return i1->insn == i2->insn;
1500 }
1501
1502 /* Return a hash for VES, which is really a "var_to_expand *".  */
1503
1504 static hashval_t
1505 ve_info_hash (const void *ves)
1506 {
1507   return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1508 }
1509
1510 /* Return true if IVTS1 and IVTS2 (which are really both of type
1511    "var_to_expand *") refer to the same instruction.  */
1512
1513 static int
1514 ve_info_eq (const void *ivts1, const void *ivts2)
1515 {
1516   const struct var_to_expand *i1 = ivts1;
1517   const struct var_to_expand *i2 = ivts2;
1518
1519   return i1->insn == i2->insn;
1520 }
1521
1522 /* Returns true if REG is referenced in one insn in LOOP.  */
1523
1524 bool
1525 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1526 {
1527   basic_block *body, bb;
1528   unsigned i;
1529   int count_ref = 0;
1530   rtx insn;
1531
1532   body = get_loop_body (loop);
1533   for (i = 0; i < loop->num_nodes; i++)
1534     {
1535       bb = body[i];
1536
1537       FOR_BB_INSNS (bb, insn)
1538       {
1539         if (rtx_referenced_p (reg, insn))
1540           count_ref++;
1541       }
1542     }
1543   return (count_ref  == 1);
1544 }
1545
1546 /* Determine whether INSN contains an accumulator
1547    which can be expanded into separate copies,
1548    one for each copy of the LOOP body.
1549
1550    for (i = 0 ; i < n; i++)
1551      sum += a[i];
1552
1553    ==>
1554
1555    sum += a[i]
1556    ....
1557    i = i+1;
1558    sum1 += a[i]
1559    ....
1560    i = i+1
1561    sum2 += a[i];
1562    ....
1563
1564    Return NULL if INSN contains no opportunity for expansion of accumulator.
1565    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1566    information and return a pointer to it.
1567 */
1568
1569 static struct var_to_expand *
1570 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1571 {
1572   rtx set, dest, src, op1;
1573   struct var_to_expand *ves;
1574   enum machine_mode mode1, mode2;
1575
1576   set = single_set (insn);
1577   if (!set)
1578     return NULL;
1579
1580   dest = SET_DEST (set);
1581   src = SET_SRC (set);
1582
1583   if (GET_CODE (src) != PLUS
1584       && GET_CODE (src) != MINUS
1585       && GET_CODE (src) != MULT)
1586     return NULL;
1587
1588   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1589      in MD.  But if there is no optab to generate the insn, we can not
1590      perform the variable expansion.  This can happen if an MD provides
1591      an insn but not a named pattern to generate it, for example to avoid
1592      producing code that needs additional mode switches like for x87/mmx.
1593
1594      So we check have_insn_for which looks for an optab for the operation
1595      in SRC.  If it doesn't exist, we can't perform the expansion even
1596      though INSN is valid.  */
1597   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1598     return NULL;
1599
1600   if (!XEXP (src, 0))
1601     return NULL;
1602
1603   op1 = XEXP (src, 0);
1604
1605   if (!REG_P (dest)
1606       && !(GET_CODE (dest) == SUBREG
1607            && REG_P (SUBREG_REG (dest))))
1608     return NULL;
1609
1610   if (!rtx_equal_p (dest, op1))
1611     return NULL;
1612
1613   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1614     return NULL;
1615
1616   if (rtx_referenced_p (dest, XEXP (src, 1)))
1617     return NULL;
1618
1619   mode1 = GET_MODE (dest);
1620   mode2 = GET_MODE (XEXP (src, 1));
1621   if ((FLOAT_MODE_P (mode1)
1622        || FLOAT_MODE_P (mode2))
1623       && !flag_unsafe_math_optimizations)
1624     return NULL;
1625
1626   /* Record the accumulator to expand.  */
1627   ves = XNEW (struct var_to_expand);
1628   ves->insn = insn;
1629   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1630   ves->reg = copy_rtx (dest);
1631   ves->op = GET_CODE (src);
1632   ves->expansion_count = 0;
1633   ves->reuse_expansion = 0;
1634   return ves;
1635 }
1636
1637 /* Determine whether there is an induction variable in INSN that
1638    we would like to split during unrolling.
1639
1640    I.e. replace
1641
1642    i = i + 1;
1643    ...
1644    i = i + 1;
1645    ...
1646    i = i + 1;
1647    ...
1648
1649    type chains by
1650
1651    i0 = i + 1
1652    ...
1653    i = i0 + 1
1654    ...
1655    i = i0 + 2
1656    ...
1657
1658    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1659    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1660    pointer to it.  */
1661
1662 static struct iv_to_split *
1663 analyze_iv_to_split_insn (rtx insn)
1664 {
1665   rtx set, dest;
1666   struct rtx_iv iv;
1667   struct iv_to_split *ivts;
1668   bool ok;
1669
1670   /* For now we just split the basic induction variables.  Later this may be
1671      extended for example by selecting also addresses of memory references.  */
1672   set = single_set (insn);
1673   if (!set)
1674     return NULL;
1675
1676   dest = SET_DEST (set);
1677   if (!REG_P (dest))
1678     return NULL;
1679
1680   if (!biv_p (insn, dest))
1681     return NULL;
1682
1683   ok = iv_analyze_result (insn, dest, &iv);
1684
1685   /* This used to be an assert under the assumption that if biv_p returns
1686      true that iv_analyze_result must also return true.  However, that
1687      assumption is not strictly correct as evidenced by pr25569.
1688
1689      Returning NULL when iv_analyze_result returns false is safe and
1690      avoids the problems in pr25569 until the iv_analyze_* routines
1691      can be fixed, which is apparently hard and time consuming
1692      according to their author.  */
1693   if (! ok)
1694     return NULL;
1695
1696   if (iv.step == const0_rtx
1697       || iv.mode != iv.extend_mode)
1698     return NULL;
1699
1700   /* Record the insn to split.  */
1701   ivts = XNEW (struct iv_to_split);
1702   ivts->insn = insn;
1703   ivts->base_var = NULL_RTX;
1704   ivts->step = iv.step;
1705   ivts->n_loc = 1;
1706   ivts->loc[0] = 1;
1707
1708   return ivts;
1709 }
1710
1711 /* Determines which of insns in LOOP can be optimized.
1712    Return a OPT_INFO struct with the relevant hash tables filled
1713    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1714    is undefined for the return value.  */
1715
1716 static struct opt_info *
1717 analyze_insns_in_loop (struct loop *loop)
1718 {
1719   basic_block *body, bb;
1720   unsigned i;
1721   struct opt_info *opt_info = XCNEW (struct opt_info);
1722   rtx insn;
1723   struct iv_to_split *ivts = NULL;
1724   struct var_to_expand *ves = NULL;
1725   PTR *slot1;
1726   PTR *slot2;
1727   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1728   edge exit;
1729   bool can_apply = false;
1730
1731   iv_analysis_loop_init (loop);
1732
1733   body = get_loop_body (loop);
1734
1735   if (flag_split_ivs_in_unroller)
1736     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1737                                             si_info_hash, si_info_eq, free);
1738
1739   /* Record the loop exit bb and loop preheader before the unrolling.  */
1740   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1741
1742   if (VEC_length (edge, edges) == 1)
1743     {
1744       exit = VEC_index (edge, edges, 0);
1745       if (!(exit->flags & EDGE_COMPLEX))
1746         {
1747           opt_info->loop_exit = split_edge (exit);
1748           can_apply = true;
1749         }
1750     }
1751
1752   if (flag_variable_expansion_in_unroller
1753       && can_apply)
1754     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1755                                                       ve_info_hash, ve_info_eq, free);
1756
1757   for (i = 0; i < loop->num_nodes; i++)
1758     {
1759       bb = body[i];
1760       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1761         continue;
1762
1763       FOR_BB_INSNS (bb, insn)
1764       {
1765         if (!INSN_P (insn))
1766           continue;
1767
1768         if (opt_info->insns_to_split)
1769           ivts = analyze_iv_to_split_insn (insn);
1770
1771         if (ivts)
1772           {
1773             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1774             *slot1 = ivts;
1775             continue;
1776           }
1777
1778         if (opt_info->insns_with_var_to_expand)
1779           ves = analyze_insn_to_expand_var (loop, insn);
1780
1781         if (ves)
1782           {
1783             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1784             *slot2 = ves;
1785           }
1786       }
1787     }
1788
1789   VEC_free (edge, heap, edges);
1790   free (body);
1791   return opt_info;
1792 }
1793
1794 /* Called just before loop duplication.  Records start of duplicated area
1795    to OPT_INFO.  */
1796
1797 static void
1798 opt_info_start_duplication (struct opt_info *opt_info)
1799 {
1800   if (opt_info)
1801     opt_info->first_new_block = last_basic_block;
1802 }
1803
/* Return the number of iterations between the initialization of the base
   variable and copy number N_COPY.  N_COPIES is the total number of newly
   created copies.  UNROLLING is true if we are unrolling (not peeling)
   the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization is done in the original loop body
     (number 0), so the copy number is the distance.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the initialization occurs in copy number 1 and the
     original loop (number 0) comes last.  */
  return n_copy != 0 ? n_copy - 1 : n_copies;
}
1828
1829 /* Locate in EXPR the expression corresponding to the location recorded
1830    in IVTS, and return a pointer to the RTX for this location.  */
1831
1832 static rtx *
1833 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1834 {
1835   unsigned i;
1836   rtx *ret = &expr;
1837
1838   for (i = 0; i < ivts->n_loc; i++)
1839     ret = &XEXP (*ret, ivts->loc[i]);
1840
1841   return ret;
1842 }
1843
1844 /* Allocate basic variable for the induction variable chain.  Callback for
1845    htab_traverse.  */
1846
1847 static int
1848 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1849 {
1850   struct iv_to_split *ivts = *slot;
1851   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1852
1853   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1854
1855   return 1;
1856 }
1857
1858 /* Insert initialization of basic variable of IVTS before INSN, taking
1859    the initial value from INSN.  */
1860
1861 static void
1862 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1863 {
1864   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1865   rtx seq;
1866
1867   start_sequence ();
1868   expr = force_operand (expr, ivts->base_var);
1869   if (expr != ivts->base_var)
1870     emit_move_insn (ivts->base_var, expr);
1871   seq = get_insns ();
1872   end_sequence ();
1873
1874   emit_insn_before (seq, insn);
1875 }
1876
1877 /* Replace the use of induction variable described in IVTS in INSN
1878    by base variable + DELTA * step.  */
1879
1880 static void
1881 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1882 {
1883   rtx expr, *loc, seq, incr, var;
1884   enum machine_mode mode = GET_MODE (ivts->base_var);
1885   rtx src, dest, set;
1886
1887   /* Construct base + DELTA * step.  */
1888   if (!delta)
1889     expr = ivts->base_var;
1890   else
1891     {
1892       incr = simplify_gen_binary (MULT, mode,
1893                                   ivts->step, gen_int_mode (delta, mode));
1894       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1895                                   ivts->base_var, incr);
1896     }
1897
1898   /* Figure out where to do the replacement.  */
1899   loc = get_ivts_expr (single_set (insn), ivts);
1900
1901   /* If we can make the replacement right away, we're done.  */
1902   if (validate_change (insn, loc, expr, 0))
1903     return;
1904
1905   /* Otherwise, force EXPR into a register and try again.  */
1906   start_sequence ();
1907   var = gen_reg_rtx (mode);
1908   expr = force_operand (expr, var);
1909   if (expr != var)
1910     emit_move_insn (var, expr);
1911   seq = get_insns ();
1912   end_sequence ();
1913   emit_insn_before (seq, insn);
1914
1915   if (validate_change (insn, loc, var, 0))
1916     return;
1917
1918   /* The last chance.  Try recreating the assignment in insn
1919      completely from scratch.  */
1920   set = single_set (insn);
1921   gcc_assert (set);
1922
1923   start_sequence ();
1924   *loc = var;
1925   src = copy_rtx (SET_SRC (set));
1926   dest = copy_rtx (SET_DEST (set));
1927   src = force_operand (src, dest);
1928   if (src != dest)
1929     emit_move_insn (dest, src);
1930   seq = get_insns ();
1931   end_sequence ();
1932
1933   emit_insn_before (seq, insn);
1934   delete_insn (insn);
1935 }
1936
1937
1938 /* Return one expansion of the accumulator recorded in struct VE.  */
1939
1940 static rtx
1941 get_expansion (struct var_to_expand *ve)
1942 {
1943   rtx reg;
1944
1945   if (ve->reuse_expansion == 0)
1946     reg = ve->reg;
1947   else
1948     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1949
1950   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1951     ve->reuse_expansion = 0;
1952   else
1953     ve->reuse_expansion++;
1954
1955   return reg;
1956 }
1957
1958
1959 /* Given INSN replace the uses of the accumulator recorded in VE
1960    with a new register.  */
1961
1962 static void
1963 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1964 {
1965   rtx new_reg, set;
1966   bool really_new_expansion = false;
1967
1968   set = single_set (insn);
1969   gcc_assert (set);
1970
1971   /* Generate a new register only if the expansion limit has not been
1972      reached.  Else reuse an already existing expansion.  */
1973   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1974     {
1975       really_new_expansion = true;
1976       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1977     }
1978   else
1979     new_reg = get_expansion (ve);
1980
1981   validate_change (insn, &SET_DEST (set), new_reg, 1);
1982   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1983
1984   if (apply_change_group ())
1985     if (really_new_expansion)
1986       {
1987         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1988         ve->expansion_count++;
1989       }
1990 }
1991
1992 /* Initialize the variable expansions in loop preheader.
1993    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1994    basic block where the initialization of the expansions
1995    should take place.  */
1996
1997 static int
1998 insert_var_expansion_initialization (void **slot, void *place_p)
1999 {
2000   struct var_to_expand *ve = *slot;
2001   basic_block place = (basic_block)place_p;
2002   rtx seq, var, zero_init, insn;
2003   unsigned i;
2004
2005   if (VEC_length (rtx, ve->var_expansions) == 0)
2006     return 1;
2007
2008   start_sequence ();
2009   if (ve->op == PLUS || ve->op == MINUS)
2010     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2011       {
2012         zero_init =  CONST0_RTX (GET_MODE (var));
2013         emit_move_insn (var, zero_init);
2014       }
2015   else if (ve->op == MULT)
2016     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2017       {
2018         zero_init =  CONST1_RTX (GET_MODE (var));
2019         emit_move_insn (var, zero_init);
2020       }
2021
2022   seq = get_insns ();
2023   end_sequence ();
2024
2025   insn = BB_HEAD (place);
2026   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2027     insn = NEXT_INSN (insn);
2028
2029   emit_insn_after (seq, insn);
2030   /* Continue traversing the hash table.  */
2031   return 1;
2032 }
2033
2034 /*  Combine the variable expansions at the loop exit.
2035     Callbacks for htab_traverse.  PLACE_P is the loop exit
2036     basic block where the summation of the expansions should
2037     take place.  */
2038
2039 static int
2040 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2041 {
2042   struct var_to_expand *ve = *slot;
2043   basic_block place = (basic_block)place_p;
2044   rtx sum = ve->reg;
2045   rtx expr, seq, var, insn;
2046   unsigned i;
2047
2048   if (VEC_length (rtx, ve->var_expansions) == 0)
2049     return 1;
2050
2051   start_sequence ();
2052   if (ve->op == PLUS || ve->op == MINUS)
2053     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2054       {
2055         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2056                                    var, sum);
2057       }
2058   else if (ve->op == MULT)
2059     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2060       {
2061         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2062                                    var, sum);
2063       }
2064
2065   expr = force_operand (sum, ve->reg);
2066   if (expr != ve->reg)
2067     emit_move_insn (ve->reg, expr);
2068   seq = get_insns ();
2069   end_sequence ();
2070
2071   insn = BB_HEAD (place);
2072   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2073     insn = NEXT_INSN (insn);
2074
2075   emit_insn_after (seq, insn);
2076
2077   /* Continue traversing the hash table.  */
2078   return 1;
2079 }
2080
2081 /* Apply loop optimizations in loop copies using the
2082    data which gathered during the unrolling.  Structure
2083    OPT_INFO record that data.
2084
2085    UNROLLING is true if we unrolled (not peeled) the loop.
2086    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2087    the loop (as it should happen in complete unrolling, but not in ordinary
2088    peeling of the loop).  */
2089
2090 static void
2091 apply_opt_in_copies (struct opt_info *opt_info,
2092                      unsigned n_copies, bool unrolling,
2093                      bool rewrite_original_loop)
2094 {
2095   unsigned i, delta;
2096   basic_block bb, orig_bb;
2097   rtx insn, orig_insn, next;
2098   struct iv_to_split ivts_templ, *ivts;
2099   struct var_to_expand ve_templ, *ves;
2100
2101   /* Sanity check -- we need to put initialization in the original loop
2102      body.  */
2103   gcc_assert (!unrolling || rewrite_original_loop);
2104
2105   /* Allocate the basic variables (i0).  */
2106   if (opt_info->insns_to_split)
2107     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2108
2109   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2110     {
2111       bb = BASIC_BLOCK (i);
2112       orig_bb = get_bb_original (bb);
2113
2114       /* bb->aux holds position in copy sequence initialized by
2115          duplicate_loop_to_header_edge.  */
2116       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2117                                         unrolling);
2118       bb->aux = 0;
2119       orig_insn = BB_HEAD (orig_bb);
2120       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2121         {
2122           next = NEXT_INSN (insn);
2123           if (!INSN_P (insn))
2124             continue;
2125
2126           while (!INSN_P (orig_insn))
2127             orig_insn = NEXT_INSN (orig_insn);
2128
2129           ivts_templ.insn = orig_insn;
2130           ve_templ.insn = orig_insn;
2131
2132           /* Apply splitting iv optimization.  */
2133           if (opt_info->insns_to_split)
2134             {
2135               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2136
2137               if (ivts)
2138                 {
2139                   gcc_assert (GET_CODE (PATTERN (insn))
2140                               == GET_CODE (PATTERN (orig_insn)));
2141
2142                   if (!delta)
2143                     insert_base_initialization (ivts, insn);
2144                   split_iv (ivts, insn, delta);
2145                 }
2146             }
2147           /* Apply variable expansion optimization.  */
2148           if (unrolling && opt_info->insns_with_var_to_expand)
2149             {
2150               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2151               if (ves)
2152                 {
2153                   gcc_assert (GET_CODE (PATTERN (insn))
2154                               == GET_CODE (PATTERN (orig_insn)));
2155                   expand_var_during_unrolling (ves, insn);
2156                 }
2157             }
2158           orig_insn = NEXT_INSN (orig_insn);
2159         }
2160     }
2161
2162   if (!rewrite_original_loop)
2163     return;
2164
2165   /* Initialize the variable expansions in the loop preheader
2166      and take care of combining them at the loop exit.  */
2167   if (opt_info->insns_with_var_to_expand)
2168     {
2169       htab_traverse (opt_info->insns_with_var_to_expand,
2170                      insert_var_expansion_initialization,
2171                      opt_info->loop_preheader);
2172       htab_traverse (opt_info->insns_with_var_to_expand,
2173                      combine_var_copies_in_loop_exit,
2174                      opt_info->loop_exit);
2175     }
2176
2177   /* Rewrite also the original loop body.  Find them as originals of the blocks
2178      in the last copied iteration, i.e. those that have
2179      get_bb_copy (get_bb_original (bb)) == bb.  */
2180   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2181     {
2182       bb = BASIC_BLOCK (i);
2183       orig_bb = get_bb_original (bb);
2184       if (get_bb_copy (orig_bb) != bb)
2185         continue;
2186
2187       delta = determine_split_iv_delta (0, n_copies, unrolling);
2188       for (orig_insn = BB_HEAD (orig_bb);
2189            orig_insn != NEXT_INSN (BB_END (bb));
2190            orig_insn = next)
2191         {
2192           next = NEXT_INSN (orig_insn);
2193
2194           if (!INSN_P (orig_insn))
2195             continue;
2196
2197           ivts_templ.insn = orig_insn;
2198           if (opt_info->insns_to_split)
2199             {
2200               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2201               if (ivts)
2202                 {
2203                   if (!delta)
2204                     insert_base_initialization (ivts, orig_insn);
2205                   split_iv (ivts, orig_insn, delta);
2206                   continue;
2207                 }
2208             }
2209
2210         }
2211     }
2212 }
2213
2214 /*  Release the data structures used for the variable expansion
2215     optimization.  Callbacks for htab_traverse.  */
2216
2217 static int
2218 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2219 {
2220   struct var_to_expand *ve = *slot;
2221
2222   VEC_free (rtx, heap, ve->var_expansions);
2223
2224   /* Continue traversing the hash table.  */
2225   return 1;
2226 }
2227
2228 /* Release OPT_INFO.  */
2229
2230 static void
2231 free_opt_info (struct opt_info *opt_info)
2232 {
2233   if (opt_info->insns_to_split)
2234     htab_delete (opt_info->insns_to_split);
2235   if (opt_info->insns_with_var_to_expand)
2236     {
2237       htab_traverse (opt_info->insns_with_var_to_expand,
2238                      release_var_copies, NULL);
2239       htab_delete (opt_info->insns_with_var_to_expand);
2240     }
2241   free (opt_info);
2242 }