gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "hard-reg-set.h"
  26 #include "obstack.h"
  27 #include "basic-block.h"
  28 #include "cfgloop.h"
  29 #include "expr.h"
  30 #include "output.h"
  31 #include "graphds.h"
  32 #include "params.h"
  33
  34 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  35
  36 bool
  37 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  38 {
  39   /* It must be executed at least once each iteration.  */
  40   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  41     return false;
  42
  43   /* And just once.  */
  44   if (bb->loop_father != loop)
  45     return false;
  46
  47   /* But this was not enough.  We might have some irreducible loop here.  */
  48   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  49     return false;
  50
  51   return true;
  52 }
  53
  54 /* Marks the edge E in graph G irreducible if it connects two vertices in the
  55    same scc.  */
  56
  57 static void
  58 check_irred (struct graph *g, struct graph_edge *e)
  59 {
  60   edge real = (edge) e->data;
  61
  62   /* All edges should lead from a component with higher number to the
  63      one with lower one.  */
  64   gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component);
  65
  66   if (g->vertices[e->src].component != g->vertices[e->dest].component)
  67     return;
  68
  69   real->flags |= EDGE_IRREDUCIBLE_LOOP;
  70   if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
  71     real->src->flags |= BB_IRREDUCIBLE_LOOP;
  72 }
  73
  74 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  75    throw away all latch edges and mark blocks inside any remaining cycle.
  76    Everything is a bit complicated due to fact we do not want to do this
  77    for parts of cycles that only "pass" through some loop -- i.e. for
  78    each cycle, we want to mark blocks that belong directly to innermost
  79    loop containing the whole cycle.
  80
  81    LOOPS is the loop tree.  */
  82
  83 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
  84 #define BB_REPR(BB) ((BB)->index + 1)
  85
  86 void
  87 mark_irreducible_loops (void)
  88 {
  89   basic_block act;
  90   edge e;
  91   edge_iterator ei;
  92   int src, dest;
  93   unsigned depth;
  94   struct graph *g;
  95   int num = number_of_loops ();
  96   struct loop *cloop;
  97
  98   gcc_assert (current_loops != NULL);
  99
 100   /* Reset the flags.  */
 101   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
 102     {
 103       act->flags &= ~BB_IRREDUCIBLE_LOOP;
 104       FOR_EACH_EDGE (e, ei, act->succs)
 105         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
 106     }
 107
 108   /* Create the edge lists.  */
 109   g = new_graph (last_basic_block + num);
 110
 111   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
 112     FOR_EACH_EDGE (e, ei, act->succs)
 113       {
 114         /* Ignore edges to exit.  */
 115         if (e->dest == EXIT_BLOCK_PTR)
 116           continue;
 117
 118         src = BB_REPR (act);
 119         dest = BB_REPR (e->dest);
 120
 121         /* Ignore latch edges.  */
 122         if (e->dest->loop_father->header == e->dest
 123             && e->dest->loop_father->latch == act)
 124           continue;
 125
 126         /* Edges inside a single loop should be left where they are.  Edges
 127            to subloop headers should lead to representative of the subloop,
 128            but from the same place.
 129
 130            Edges exiting loops should lead from representative
 131            of the son of nearest common ancestor of the loops in that
 132            act lays.  */
 133
 134         if (e->dest->loop_father->header == e->dest)
 135           dest = LOOP_REPR (e->dest->loop_father);
 136
 137         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 138           {
 139             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 140                                                       e->dest->loop_father));
 141             if (depth == loop_depth (act->loop_father))
 142               cloop = act->loop_father;
 143             else
 144               cloop = VEC_index (loop_p, act->loop_father->superloops, depth);
 145
 146             src = LOOP_REPR (cloop);
 147           }
 148
 149         add_edge (g, src, dest)->data = e;
 150       }
 151
 152   /* Find the strongly connected components.  */
 153   graphds_scc (g, NULL);
 154
 155   /* Mark the irreducible loops.  */
 156   for_each_edge (g, check_irred);
 157
 158   free_graph (g);
 159
 160   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 161 }
 162
 163 /* Counts number of insns inside LOOP.  */
 164 int
 165 num_loop_insns (const struct loop *loop)
 166 {
 167   basic_block *bbs, bb;
 168   unsigned i, ninsns = 0;
 169   rtx insn;
 170
 171   bbs = get_loop_body (loop);
 172   for (i = 0; i < loop->num_nodes; i++)
 173     {
 174       bb = bbs[i];
 175       ninsns++;
 176       for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
 177         if (INSN_P (insn))
 178           ninsns++;
 179     }
 180   free(bbs);
 181
 182   return ninsns;
 183 }
 184
 185 /* Counts number of insns executed on average per iteration LOOP.  */
 186 int
 187 average_num_loop_insns (const struct loop *loop)
 188 {
 189   basic_block *bbs, bb;
 190   unsigned i, binsns, ninsns, ratio;
 191   rtx insn;
 192
 193   ninsns = 0;
 194   bbs = get_loop_body (loop);
 195   for (i = 0; i < loop->num_nodes; i++)
 196     {
 197       bb = bbs[i];
 198
 199       binsns = 1;
 200       for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
 201         if (INSN_P (insn))
 202           binsns++;
 203
 204       ratio = loop->header->frequency == 0
 205               ? BB_FREQ_MAX
 206               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 207       ninsns += binsns * ratio;
 208     }
 209   free(bbs);
 210
 211   ninsns /= BB_FREQ_MAX;
 212   if (!ninsns)
 213     ninsns = 1; /* To avoid division by zero.  */
 214
 215   return ninsns;
 216 }
 217
 218 /* Returns expected number of iterations of LOOP, according to
 219    measured or guessed profile.  No bounding is done on the
 220    value.  */
 221
 222 gcov_type
 223 expected_loop_iterations_unbounded (const struct loop *loop)
 224 {
 225   edge e;
 226   edge_iterator ei;
 227
 228   if (loop->latch->count || loop->header->count)
 229     {
 230       gcov_type count_in, count_latch, expected;
 231
 232       count_in = 0;
 233       count_latch = 0;
 234
 235       FOR_EACH_EDGE (e, ei, loop->header->preds)
 236         if (e->src == loop->latch)
 237           count_latch = e->count;
 238         else
 239           count_in += e->count;
 240
 241       if (count_in == 0)
 242         expected = count_latch * 2;
 243       else
 244         expected = (count_latch + count_in - 1) / count_in;
 245
 246       return expected;
 247     }
 248   else
 249     {
 250       int freq_in, freq_latch;
 251
 252       freq_in = 0;
 253       freq_latch = 0;
 254
 255       FOR_EACH_EDGE (e, ei, loop->header->preds)
 256         if (e->src == loop->latch)
 257           freq_latch = EDGE_FREQUENCY (e);
 258         else
 259           freq_in += EDGE_FREQUENCY (e);
 260
 261       if (freq_in == 0)
 262         return freq_latch * 2;
 263
 264       return (freq_latch + freq_in - 1) / freq_in;
 265     }
 266 }
 267
 268 /* Returns expected number of LOOP iterations.  The returned value is bounded
 269    by REG_BR_PROB_BASE.  */
 270
 271 unsigned
 272 expected_loop_iterations (const struct loop *loop)
 273 {
 274   gcov_type expected = expected_loop_iterations_unbounded (loop);
 275   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 276 }
 277
 278 /* Returns the maximum level of nesting of subloops of LOOP.  */
 279
 280 unsigned
 281 get_loop_level (const struct loop *loop)
 282 {
 283   const struct loop *ploop;
 284   unsigned mx = 0, l;
 285
 286   for (ploop = loop->inner; ploop; ploop = ploop->next)
 287     {
 288       l = get_loop_level (ploop);
 289       if (l >= mx)
 290         mx = l + 1;
 291     }
 292   return mx;
 293 }
 294
 295 /* Returns estimate on cost of computing SEQ.  */
 296
 297 static unsigned
 298 seq_cost (const_rtx seq, bool speed)
 299 {
 300   unsigned cost = 0;
 301   rtx set;
 302
 303   for (; seq; seq = NEXT_INSN (seq))
 304     {
 305       set = single_set (seq);
 306       if (set)
 307         cost += rtx_cost (set, SET, speed);
 308       else
 309         cost++;
 310     }
 311
 312   return cost;
 313 }
 314
 315 /* The properties of the target.  */
 316
 317 unsigned target_avail_regs;     /* Number of available registers.  */
 318 unsigned target_res_regs;       /* Number of registers reserved for temporary
 319                                    expressions.  */
 320 unsigned target_reg_cost[2];    /* The cost for register when there still
 321                                    is some reserve, but we are approaching
 322                                    the number of available registers.  */
 323 unsigned target_spill_cost[2];  /* The cost for register when we need
 324                                    to spill.  */
 325
 326 /* Initialize the constants for computing set costs.  */
 327
 328 void
 329 init_set_costs (void)
 330 {
 331   int speed;
 332   rtx seq;
 333   rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
 334   rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
 335   rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
 336   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 337   unsigned i;
 338
 339   target_avail_regs = 0;
 340   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 341     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 342         && !fixed_regs[i])
 343       target_avail_regs++;
 344
 345   target_res_regs = 3;
 346
 347   for (speed = 0; speed < 2; speed++)
 348      {
 349       crtl->maybe_hot_insn_p = speed;
 350       /* Set up the costs for using extra registers:
 351
 352          1) If not many free registers remain, we should prefer having an
 353             additional move to decreasing the number of available registers.
 354             (TARGET_REG_COST).
 355          2) If no registers are available, we need to spill, which may require
 356             storing the old value to memory and loading it back
 357             (TARGET_SPILL_COST).  */
 358
 359       start_sequence ();
 360       emit_move_insn (reg1, reg2);
 361       seq = get_insns ();
 362       end_sequence ();
 363       target_reg_cost [speed] = seq_cost (seq, speed);
 364
 365       start_sequence ();
 366       emit_move_insn (mem, reg1);
 367       emit_move_insn (reg2, mem);
 368       seq = get_insns ();
 369       end_sequence ();
 370       target_spill_cost [speed] = seq_cost (seq, speed);
 371     }
 372   default_rtl_profile ();
 373 }
 374
 375 /* Estimates cost of increased register pressure caused by making N_NEW new
 376    registers live around the loop.  N_OLD is the number of registers live
 377    around the loop.  */
 378
 379 unsigned
 380 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
 381 {
 382   unsigned cost;
 383   unsigned regs_needed = n_new + n_old;
 384
 385   /* If we have enough registers, we should use them and not restrict
 386      the transformations unnecessarily.  */
 387   if (regs_needed + target_res_regs <= target_avail_regs)
 388     return 0;
 389
 390   if (regs_needed <= target_avail_regs)
 391     /* If we are close to running out of registers, try to preserve
 392        them.  */
 393     cost = target_reg_cost [speed] * n_new;
 394   else
 395     /* If we run out of registers, it is very expensive to add another
 396        one.  */
 397     cost = target_spill_cost [speed] * n_new;
 398
 399   if (optimize && flag_ira && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL
 400                                || flag_ira_algorithm == IRA_ALGORITHM_MIXED)
 401       && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM)
 402     /* IRA regional allocation deals with high register pressure
 403        better.  So decrease the cost (to do more accurate the cost
 404        calculation for IRA, we need to know how many registers lives
 405        through the loop transparently).  */
 406     cost /= 2;
 407
 408   return cost;
 409 }
 410
 411 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 412
 413 void
 414 mark_loop_exit_edges (void)
 415 {
 416   basic_block bb;
 417   edge e;
 418
 419   if (number_of_loops () <= 1)
 420     return;
 421
 422   FOR_EACH_BB (bb)
 423     {
 424       edge_iterator ei;
 425
 426       FOR_EACH_EDGE (e, ei, bb->succs)
 427         {
 428           if (loop_outer (bb->loop_father)
 429               && loop_exit_edge_p (bb->loop_father, e))
 430             e->flags |= EDGE_LOOP_EXIT;
 431           else
 432             e->flags &= ~EDGE_LOOP_EXIT;
 433         }
 434     }
 435 }
 436