gcc/modulo-sched.c

   1 /* Swing Modulo Scheduling implementation.
   2    Copyright (C) 2004, 2005
   3    Free Software Foundation, Inc.
   4    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tm_p.h"
  31 #include "hard-reg-set.h"
  32 #include "regs.h"
  33 #include "function.h"
  34 #include "flags.h"
  35 #include "insn-config.h"
  36 #include "insn-attr.h"
  37 #include "except.h"
  38 #include "toplev.h"
  39 #include "recog.h"
  40 #include "sched-int.h"
  41 #include "target.h"
  42 #include "cfglayout.h"
  43 #include "cfgloop.h"
  44 #include "cfghooks.h"
  45 #include "expr.h"
  46 #include "params.h"
  47 #include "gcov-io.h"
  48 #include "df.h"
  49 #include "ddg.h"
  50
  51 #ifdef INSN_SCHEDULING
  52
  53 /* This file contains the implementation of the Swing Modulo Scheduler,
  54    described in the following references:
  55    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
  56        Lifetime--sensitive modulo scheduling in a production environment.
  57        IEEE Trans. on Comps., 50(3), March 2001
  58    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
  59        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
  60        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
  61
  62    The basic structure is:
  63    1. Build a data-dependence graph (DDG) for each loop.
  64    2. Use the DDG to order the insns of a loop (not in topological order
  65       necessarily, but rather) trying to place each insn after all its
  66       predecessors _or_ after all its successors.
  67    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
  68    4. Use the ordering to perform list-scheduling of the loop:
  69       1. Set II = MII.  We will try to schedule the loop within II cycles.
  70       2. Try to schedule the insns one by one according to the ordering.
  71          For each insn compute an interval of cycles by considering already-
  72          scheduled preds and succs (and associated latencies); try to place
  73          the insn in the cycles of this window checking for potential
  74          resource conflicts (using the DFA interface).
  75          Note: this is different from the cycle-scheduling of schedule_insns;
  76          here the insns are not scheduled monotonically top-down (nor bottom-
  77          up).
  78       3. If failed in scheduling all insns - bump II++ and try again, unless
  79          II reaches an upper bound MaxII, in which case report failure.
  80    5. If we succeeded in scheduling the loop within II cycles, we now
  81       generate prolog and epilog, decrease the counter of the loop, and
  82       perform modulo variable expansion for live ranges that span more than
  83       II cycles (i.e. use register copies to prevent a def from overwriting
  84       itself before reaching the use).
  85 */
  86
  87 \f
  88 /* This page defines partial-schedule structures and functions for
  89    modulo scheduling.  */
  90
  91 typedef struct partial_schedule *partial_schedule_ptr;
  92 typedef struct ps_insn *ps_insn_ptr;
  93
  94 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
  95 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
  96
  97 /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
  98 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
  99
 100 /* Perform signed modulo, always returning a non-negative value.  */
 101 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
 102
 103 /* The number of different iterations the nodes in ps span, assuming
 104    the stage boundaries are placed efficiently.  */
 105 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
 106                              + 1 + (ps)->ii - 1) / (ps)->ii)
 107
 108 /* A single instruction in the partial schedule.  */
 109 struct ps_insn
 110 {
 111   /* The corresponding DDG_NODE.  */
 112   ddg_node_ptr node;
 113
 114   /* The (absolute) cycle in which the PS instruction is scheduled.
 115      Same as SCHED_TIME (node).  */
 116   int cycle;
 117
 118   /* The next/prev PS_INSN in the same row.  */
 119   ps_insn_ptr next_in_row,
 120               prev_in_row;
 121
 122   /* The number of nodes in the same row that come after this node.  */
 123   int row_rest_count;
 124 };
 125
 126 /* Holds the partial schedule as an array of II rows.  Each entry of the
 127    array points to a linked list of PS_INSNs, which represents the
 128    instructions that are scheduled for that row.  */
 129 struct partial_schedule
 130 {
 131   int ii;       /* Number of rows in the partial schedule.  */
 132   int history;  /* Threshold for conflict checking using DFA.  */
 133
 134   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
 135   ps_insn_ptr *rows;
 136
 137   /* The earliest absolute cycle of an insn in the partial schedule.  */
 138   int min_cycle;
 139
 140   /* The latest absolute cycle of an insn in the partial schedule.  */
 141   int max_cycle;
 142
 143   ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
 144 };
 145
 146 /* We use this to record all the register replacements we do in
 147    the kernel so we can undo SMS if it is not profitable.  */
 148 struct undo_replace_buff_elem
 149 {
 150   rtx insn;
 151   rtx orig_reg;
 152   rtx new_reg;
 153   struct undo_replace_buff_elem *next;
 154 };
 155
 156
 157
 158 partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
 159 void free_partial_schedule (partial_schedule_ptr);
 160 void reset_partial_schedule (partial_schedule_ptr, int new_ii);
 161 void print_partial_schedule (partial_schedule_ptr, FILE *);
 162 static int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
 163 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
 164                                                 ddg_node_ptr node, int cycle,
 165                                                 sbitmap must_precede,
 166                                                 sbitmap must_follow);
 167 static void rotate_partial_schedule (partial_schedule_ptr, int);
 168 void set_row_column_for_ps (partial_schedule_ptr);
 169 static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
 170
 171 \f
 172 /* This page defines constants and structures for the modulo scheduling
 173    driver.  */
 174
 175 /* As in haifa-sched.c:  */
 176 /* issue_rate is the number of insns that can be scheduled in the same
 177    machine cycle.  It can be defined in the config/mach/mach.h file,
 178    otherwise we set it to 1.  */
 179
 180 static int issue_rate;
 181
 182 /* For printing statistics.  */
 183 static FILE *stats_file;
 184
 185 static int sms_order_nodes (ddg_ptr, int, int * result);
 186 static void set_node_sched_params (ddg_ptr);
 187 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
 188                                                    int *, FILE*);
 189 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
 190 static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
 191 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
 192                                        int from_stage, int to_stage,
 193                                        int is_prolog);
 194
 195 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
 196 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
 197 #define SCHED_FIRST_REG_MOVE(x) \
 198         (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
 199 #define SCHED_NREG_MOVES(x) \
 200         (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
 201 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
 202 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
 203 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
 204
 205 /* The scheduling parameters held for each node.  */
 206 typedef struct node_sched_params
 207 {
 208   int asap;     /* A lower-bound on the absolute scheduling cycle.  */
 209   int time;     /* The absolute scheduling cycle (time >= asap).  */
 210
 211   /* The following field (first_reg_move) is a pointer to the first
 212      register-move instruction added to handle the modulo-variable-expansion
 213      of the register defined by this node.  This register-move copies the
 214      original register defined by the node.  */
 215   rtx first_reg_move;
 216
 217   /* The number of register-move instructions added, immediately preceding
 218      first_reg_move.  */
 219   int nreg_moves;
 220
 221   int row;    /* Holds time % ii.  */
 222   int stage;  /* Holds time / ii.  */
 223
 224   /* The column of a node inside the ps.  If nodes u, v are on the same row,
 225      u will precede v if column (u) < column (v).  */
 226   int column;
 227 } *node_sched_params_ptr;
 228
 229 \f
 230 /* The following three functions are copied from the current scheduler
 231    code in order to use sched_analyze() for computing the dependencies.
 232    They are used when initializing the sched_info structure.  */
 233 static const char *
 234 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
 235 {
 236   static char tmp[80];
 237
 238   sprintf (tmp, "i%4d", INSN_UID (insn));
 239   return tmp;
 240 }
 241
 242 static int
 243 contributes_to_priority (rtx next, rtx insn)
 244 {
 245   return BLOCK_NUM (next) == BLOCK_NUM (insn);
 246 }
 247
 248 static void
 249 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
 250                                regset cond_exec ATTRIBUTE_UNUSED,
 251                                regset used ATTRIBUTE_UNUSED,
 252                                regset set ATTRIBUTE_UNUSED)
 253 {
 254 }
 255
 256 static struct sched_info sms_sched_info =
 257 {
 258   NULL,
 259   NULL,
 260   NULL,
 261   NULL,
 262   NULL,
 263   sms_print_insn,
 264   contributes_to_priority,
 265   compute_jump_reg_dependencies,
 266   NULL, NULL,
 267   NULL, NULL,
 268   0, 0, 0
 269 };
 270
 271
 272 /* Return the register decremented and tested or zero if it is not a decrement
 273    and branch jump insn (similar to doloop_condition_get).  */
 274 static rtx
 275 doloop_register_get (rtx insn, rtx *comp)
 276 {
 277   rtx pattern, cmp, inc, reg, condition;
 278
 279   if (!JUMP_P (insn))
 280     return NULL_RTX;
 281   pattern = PATTERN (insn);
 282
 283   /* The canonical doloop pattern we expect is:
 284
 285      (parallel [(set (pc) (if_then_else (condition)
 286                                         (label_ref (label))
 287                                         (pc)))
 288                 (set (reg) (plus (reg) (const_int -1)))
 289                 (additional clobbers and uses)])
 290
 291     where condition is further restricted to be
 292       (ne (reg) (const_int 1)).  */
 293
 294   if (GET_CODE (pattern) != PARALLEL)
 295     return NULL_RTX;
 296
 297   cmp = XVECEXP (pattern, 0, 0);
 298   inc = XVECEXP (pattern, 0, 1);
 299   /* Return the compare rtx.  */
 300   *comp = cmp;
 301
 302   /* Check for (set (reg) (something)).  */
 303   if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
 304     return NULL_RTX;
 305
 306   /* Extract loop counter register.  */
 307   reg = SET_DEST (inc);
 308
 309   /* Check if something = (plus (reg) (const_int -1)).  */
 310   if (GET_CODE (SET_SRC (inc)) != PLUS
 311       || XEXP (SET_SRC (inc), 0) != reg
 312       || XEXP (SET_SRC (inc), 1) != constm1_rtx)
 313     return NULL_RTX;
 314
 315   /* Check for (set (pc) (if_then_else (condition)
 316                                        (label_ref (label))
 317                                        (pc))).  */
 318   if (GET_CODE (cmp) != SET
 319       || SET_DEST (cmp) != pc_rtx
 320       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
 321       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
 322       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
 323     return NULL_RTX;
 324
 325   /* Extract loop termination condition.  */
 326   condition = XEXP (SET_SRC (cmp), 0);
 327
 328   /* Check if condition = (ne (reg) (const_int 1)), which is more
 329      restrictive than the check in doloop_condition_get:
 330      if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
 331          || GET_CODE (XEXP (condition, 1)) != CONST_INT).  */
 332   if (GET_CODE (condition) != NE
 333       || XEXP (condition, 1) != const1_rtx)
 334     return NULL_RTX;
 335
 336   if (XEXP (condition, 0) == reg)
 337     return reg;
 338
 339   return NULL_RTX;
 340 }
 341
 342 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
 343    that the number of iterations is a compile-time constant.  If so,
 344    return the rtx that sets COUNT_REG to a constant, and set COUNT to
 345    this constant.  Otherwise return 0.  */
 346 static rtx
 347 const_iteration_count (rtx count_reg, basic_block pre_header,
 348                        HOST_WIDEST_INT * count)
 349 {
 350   rtx insn;
 351   rtx head, tail;
 352
 353   if (! pre_header)
 354     return NULL_RTX;
 355
 356   get_block_head_tail (pre_header->index, &head, &tail);
 357
 358   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
 359     if (INSN_P (insn) && single_set (insn) &&
 360         rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
 361       {
 362         rtx pat = single_set (insn);
 363
 364         if (GET_CODE (SET_SRC (pat)) == CONST_INT)
 365           {
 366             *count = INTVAL (SET_SRC (pat));
 367             return insn;
 368           }
 369
 370         return NULL_RTX;
 371       }
 372
 373   return NULL_RTX;
 374 }
 375
 376 /* A very simple resource-based lower bound on the initiation interval.
 377    ??? Improve the accuracy of this bound by considering the
 378    utilization of various units.  */
 379 static int
 380 res_MII (ddg_ptr g)
 381 {
 382   return (g->num_nodes / issue_rate);
 383 }
 384
 385
 386 /* Points to the array that contains the sched data for each node.  */
 387 static node_sched_params_ptr node_sched_params;
 388
 389 /* Allocate sched_params for each node and initialize it.  Assumes that
 390    the aux field of each node contain the asap bound (computed earlier),
 391    and copies it into the sched_params field.  */
 392 static void
 393 set_node_sched_params (ddg_ptr g)
 394 {
 395   int i;
 396
 397   /* Allocate for each node in the DDG a place to hold the "sched_data".  */
 398   /* Initialize ASAP/ALAP/HIGHT to zero.  */
 399   node_sched_params = (node_sched_params_ptr)
 400                        xcalloc (g->num_nodes,
 401                                 sizeof (struct node_sched_params));
 402
 403   /* Set the pointer of the general data of the node to point to the
 404      appropriate sched_params structure.  */
 405   for (i = 0; i < g->num_nodes; i++)
 406     {
 407       /* Watch out for aliasing problems?  */
 408       node_sched_params[i].asap = g->nodes[i].aux.count;
 409       g->nodes[i].aux.info = &node_sched_params[i];
 410     }
 411 }
 412
 413 static void
 414 print_node_sched_params (FILE * dump_file, int num_nodes)
 415 {
 416   int i;
 417
 418   if (! dump_file)
 419     return;
 420   for (i = 0; i < num_nodes; i++)
 421     {
 422       node_sched_params_ptr nsp = &node_sched_params[i];
 423       rtx reg_move = nsp->first_reg_move;
 424       int j;
 425
 426       fprintf (dump_file, "Node %d:\n", i);
 427       fprintf (dump_file, " asap = %d:\n", nsp->asap);
 428       fprintf (dump_file, " time = %d:\n", nsp->time);
 429       fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);
 430       for (j = 0; j < nsp->nreg_moves; j++)
 431         {
 432           fprintf (dump_file, " reg_move = ");
 433           print_rtl_single (dump_file, reg_move);
 434           reg_move = PREV_INSN (reg_move);
 435         }
 436     }
 437 }
 438
 439 /* Calculate an upper bound for II.  SMS should not schedule the loop if it
 440    requires more cycles than this bound.  Currently set to the sum of the
 441    longest latency edge for each node.  Reset based on experiments.  */
 442 static int
 443 calculate_maxii (ddg_ptr g)
 444 {
 445   int i;
 446   int maxii = 0;
 447
 448   for (i = 0; i < g->num_nodes; i++)
 449     {
 450       ddg_node_ptr u = &g->nodes[i];
 451       ddg_edge_ptr e;
 452       int max_edge_latency = 0;
 453
 454       for (e = u->out; e; e = e->next_out)
 455         max_edge_latency = MAX (max_edge_latency, e->latency);
 456
 457       maxii += max_edge_latency;
 458     }
 459   return maxii;
 460 }
 461
 462 /*
 463    Breaking intra-loop register anti-dependences:
 464    Each intra-loop register anti-dependence implies a cross-iteration true
 465    dependence of distance 1. Therefore, we can remove such false dependencies
 466    and figure out if the partial schedule broke them by checking if (for a
 467    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
 468    if so generate a register move.   The number of such moves is equal to:
 469               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
 470    nreg_moves = ----------------------------------- + 1 - {   dependence.
 471                             ii                          { 1 if not.
 472 */
 473 static struct undo_replace_buff_elem *
 474 generate_reg_moves (partial_schedule_ptr ps)
 475 {
 476   ddg_ptr g = ps->g;
 477   int ii = ps->ii;
 478   int i;
 479   struct undo_replace_buff_elem *reg_move_replaces = NULL;
 480
 481   for (i = 0; i < g->num_nodes; i++)
 482     {
 483       ddg_node_ptr u = &g->nodes[i];
 484       ddg_edge_ptr e;
 485       int nreg_moves = 0, i_reg_move;
 486       sbitmap *uses_of_defs;
 487       rtx last_reg_move;
 488       rtx prev_reg, old_reg;
 489
 490       /* Compute the number of reg_moves needed for u, by looking at life
 491          ranges started at u (excluding self-loops).  */
 492       for (e = u->out; e; e = e->next_out)
 493         if (e->type == TRUE_DEP && e->dest != e->src)
 494           {
 495             int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
 496
 497             if (e->distance == 1)
 498               nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
 499
 500             /* If dest precedes src in the schedule of the kernel, then dest
 501                will read before src writes and we can save one reg_copy.  */
 502             if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
 503                 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
 504               nreg_moves4e--;
 505
 506             nreg_moves = MAX (nreg_moves, nreg_moves4e);
 507           }
 508
 509       if (nreg_moves == 0)
 510         continue;
 511
 512       /* Every use of the register defined by node may require a different
 513          copy of this register, depending on the time the use is scheduled.
 514          Set a bitmap vector, telling which nodes use each copy of this
 515          register.  */
 516       uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
 517       sbitmap_vector_zero (uses_of_defs, nreg_moves);
 518       for (e = u->out; e; e = e->next_out)
 519         if (e->type == TRUE_DEP && e->dest != e->src)
 520           {
 521             int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
 522
 523             if (e->distance == 1)
 524               dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
 525
 526             if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
 527                 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
 528               dest_copy--;
 529
 530             if (dest_copy)
 531               SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
 532           }
 533
 534       /* Now generate the reg_moves, attaching relevant uses to them.  */
 535       SCHED_NREG_MOVES (u) = nreg_moves;
 536       old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
 537       last_reg_move = u->insn;
 538
 539       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
 540         {
 541           int i_use;
 542           rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
 543           rtx reg_move = gen_move_insn (new_reg, prev_reg);
 544
 545           add_insn_before (reg_move, last_reg_move);
 546           last_reg_move = reg_move;
 547
 548           if (!SCHED_FIRST_REG_MOVE (u))
 549             SCHED_FIRST_REG_MOVE (u) = reg_move;
 550
 551           EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use,
 552             {
 553               struct undo_replace_buff_elem *rep;
 554
 555               rep = (struct undo_replace_buff_elem *)
 556                     xcalloc (1, sizeof (struct undo_replace_buff_elem));
 557               rep->insn = g->nodes[i_use].insn;
 558               rep->orig_reg = old_reg;
 559               rep->new_reg = new_reg;
 560
 561               if (! reg_move_replaces)
 562                 reg_move_replaces = rep;
 563               else
 564                 {
 565                   rep->next = reg_move_replaces;
 566                   reg_move_replaces = rep;
 567                 }
 568
 569               replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
 570             });
 571
 572           prev_reg = new_reg;
 573         }
 574     }
 575   return reg_move_replaces;
 576 }
 577
 578 /* We call this when we want to undo the SMS schedule for a given loop.
 579    One of the things that we do is to delete the register moves generated
 580    for the sake of SMS; this function deletes the register move instructions
 581    recorded in the undo buffer.  */
 582 static void
 583 undo_generate_reg_moves (partial_schedule_ptr ps,
 584                          struct undo_replace_buff_elem *reg_move_replaces)
 585 {
 586   int i,j;
 587
 588   for (i = 0; i < ps->g->num_nodes; i++)
 589     {
 590       ddg_node_ptr u = &ps->g->nodes[i];
 591       rtx prev;
 592       rtx crr = SCHED_FIRST_REG_MOVE (u);
 593
 594       for (j = 0; j < SCHED_NREG_MOVES (u); j++)
 595         {
 596           prev = PREV_INSN (crr);
 597           delete_insn (crr);
 598           crr = prev;
 599         }
 600       SCHED_FIRST_REG_MOVE (u) = NULL_RTX;
 601     }
 602
 603   while (reg_move_replaces)
 604     {
 605       struct undo_replace_buff_elem *rep = reg_move_replaces;
 606
 607       reg_move_replaces = reg_move_replaces->next;
 608       replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
 609     }
 610 }
 611
 612 /* Free memory allocated for the undo buffer.  */
 613 static void
 614 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
 615 {
 616
 617   while (reg_move_replaces)
 618     {
 619       struct undo_replace_buff_elem *rep = reg_move_replaces;
 620
 621       reg_move_replaces = reg_move_replaces->next;
 622       free (rep);
 623     }
 624 }
 625
 626 /* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
 627    of SCHED_ROW and SCHED_STAGE.  */
 628 static void
 629 normalize_sched_times (partial_schedule_ptr ps)
 630 {
 631   int i;
 632   ddg_ptr g = ps->g;
 633   int amount = PS_MIN_CYCLE (ps);
 634   int ii = ps->ii;
 635
 636   /* Don't include the closing branch assuming that it is the last node.  */
 637   for (i = 0; i < g->num_nodes - 1; i++)
 638     {
 639       ddg_node_ptr u = &g->nodes[i];
 640       int normalized_time = SCHED_TIME (u) - amount;
 641
 642       gcc_assert (normalized_time >= 0);
 643
 644       SCHED_TIME (u) = normalized_time;
 645       SCHED_ROW (u) = normalized_time % ii;
 646       SCHED_STAGE (u) = normalized_time / ii;
 647     }
 648 }
 649
 650 /* Set SCHED_COLUMN of each node according to its position in PS.  */
 651 static void
 652 set_columns_for_ps (partial_schedule_ptr ps)
 653 {
 654   int row;
 655
 656   for (row = 0; row < ps->ii; row++)
 657     {
 658       ps_insn_ptr cur_insn = ps->rows[row];
 659       int column = 0;
 660
 661       for (; cur_insn; cur_insn = cur_insn->next_in_row)
 662         SCHED_COLUMN (cur_insn->node) = column++;
 663     }
 664 }
 665
 666 /* Permute the insns according to their order in PS, from row 0 to
 667    row ii-1, and position them right before LAST.  This schedules
 668    the insns of the loop kernel.  */
 669 static void
 670 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
 671 {
 672   int ii = ps->ii;
 673   int row;
 674   ps_insn_ptr ps_ij;
 675
 676   for (row = 0; row < ii ; row++)
 677     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
 678       if (PREV_INSN (last) != ps_ij->node->insn)
 679         reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
 680                             PREV_INSN (last));
 681 }
 682
 683 /* As part of undoing SMS we return to the original ordering of the
 684    instructions inside the loop kernel.  Given the partial schedule PS, this
 685    function returns the ordering of the instruction according to their CUID
 686    in the DDG (PS->G), which is the original order of the instruction before
 687    performing SMS.  */
 688 static void
 689 undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
 690 {
 691   int i;
 692
 693   for (i = 0 ; i < ps->g->num_nodes; i++)
 694     if (last == ps->g->nodes[i].insn
 695         || last == ps->g->nodes[i].first_note)
 696       break;
 697     else if (PREV_INSN (last) != ps->g->nodes[i].insn)
 698       reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
 699                           PREV_INSN (last));
 700 }
 701
 702 /* Used to generate the prologue & epilogue.  Duplicate the subset of
 703    nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
 704    of both), together with a prefix/suffix of their reg_moves.  */
 705 static void
 706 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 707                            int to_stage, int for_prolog)
 708 {
 709   int row;
 710   ps_insn_ptr ps_ij;
 711
 712   for (row = 0; row < ps->ii; row++)
 713     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
 714       {
 715         ddg_node_ptr u_node = ps_ij->node;
 716         int j, i_reg_moves;
 717         rtx reg_move = NULL_RTX;
 718
 719         if (for_prolog)
 720           {
 721             /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
 722                number of reg_moves starting with the second occurrence of
 723                u_node, which is generated if its SCHED_STAGE <= to_stage.  */
 724             i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
 725             i_reg_moves = MAX (i_reg_moves, 0);
 726             i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
 727
 728             /* The reg_moves start from the *first* reg_move backwards.  */
 729             if (i_reg_moves)
 730               {
 731                 reg_move = SCHED_FIRST_REG_MOVE (u_node);
 732                 for (j = 1; j < i_reg_moves; j++)
 733                   reg_move = PREV_INSN (reg_move);
 734               }
 735           }
 736         else /* It's for the epilog.  */
 737           {
 738             /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
 739                starting to decrease one stage after u_node no longer occurs;
 740                that is, generate all reg_moves until
 741                SCHED_STAGE (u_node) == from_stage - 1.  */
 742             i_reg_moves = SCHED_NREG_MOVES (u_node)
 743                        - (from_stage - SCHED_STAGE (u_node) - 1);
 744             i_reg_moves = MAX (i_reg_moves, 0);
 745             i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
 746
 747             /* The reg_moves start from the *last* reg_move forwards.  */
 748             if (i_reg_moves)
 749               {
 750                 reg_move = SCHED_FIRST_REG_MOVE (u_node);
 751                 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
 752                   reg_move = PREV_INSN (reg_move);
 753               }
 754           }
 755
 756         for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
 757           emit_insn (copy_rtx (PATTERN (reg_move)));
 758         if (SCHED_STAGE (u_node) >= from_stage
 759             && SCHED_STAGE (u_node) <= to_stage)
 760           duplicate_insn_chain (u_node->first_note, u_node->insn);
 761       }
 762 }
 763
 764
 765 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 766 static void
 767 generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
 768 {
 769   int i;
 770   int last_stage = PS_STAGE_COUNT (ps) - 1;
 771   edge e;
 772
 773   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
 774   start_sequence ();
 775
 776   if (count_reg)
 777    /* Generate a subtract instruction at the beginning of the prolog to
 778       adjust the loop count by STAGE_COUNT.  */
 779    emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
 780
 781   for (i = 0; i < last_stage; i++)
 782     duplicate_insns_of_cycles (ps, 0, i, 1);
 783
 784   /* Put the prolog ,  on the one and only entry edge.  */
 785   e = loop_preheader_edge (loop);
 786   loop_split_edge_with(e , get_insns());
 787
 788   end_sequence ();
 789
 790   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
 791   start_sequence ();
 792
 793   for (i = 0; i < last_stage; i++)
 794     duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
 795
 796   /* Put the epilogue on the one and only one exit edge.  */
 797   gcc_assert (loop->single_exit);
 798   e = loop->single_exit;
 799   loop_split_edge_with(e , get_insns());
 800   end_sequence ();
 801 }
 802
 803 /* Return the line note insn preceding INSN, for debugging.  Taken from
 804    emit-rtl.c.  */
 805 static rtx
 806 find_line_note (rtx insn)
 807 {
 808   for (; insn; insn = PREV_INSN (insn))
 809     if (NOTE_P (insn)
 810         && NOTE_LINE_NUMBER (insn) >= 0)
 811       break;
 812
 813   return insn;
 814 }
 815
 816 /* Return true if all the BBs of the loop are empty except the
 817    loop header.  */
 818 static bool
 819 loop_single_full_bb_p (struct loop *loop)
 820 {
 821   unsigned i;
 822   basic_block *bbs = get_loop_body (loop);
 823
 824   for (i = 0; i < loop->num_nodes ; i++)
 825     {
 826       rtx head, tail;
 827       bool empty_bb = true;
 828
 829       if (bbs[i] == loop->header)
 830         continue;
 831
 832       /* Make sure that basic blocks other than the header
 833          have only notes labels or jumps.  */
 834       get_block_head_tail (bbs[i]->index, &head, &tail);
 835       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
 836         {
 837           if (NOTE_P (head) || LABEL_P (head)
 838               || (INSN_P (head) && JUMP_P (head)))
 839             continue;
 840           empty_bb = false;
 841           break;
 842         }
 843
 844       if (! empty_bb)
 845         {
 846           free (bbs);
 847           return false;
 848         }
 849     }
 850   free (bbs);
 851   return true;
 852 }
 853
 854 /* A simple loop from SMS point of view; it is a loop that is composed of
 855    either a single basic block or two BBs - a header and a latch.  */
 856 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )                     \
 857                                   && (EDGE_COUNT (loop->latch->preds) == 1) \
 858                                   && (EDGE_COUNT (loop->latch->succs) == 1))
 859
 860 /* Return true if the loop is in its canonical form and false if not.
 861    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
 862 static bool
 863 loop_canon_p (struct loop *loop, FILE *dump_file)
 864 {
 865
 866   if (loop->inner || ! loop->outer)
 867     return false;
 868
 869   if (!loop->single_exit)
 870     {
 871       if (dump_file)
 872         {
 873           rtx line_note = find_line_note (BB_END (loop->header));
 874
 875           fprintf (dump_file, "SMS loop many exits ");
 876           if (line_note)
 877             {
 878               expanded_location xloc;
 879               NOTE_EXPANDED_LOCATION (xloc, line_note);
 880               fprintf (stats_file, " %s %d (file, line)\n",
 881                        xloc.file, xloc.line);
 882             }
 883         }
 884       return false;
 885     }
 886
 887   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
 888     {
 889       if (dump_file)
 890         {
 891           rtx line_note = find_line_note (BB_END (loop->header));
 892
 893           fprintf (dump_file, "SMS loop many BBs. ");
 894           if (line_note)
 895             {
 896               expanded_location xloc;
 897               NOTE_EXPANDED_LOCATION (xloc, line_note);
 898               fprintf (stats_file, " %s %d (file, line)\n",
 899                        xloc.file, xloc.line);
 900             }
 901         }
 902       return false;
 903     }
 904
 905     return true;
 906 }
 907
 908 /* If there are more than one entry for the loop,
 909    make it one by splitting the first entry edge and
 910    redirecting the others to the new BB.  */
 911 static void
 912 canon_loop (struct loop *loop)
 913 {
 914   edge e;
 915   edge_iterator i;
 916
 917   /* Avoid annoying special cases of edges going to exit
 918      block.  */
 919   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
 920     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
 921       loop_split_edge_with (e, NULL_RTX);
 922
 923   if (loop->latch == loop->header
 924       || EDGE_COUNT (loop->latch->succs) > 1)
 925     {
 926       FOR_EACH_EDGE (e, i, loop->header->preds)
 927         if (e->src == loop->latch)
 928           break;
 929       loop_split_edge_with (e, NULL_RTX);
 930     }
 931 }
 932
 933 /* Build the loop information without loop
 934    canonization, the loop canonization will
 935    be performed if the loop is SMSable.  */
 936 static struct loops *
 937 build_loops_structure (FILE *dumpfile)
 938 {
 939   struct loops *loops = xcalloc (1, sizeof (struct loops));
 940
 941   /* Find the loops.  */
 942
 943   if (flow_loops_find (loops) <= 1)
 944     {
 945       /* No loops.  */
 946       flow_loops_free (loops);
 947       free (loops);
 948
 949       return NULL;
 950     }
 951
 952   /* Not going to update these.  */
 953   free (loops->cfg.rc_order);
 954   loops->cfg.rc_order = NULL;
 955   free (loops->cfg.dfs_order);
 956   loops->cfg.dfs_order = NULL;
 957
 958   create_preheaders (loops, CP_SIMPLE_PREHEADERS);
 959   mark_single_exit_loops (loops);
 960   /* Dump loops.  */
 961   flow_loops_dump (loops, dumpfile, NULL, 1);
 962
 963 #ifdef ENABLE_CHECKING
 964   verify_dominators (CDI_DOMINATORS);
 965   verify_loop_structure (loops);
 966 #endif
 967
 968   return loops;
 969 }
 970
 971 /* Main entry point, perform SMS scheduling on the loops of the function
 972    that consist of single basic blocks.  */
 973 void
 974 sms_schedule (FILE *dump_file)
 975 {
 976   static int passes = 0;
 977   rtx insn;
 978   ddg_ptr *g_arr, g;
 979   int * node_order;
 980   int maxii;
 981   unsigned i,num_loops;
 982   partial_schedule_ptr ps;
 983   struct df *df;
 984   struct loops *loops;
 985   basic_block bb = NULL;
 986   /* vars to the versioning only if needed*/
 987   struct loop * nloop;
 988   basic_block condition_bb = NULL;
 989   edge latch_edge;
 990   gcov_type trip_count = 0;
 991
 992   if (! (loops = build_loops_structure (dump_file)))
 993     return;  /* There is no loops to schedule.  */
 994
 995
 996   stats_file = dump_file;
 997
 998   /* Initialize issue_rate.  */
 999   if (targetm.sched.issue_rate)
1000     {
1001       int temp = reload_completed;
1002
1003       reload_completed = 1;
1004       issue_rate = targetm.sched.issue_rate ();
1005       reload_completed = temp;
1006     }
1007   else
1008     issue_rate = 1;
1009
1010   /* Initialize the scheduler.  */
1011   current_sched_info = &sms_sched_info;
1012   sched_init (NULL);
1013
1014   /* Init Data Flow analysis, to be used in interloop dep calculation.  */
1015   df = df_init ();
1016   df_analyze (df, 0, DF_ALL);
1017
1018   /* Allocate memory to hold the DDG array one entry for each loop.
1019      We use loop->num as index into this array.  */
1020   g_arr = xcalloc (loops->num, sizeof (ddg_ptr));
1021
1022
1023   /* Build DDGs for all the relevant loops and hold them in G_ARR
1024      indexed by the loop index.  */
1025   for (i = 0; i < loops->num; i++)
1026     {
1027       rtx head, tail;
1028       rtx count_reg, comp;
1029       struct loop *loop = loops->parray[i];
1030
1031       /* For debugging.  */
1032       if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
1033         {
1034           if (dump_file)
1035             fprintf (dump_file, "SMS reached MAX_PASSES... \n");
1036
1037           break;
1038         }
1039
1040       if (! loop_canon_p (loop, dump_file))
1041         continue;
1042
1043       if (! loop_single_full_bb_p (loop))
1044         continue;
1045
1046       bb = loop->header;
1047
1048       get_block_head_tail (bb->index, &head, &tail);
1049       latch_edge = loop_latch_edge (loop);
1050       gcc_assert (loop->single_exit);
1051       if (loop->single_exit->count)
1052         trip_count = latch_edge->count / loop->single_exit->count;
1053
1054       /* Perfrom SMS only on loops that their average count is above threshold.  */
1055
1056       if ( latch_edge->count
1057           && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1058         {
1059           if (stats_file)
1060             {
1061               rtx line_note = find_line_note (tail);
1062
1063               if (line_note)
1064                 {
1065                   expanded_location xloc;
1066                   NOTE_EXPANDED_LOCATION (xloc, line_note);
1067                   fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1068                            xloc.file, xloc.line);
1069                 }
1070               fprintf (stats_file, "SMS single-bb-loop\n");
1071               if (profile_info && flag_branch_probabilities)
1072                 {
1073                   fprintf (stats_file, "SMS loop-count ");
1074                   fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1075                            (HOST_WIDEST_INT) bb->count);
1076                   fprintf (stats_file, "\n");
1077                   fprintf (stats_file, "SMS trip-count ");
1078                   fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1079                            (HOST_WIDEST_INT) trip_count);
1080                   fprintf (stats_file, "\n");
1081                   fprintf (stats_file, "SMS profile-sum-max ");
1082                   fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1083                            (HOST_WIDEST_INT) profile_info->sum_max);
1084                   fprintf (stats_file, "\n");
1085                 }
1086             }
1087           continue;
1088         }
1089
1090       /* Make sure this is a doloop.  */
1091       if ( !(count_reg = doloop_register_get (tail, &comp)))
1092         continue;
1093
1094       /* Don't handle BBs with calls or barriers, or !single_set insns.  */
1095       for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1096         if (CALL_P (insn)
1097             || BARRIER_P (insn)
1098             || (INSN_P (insn) && !JUMP_P (insn)
1099                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1100           break;
1101
1102       if (insn != NEXT_INSN (tail))
1103         {
1104           if (stats_file)
1105             {
1106               if (CALL_P (insn))
1107                 fprintf (stats_file, "SMS loop-with-call\n");
1108               else if (BARRIER_P (insn))
1109                 fprintf (stats_file, "SMS loop-with-barrier\n");
1110               else
1111                 fprintf (stats_file, "SMS loop-with-not-single-set\n");
1112               print_rtl_single (stats_file, insn);
1113             }
1114
1115           continue;
1116         }
1117
1118       if (! (g = create_ddg (bb, df, 0)))
1119         {
1120           if (stats_file)
1121             fprintf (stats_file, "SMS doloop\n");
1122           continue;
1123         }
1124
1125       g_arr[i] = g;
1126     }
1127
1128   /* Release Data Flow analysis data structures.  */
1129   df_finish (df);
1130
1131   /* We don't want to perform SMS on new loops - created by versioning.  */
1132   num_loops = loops->num;
1133   /* Go over the built DDGs and perfrom SMS for each one of them.  */
1134   for (i = 0; i < num_loops; i++)
1135     {
1136       rtx head, tail;
1137       rtx count_reg, count_init, comp;
1138       int mii, rec_mii;
1139       unsigned stage_count = 0;
1140       HOST_WIDEST_INT loop_count = 0;
1141       struct loop *loop = loops->parray[i];
1142
1143       if (! (g = g_arr[i]))
1144         continue;
1145
1146       if (dump_file)
1147         print_ddg (dump_file, g);
1148
1149       get_block_head_tail (loop->header->index, &head, &tail);
1150
1151       latch_edge = loop_latch_edge (loop);
1152       gcc_assert (loop->single_exit);
1153       if (loop->single_exit->count)
1154         trip_count = latch_edge->count / loop->single_exit->count;
1155
1156       if (stats_file)
1157         {
1158           rtx line_note = find_line_note (tail);
1159
1160           if (line_note)
1161             {
1162               expanded_location xloc;
1163               NOTE_EXPANDED_LOCATION (xloc, line_note);
1164               fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1165                        xloc.file, xloc.line);
1166             }
1167           fprintf (stats_file, "SMS single-bb-loop\n");
1168           if (profile_info && flag_branch_probabilities)
1169             {
1170               fprintf (stats_file, "SMS loop-count ");
1171               fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1172                        (HOST_WIDEST_INT) bb->count);
1173               fprintf (stats_file, "\n");
1174               fprintf (stats_file, "SMS profile-sum-max ");
1175               fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1176                        (HOST_WIDEST_INT) profile_info->sum_max);
1177               fprintf (stats_file, "\n");
1178             }
1179           fprintf (stats_file, "SMS doloop\n");
1180           fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1181           fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1182           fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1183         }
1184
1185
1186       /* In case of th loop have doloop register it gets special
1187          handling.  */
1188       count_init = NULL_RTX;
1189       if ((count_reg = doloop_register_get (tail, &comp)))
1190         {
1191           basic_block pre_header;
1192
1193           pre_header = loop_preheader_edge (loop)->src;
1194           count_init = const_iteration_count (count_reg, pre_header,
1195                                               &loop_count);
1196         }
1197       gcc_assert (count_reg);
1198
1199       if (stats_file && count_init)
1200         {
1201           fprintf (stats_file, "SMS const-doloop ");
1202           fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1203                      loop_count);
1204           fprintf (stats_file, "\n");
1205         }
1206
1207       node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1208
1209       mii = 1; /* Need to pass some estimate of mii.  */
1210       rec_mii = sms_order_nodes (g, mii, node_order);
1211       mii = MAX (res_MII (g), rec_mii);
1212       maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1213
1214       if (stats_file)
1215         fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1216                  rec_mii, mii, maxii);
1217
1218       /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1219          ASAP.  */
1220       set_node_sched_params (g);
1221
1222       ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1223
1224       if (ps)
1225         stage_count = PS_STAGE_COUNT (ps);
1226
1227       /* Stage count of 1 means that there is no interleaving between
1228          iterations, let the scheduling passes do the job.  */
1229       if (stage_count < 1
1230           || (count_init && (loop_count <= stage_count))
1231           || (flag_branch_probabilities && (trip_count <= stage_count)))
1232         {
1233           if (dump_file)
1234             {
1235               fprintf (dump_file, "SMS failed... \n");
1236               fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1237               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1238               fprintf (dump_file, ", trip-count=");
1239               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1240               fprintf (dump_file, ")\n");
1241             }
1242           continue;
1243         }
1244       else
1245         {
1246           int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1247           int new_cycles;
1248           struct undo_replace_buff_elem *reg_move_replaces;
1249
1250           if (stats_file)
1251             {
1252               fprintf (stats_file,
1253                        "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1254                        stage_count);
1255               print_partial_schedule (ps, stats_file);
1256               fprintf (stats_file,
1257                        "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1258                        g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1259             }
1260
1261           /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
1262              the closing_branch was scheduled and should appear in the last (ii-1)
1263              row.  Otherwise, we are free to schedule the branch, and we let nodes
1264              that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1265              row; this should reduce stage_count to minimum.  */
1266           normalize_sched_times (ps);
1267           rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1268           set_columns_for_ps (ps);
1269
1270           /* Generate the kernel just to be able to measure its cycles.  */
1271           permute_partial_schedule (ps, g->closing_branch->first_note);
1272           reg_move_replaces = generate_reg_moves (ps);
1273
1274           /* Get the number of cycles the new kernel expect to execute in.  */
1275           new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1276
1277           /* Get back to the original loop so we can do loop versioning.  */
1278           undo_permute_partial_schedule (ps, g->closing_branch->first_note);
1279           if (reg_move_replaces)
1280             undo_generate_reg_moves (ps, reg_move_replaces);
1281
1282           if ( new_cycles >= orig_cycles)
1283             {
1284               /* SMS is not profitable so undo the permutation and reg move generation
1285                  and return the kernel to its original state.  */
1286               if (dump_file)
1287                 fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
1288
1289             }
1290           else
1291             {
1292               canon_loop (loop);
1293
1294               /* case the BCT count is not known , Do loop-versioning */
1295               if (count_reg && ! count_init)
1296                 {
1297                   rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1298                                                  GEN_INT(stage_count));
1299
1300                   nloop = loop_version (loops, loop, comp_rtx, &condition_bb);
1301                 }
1302
1303               /* Set new iteration count of loop kernel.  */
1304               if (count_reg && count_init)
1305                 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1306                                                              - stage_count + 1);
1307
1308               /* Now apply the scheduled kernel to the RTL of the loop.  */
1309               permute_partial_schedule (ps, g->closing_branch->first_note);
1310
1311               /* Mark this loop as software pipelined so the later
1312               scheduling passes doesn't touch it.  */
1313               if (! flag_resched_modulo_sched)
1314                 g->bb->flags |= BB_DISABLE_SCHEDULE;
1315               /* The life-info is not valid any more.  */
1316               g->bb->flags |= BB_DIRTY;
1317
1318               reg_move_replaces = generate_reg_moves (ps);
1319               if (dump_file)
1320                 print_node_sched_params (dump_file, g->num_nodes);
1321               /* Generate prolog and epilog.  */
1322               if (count_reg && !count_init)
1323                 generate_prolog_epilog (ps, loop, count_reg);
1324               else
1325                 generate_prolog_epilog (ps, loop, NULL_RTX);
1326             }
1327           free_undo_replace_buff (reg_move_replaces);
1328         }
1329
1330       free_partial_schedule (ps);
1331       free (node_sched_params);
1332       free (node_order);
1333       free_ddg (g);
1334     }
1335
1336   /* Release scheduler data, needed until now because of DFA.  */
1337   sched_finish ();
1338   loop_optimizer_finalize (loops, dump_file);
1339 }
1340
1341 /* The SMS scheduling algorithm itself
1342    -----------------------------------
1343    Input: 'O' an ordered list of insns of a loop.
1344    Output: A scheduling of the loop - kernel, prolog, and epilogue.
1345
1346    'Q' is the empty Set
1347    'PS' is the partial schedule; it holds the currently scheduled nodes with
1348         their cycle/slot.
1349    'PSP' previously scheduled predecessors.
1350    'PSS' previously scheduled successors.
1351    't(u)' the cycle where u is scheduled.
1352    'l(u)' is the latency of u.
1353    'd(v,u)' is the dependence distance from v to u.
1354    'ASAP(u)' the earliest time at which u could be scheduled as computed in
1355              the node ordering phase.
1356    'check_hardware_resources_conflicts(u, PS, c)'
1357                              run a trace around cycle/slot through DFA model
1358                              to check resource conflicts involving instruction u
1359                              at cycle c given the partial schedule PS.
1360    'add_to_partial_schedule_at_time(u, PS, c)'
1361                              Add the node/instruction u to the partial schedule
1362                              PS at time c.
1363    'calculate_register_pressure(PS)'
1364                              Given a schedule of instructions, calculate the register
1365                              pressure it implies.  One implementation could be the
1366                              maximum number of overlapping live ranges.
1367    'maxRP' The maximum allowed register pressure, it is usually derived from the number
1368            registers available in the hardware.
1369
1370    1. II = MII.
1371    2. PS = empty list
1372    3. for each node u in O in pre-computed order
1373    4.   if (PSP(u) != Q && PSS(u) == Q) then
1374    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1375    6.     start = Early_start; end = Early_start + II - 1; step = 1
1376    11.  else if (PSP(u) == Q && PSS(u) != Q) then
1377    12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1378    13.     start = Late_start; end = Late_start - II + 1; step = -1
1379    14.  else if (PSP(u) != Q && PSS(u) != Q) then
1380    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1381    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1382    17.     start = Early_start;
1383    18.     end = min(Early_start + II - 1 , Late_start);
1384    19.     step = 1
1385    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1386    21.    start = ASAP(u); end = start + II - 1; step = 1
1387    22.  endif
1388
1389    23.  success = false
1390    24.  for (c = start ; c != end ; c += step)
1391    25.     if check_hardware_resources_conflicts(u, PS, c) then
1392    26.       add_to_partial_schedule_at_time(u, PS, c)
1393    27.       success = true
1394    28.       break
1395    29.     endif
1396    30.  endfor
1397    31.  if (success == false) then
1398    32.    II = II + 1
1399    33.    if (II > maxII) then
1400    34.       finish - failed to schedule
1401    35.   endif
1402    36.    goto 2.
1403    37.  endif
1404    38. endfor
1405    39. if (calculate_register_pressure(PS) > maxRP) then
1406    40.    goto 32.
1407    41. endif
1408    42. compute epilogue & prologue
1409    43. finish - succeeded to schedule
1410 */
1411
1412 /* A limit on the number of cycles that resource conflicts can span.  ??? Should
1413    be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1414    set to 0 to save compile time.  */
1415 #define DFA_HISTORY SMS_DFA_HISTORY
1416
1417 /* Given the partial schedule PS, this function calculates and returns the
1418    cycles in which we can schedule the node with the given index I.
1419    NOTE: Here we do the backtracking in SMS, in some special cases. We have
1420    noticed that there are several cases in which we fail    to SMS the loop
1421    because the sched window of a node is empty    due to tight data-deps. In
1422    such cases we want to unschedule    some of the predecessors/successors
1423    until we get non-empty    scheduling window.  It returns -1 if the
1424    scheduling window is empty and zero otherwise.  */
1425
1426 static int
1427 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1428                   sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1429 {
1430   int start, step, end;
1431   ddg_edge_ptr e;
1432   int u = nodes_order [i];
1433   ddg_node_ptr u_node = &ps->g->nodes[u];
1434   sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1435   sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1436   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1437   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1438   int psp_not_empty;
1439   int pss_not_empty;
1440
1441   /* 1. compute sched window for u (start, end, step).  */
1442   sbitmap_zero (psp);
1443   sbitmap_zero (pss);
1444   psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1445   pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1446
1447   if (psp_not_empty && !pss_not_empty)
1448     {
1449       int early_start = INT_MIN;
1450
1451       end = INT_MAX;
1452       for (e = u_node->in; e != 0; e = e->next_in)
1453         {
1454           ddg_node_ptr v_node = e->src;
1455           if (TEST_BIT (sched_nodes, v_node->cuid))
1456             {
1457               int node_st = SCHED_TIME (v_node)
1458                             + e->latency - (e->distance * ii);
1459
1460               early_start = MAX (early_start, node_st);
1461
1462               if (e->data_type == MEM_DEP)
1463                 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1464             }
1465         }
1466       start = early_start;
1467       end = MIN (end, early_start + ii);
1468       step = 1;
1469     }
1470
1471   else if (!psp_not_empty && pss_not_empty)
1472     {
1473       int late_start = INT_MAX;
1474
1475       end = INT_MIN;
1476       for (e = u_node->out; e != 0; e = e->next_out)
1477         {
1478           ddg_node_ptr v_node = e->dest;
1479           if (TEST_BIT (sched_nodes, v_node->cuid))
1480             {
1481               late_start = MIN (late_start,
1482                                 SCHED_TIME (v_node) - e->latency
1483                                 + (e->distance * ii));
1484               if (e->data_type == MEM_DEP)
1485                 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1486             }
1487         }
1488       start = late_start;
1489       end = MAX (end, late_start - ii);
1490       step = -1;
1491     }
1492
1493   else if (psp_not_empty && pss_not_empty)
1494     {
1495       int early_start = INT_MIN;
1496       int late_start = INT_MAX;
1497
1498       start = INT_MIN;
1499       end = INT_MAX;
1500       for (e = u_node->in; e != 0; e = e->next_in)
1501         {
1502           ddg_node_ptr v_node = e->src;
1503
1504           if (TEST_BIT (sched_nodes, v_node->cuid))
1505             {
1506               early_start = MAX (early_start,
1507                                  SCHED_TIME (v_node) + e->latency
1508                                  - (e->distance * ii));
1509               if (e->data_type == MEM_DEP)
1510                 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1511             }
1512         }
1513       for (e = u_node->out; e != 0; e = e->next_out)
1514         {
1515           ddg_node_ptr v_node = e->dest;
1516
1517           if (TEST_BIT (sched_nodes, v_node->cuid))
1518             {
1519               late_start = MIN (late_start,
1520                                 SCHED_TIME (v_node) - e->latency
1521                                 + (e->distance * ii));
1522               if (e->data_type == MEM_DEP)
1523                 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1524             }
1525         }
1526       start = MAX (start, early_start);
1527       end = MIN (end, MIN (early_start + ii, late_start + 1));
1528       step = 1;
1529     }
1530   else /* psp is empty && pss is empty.  */
1531     {
1532       start = SCHED_ASAP (u_node);
1533       end = start + ii;
1534       step = 1;
1535     }
1536
1537   *start_p = start;
1538   *step_p = step;
1539   *end_p = end;
1540   sbitmap_free (psp);
1541   sbitmap_free (pss);
1542
1543   if ((start >= end && step == 1) || (start <= end && step == -1))
1544     return -1;
1545   else
1546     return 0;
1547 }
1548
1549 /* This function implements the scheduling algorithm for SMS according to the
1550    above algorithm.  */
1551 static partial_schedule_ptr
1552 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
1553 {
1554   int ii = mii;
1555   int i, c, success;
1556   int try_again_with_larger_ii = true;
1557   int num_nodes = g->num_nodes;
1558   ddg_edge_ptr e;
1559   int start, end, step; /* Place together into one struct?  */
1560   sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1561   sbitmap must_precede = sbitmap_alloc (num_nodes);
1562   sbitmap must_follow = sbitmap_alloc (num_nodes);
1563   sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1564
1565   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1566
1567   sbitmap_ones (tobe_scheduled);
1568   sbitmap_zero (sched_nodes);
1569
1570   while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
1571          || try_again_with_larger_ii ) && ii < maxii)
1572     {
1573       int j;
1574       bool unscheduled_nodes = false;
1575
1576       if (dump_file)
1577         fprintf(dump_file, "Starting with ii=%d\n", ii);
1578       if (try_again_with_larger_ii)
1579         {
1580           try_again_with_larger_ii = false;
1581           sbitmap_zero (sched_nodes);
1582         }
1583
1584       for (i = 0; i < num_nodes; i++)
1585         {
1586           int u = nodes_order[i];
1587           ddg_node_ptr u_node = &ps->g->nodes[u];
1588           rtx insn = u_node->insn;
1589
1590           if (!INSN_P (insn))
1591             {
1592               RESET_BIT (tobe_scheduled, u);
1593               continue;
1594             }
1595
1596           if (JUMP_P (insn)) /* Closing branch handled later.  */
1597             {
1598               RESET_BIT (tobe_scheduled, u);
1599               continue;
1600             }
1601
1602           if (TEST_BIT (sched_nodes, u))
1603             continue;
1604
1605           /* Try to get non-empty scheduling window.  */
1606           j = i;
1607           while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
1608                  && j > 0)
1609             {
1610               unscheduled_nodes = true;
1611               if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
1612                   || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
1613                 {
1614                   ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
1615                   RESET_BIT (sched_nodes, nodes_order [j - 1]);
1616                 }
1617               j--;
1618             }
1619           if (j < 0)
1620             {
1621               /* ??? Try backtracking instead of immediately ii++?  */
1622               ii++;
1623               try_again_with_larger_ii = true;
1624               reset_partial_schedule (ps, ii);
1625               break;
1626             }
1627           /* 2. Try scheduling u in window.  */
1628           if (dump_file)
1629             fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1630                     u, start, end, step);
1631
1632           /* use must_follow & must_precede bitmaps to determine order
1633              of nodes within the cycle.  */
1634           sbitmap_zero (must_precede);
1635           sbitmap_zero (must_follow);
1636           for (e = u_node->in; e != 0; e = e->next_in)
1637             if (TEST_BIT (sched_nodes, e->src->cuid)
1638                 && e->latency == (ii * e->distance)
1639                 && start == SCHED_TIME (e->src))
1640              SET_BIT (must_precede, e->src->cuid);
1641
1642           for (e = u_node->out; e != 0; e = e->next_out)
1643             if (TEST_BIT (sched_nodes, e->dest->cuid)
1644                 && e->latency == (ii * e->distance)
1645                 && end == SCHED_TIME (e->dest))
1646              SET_BIT (must_follow, e->dest->cuid);
1647
1648           success = 0;
1649           if ((step > 0 && start < end) ||  (step < 0 && start > end))
1650             for (c = start; c != end; c += step)
1651               {
1652                 ps_insn_ptr psi;
1653
1654                 psi = ps_add_node_check_conflicts (ps, u_node, c,
1655                                                    must_precede,
1656                                                    must_follow);
1657
1658                 if (psi)
1659                   {
1660                     SCHED_TIME (u_node) = c;
1661                     SET_BIT (sched_nodes, u);
1662                     success = 1;
1663                     if (dump_file)
1664                       fprintf(dump_file, "Schedule in %d\n", c);
1665                     break;
1666                   }
1667               }
1668           if (!success)
1669             {
1670               /* ??? Try backtracking instead of immediately ii++?  */
1671               ii++;
1672               try_again_with_larger_ii = true;
1673               reset_partial_schedule (ps, ii);
1674               break;
1675             }
1676           if (unscheduled_nodes)
1677             break;
1678
1679           /* ??? If (success), check register pressure estimates.  */
1680         } /* Continue with next node.  */
1681     } /* While try_again_with_larger_ii.  */
1682
1683   sbitmap_free (sched_nodes);
1684
1685   if (ii >= maxii)
1686     {
1687       free_partial_schedule (ps);
1688       ps = NULL;
1689     }
1690   return ps;
1691 }
1692
1693 \f
1694 /* This page implements the algorithm for ordering the nodes of a DDG
1695    for modulo scheduling, activated through the
1696    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
1697
1698 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1699 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1700 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1701 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1702 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1703 #define DEPTH(x) (ASAP ((x)))
1704
1705 typedef struct node_order_params * nopa;
1706
1707 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1708 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1709 static nopa  calculate_order_params (ddg_ptr, int mii);
1710 static int find_max_asap (ddg_ptr, sbitmap);
1711 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1712 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1713
1714 enum sms_direction {BOTTOMUP, TOPDOWN};
1715
1716 struct node_order_params
1717 {
1718   int asap;
1719   int alap;
1720   int height;
1721 };
1722
1723 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
1724 static void
1725 check_nodes_order (int *node_order, int num_nodes)
1726 {
1727   int i;
1728   sbitmap tmp = sbitmap_alloc (num_nodes);
1729
1730   sbitmap_zero (tmp);
1731
1732   for (i = 0; i < num_nodes; i++)
1733     {
1734       int u = node_order[i];
1735
1736       gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1737
1738       SET_BIT (tmp, u);
1739     }
1740
1741   sbitmap_free (tmp);
1742 }
1743
1744 /* Order the nodes of G for scheduling and pass the result in
1745    NODE_ORDER.  Also set aux.count of each node to ASAP.
1746    Return the recMII for the given DDG.  */
1747 static int
1748 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1749 {
1750   int i;
1751   int rec_mii = 0;
1752   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1753
1754   nopa nops = calculate_order_params (g, mii);
1755
1756   order_nodes_of_sccs (sccs, node_order);
1757
1758   if (sccs->num_sccs > 0)
1759     /* First SCC has the largest recurrence_length.  */
1760     rec_mii = sccs->sccs[0]->recurrence_length;
1761
1762   /* Save ASAP before destroying node_order_params.  */
1763   for (i = 0; i < g->num_nodes; i++)
1764     {
1765       ddg_node_ptr v = &g->nodes[i];
1766       v->aux.count = ASAP (v);
1767     }
1768
1769   free (nops);
1770   free_ddg_all_sccs (sccs);
1771   check_nodes_order (node_order, g->num_nodes);
1772
1773   return rec_mii;
1774 }
1775
1776 static void
1777 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1778 {
1779   int i, pos = 0;
1780   ddg_ptr g = all_sccs->ddg;
1781   int num_nodes = g->num_nodes;
1782   sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1783   sbitmap on_path = sbitmap_alloc (num_nodes);
1784   sbitmap tmp = sbitmap_alloc (num_nodes);
1785   sbitmap ones = sbitmap_alloc (num_nodes);
1786
1787   sbitmap_zero (prev_sccs);
1788   sbitmap_ones (ones);
1789
1790   /* Perfrom the node ordering starting from the SCC with the highest recMII.
1791      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
1792   for (i = 0; i < all_sccs->num_sccs; i++)
1793     {
1794       ddg_scc_ptr scc = all_sccs->sccs[i];
1795
1796       /* Add nodes on paths from previous SCCs to the current SCC.  */
1797       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1798       sbitmap_a_or_b (tmp, scc->nodes, on_path);
1799
1800       /* Add nodes on paths from the current SCC to previous SCCs.  */
1801       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1802       sbitmap_a_or_b (tmp, tmp, on_path);
1803
1804       /* Remove nodes of previous SCCs from current extended SCC.  */
1805       sbitmap_difference (tmp, tmp, prev_sccs);
1806
1807       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1808       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
1809     }
1810
1811   /* Handle the remaining nodes that do not belong to any scc.  Each call
1812      to order_nodes_in_scc handles a single connected component.  */
1813   while (pos < g->num_nodes)
1814     {
1815       sbitmap_difference (tmp, ones, prev_sccs);
1816       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1817     }
1818   sbitmap_free (prev_sccs);
1819   sbitmap_free (on_path);
1820   sbitmap_free (tmp);
1821   sbitmap_free (ones);
1822 }
1823
1824 /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
1825 static struct node_order_params *
1826 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1827 {
1828   int u;
1829   int max_asap;
1830   int num_nodes = g->num_nodes;
1831   ddg_edge_ptr e;
1832   /* Allocate a place to hold ordering params for each node in the DDG.  */
1833   nopa node_order_params_arr;
1834
1835   /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
1836   node_order_params_arr = (nopa) xcalloc (num_nodes,
1837                                           sizeof (struct node_order_params));
1838
1839   /* Set the aux pointer of each node to point to its order_params structure.  */
1840   for (u = 0; u < num_nodes; u++)
1841     g->nodes[u].aux.info = &node_order_params_arr[u];
1842
1843   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1844      calculate ASAP, ALAP, mobility, distance, and height for each node
1845      in the dependence (direct acyclic) graph.  */
1846
1847   /* We assume that the nodes in the array are in topological order.  */
1848
1849   max_asap = 0;
1850   for (u = 0; u < num_nodes; u++)
1851     {
1852       ddg_node_ptr u_node = &g->nodes[u];
1853
1854       ASAP (u_node) = 0;
1855       for (e = u_node->in; e; e = e->next_in)
1856         if (e->distance == 0)
1857           ASAP (u_node) = MAX (ASAP (u_node),
1858                                ASAP (e->src) + e->latency);
1859       max_asap = MAX (max_asap, ASAP (u_node));
1860     }
1861
1862   for (u = num_nodes - 1; u > -1; u--)
1863     {
1864       ddg_node_ptr u_node = &g->nodes[u];
1865
1866       ALAP (u_node) = max_asap;
1867       HEIGHT (u_node) = 0;
1868       for (e = u_node->out; e; e = e->next_out)
1869         if (e->distance == 0)
1870           {
1871             ALAP (u_node) = MIN (ALAP (u_node),
1872                                  ALAP (e->dest) - e->latency);
1873             HEIGHT (u_node) = MAX (HEIGHT (u_node),
1874                                    HEIGHT (e->dest) + e->latency);
1875           }
1876     }
1877
1878   return node_order_params_arr;
1879 }
1880
1881 static int
1882 find_max_asap (ddg_ptr g, sbitmap nodes)
1883 {
1884   int u;
1885   int max_asap = -1;
1886   int result = -1;
1887
1888   EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1889     {
1890       ddg_node_ptr u_node = &g->nodes[u];
1891
1892       if (max_asap < ASAP (u_node))
1893         {
1894           max_asap = ASAP (u_node);
1895           result = u;
1896         }
1897     });
1898   return result;
1899 }
1900
1901 static int
1902 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1903 {
1904   int u;
1905   int max_hv = -1;
1906   int min_mob = INT_MAX;
1907   int result = -1;
1908
1909   EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1910     {
1911       ddg_node_ptr u_node = &g->nodes[u];
1912
1913       if (max_hv < HEIGHT (u_node))
1914         {
1915           max_hv = HEIGHT (u_node);
1916           min_mob = MOB (u_node);
1917           result = u;
1918         }
1919       else if ((max_hv == HEIGHT (u_node))
1920                && (min_mob > MOB (u_node)))
1921         {
1922           min_mob = MOB (u_node);
1923           result = u;
1924         }
1925     });
1926   return result;
1927 }
1928
1929 static int
1930 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1931 {
1932   int u;
1933   int max_dv = -1;
1934   int min_mob = INT_MAX;
1935   int result = -1;
1936
1937   EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1938     {
1939       ddg_node_ptr u_node = &g->nodes[u];
1940
1941       if (max_dv < DEPTH (u_node))
1942         {
1943           max_dv = DEPTH (u_node);
1944           min_mob = MOB (u_node);
1945           result = u;
1946         }
1947       else if ((max_dv == DEPTH (u_node))
1948                && (min_mob > MOB (u_node)))
1949         {
1950           min_mob = MOB (u_node);
1951           result = u;
1952         }
1953     });
1954   return result;
1955 }
1956
1957 /* Places the nodes of SCC into the NODE_ORDER array starting
1958    at position POS, according to the SMS ordering algorithm.
1959    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1960    the NODE_ORDER array, starting from position zero.  */
1961 static int
1962 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1963                     int * node_order, int pos)
1964 {
1965   enum sms_direction dir;
1966   int num_nodes = g->num_nodes;
1967   sbitmap workset = sbitmap_alloc (num_nodes);
1968   sbitmap tmp = sbitmap_alloc (num_nodes);
1969   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1970   sbitmap predecessors = sbitmap_alloc (num_nodes);
1971   sbitmap successors = sbitmap_alloc (num_nodes);
1972
1973   sbitmap_zero (predecessors);
1974   find_predecessors (predecessors, g, nodes_ordered);
1975
1976   sbitmap_zero (successors);
1977   find_successors (successors, g, nodes_ordered);
1978
1979   sbitmap_zero (tmp);
1980   if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1981     {
1982       sbitmap_copy (workset, tmp);
1983       dir = BOTTOMUP;
1984     }
1985   else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1986     {
1987       sbitmap_copy (workset, tmp);
1988       dir = TOPDOWN;
1989     }
1990   else
1991     {
1992       int u;
1993
1994       sbitmap_zero (workset);
1995       if ((u = find_max_asap (g, scc)) >= 0)
1996         SET_BIT (workset, u);
1997       dir = BOTTOMUP;
1998     }
1999
2000   sbitmap_zero (zero_bitmap);
2001   while (!sbitmap_equal (workset, zero_bitmap))
2002     {
2003       int v;
2004       ddg_node_ptr v_node;
2005       sbitmap v_node_preds;
2006       sbitmap v_node_succs;
2007
2008       if (dir == TOPDOWN)
2009         {
2010           while (!sbitmap_equal (workset, zero_bitmap))
2011             {
2012               v = find_max_hv_min_mob (g, workset);
2013               v_node = &g->nodes[v];
2014               node_order[pos++] = v;
2015               v_node_succs = NODE_SUCCESSORS (v_node);
2016               sbitmap_a_and_b (tmp, v_node_succs, scc);
2017
2018               /* Don't consider the already ordered successors again.  */
2019               sbitmap_difference (tmp, tmp, nodes_ordered);
2020               sbitmap_a_or_b (workset, workset, tmp);
2021               RESET_BIT (workset, v);
2022               SET_BIT (nodes_ordered, v);
2023             }
2024           dir = BOTTOMUP;
2025           sbitmap_zero (predecessors);
2026           find_predecessors (predecessors, g, nodes_ordered);
2027           sbitmap_a_and_b (workset, predecessors, scc);
2028         }
2029       else
2030         {
2031           while (!sbitmap_equal (workset, zero_bitmap))
2032             {
2033               v = find_max_dv_min_mob (g, workset);
2034               v_node = &g->nodes[v];
2035               node_order[pos++] = v;
2036               v_node_preds = NODE_PREDECESSORS (v_node);
2037               sbitmap_a_and_b (tmp, v_node_preds, scc);
2038
2039               /* Don't consider the already ordered predecessors again.  */
2040               sbitmap_difference (tmp, tmp, nodes_ordered);
2041               sbitmap_a_or_b (workset, workset, tmp);
2042               RESET_BIT (workset, v);
2043               SET_BIT (nodes_ordered, v);
2044             }
2045           dir = TOPDOWN;
2046           sbitmap_zero (successors);
2047           find_successors (successors, g, nodes_ordered);
2048           sbitmap_a_and_b (workset, successors, scc);
2049         }
2050     }
2051   sbitmap_free (tmp);
2052   sbitmap_free (workset);
2053   sbitmap_free (zero_bitmap);
2054   sbitmap_free (predecessors);
2055   sbitmap_free (successors);
2056   return pos;
2057 }
2058
2059 \f
2060 /* This page contains functions for manipulating partial-schedules during
2061    modulo scheduling.  */
2062
2063 /* Create a partial schedule and allocate a memory to hold II rows.  */
2064 partial_schedule_ptr
2065 create_partial_schedule (int ii, ddg_ptr g, int history)
2066 {
2067   partial_schedule_ptr ps = (partial_schedule_ptr)
2068                              xmalloc (sizeof (struct partial_schedule));
2069   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2070   ps->ii = ii;
2071   ps->history = history;
2072   ps->min_cycle = INT_MAX;
2073   ps->max_cycle = INT_MIN;
2074   ps->g = g;
2075
2076   return ps;
2077 }
2078
2079 /* Free the PS_INSNs in rows array of the given partial schedule.
2080    ??? Consider caching the PS_INSN's.  */
2081 static void
2082 free_ps_insns (partial_schedule_ptr ps)
2083 {
2084   int i;
2085
2086   for (i = 0; i < ps->ii; i++)
2087     {
2088       while (ps->rows[i])
2089         {
2090           ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2091
2092           free (ps->rows[i]);
2093           ps->rows[i] = ps_insn;
2094         }
2095       ps->rows[i] = NULL;
2096     }
2097 }
2098
2099 /* Free all the memory allocated to the partial schedule.  */
2100 void
2101 free_partial_schedule (partial_schedule_ptr ps)
2102 {
2103   if (!ps)
2104     return;
2105   free_ps_insns (ps);
2106   free (ps->rows);
2107   free (ps);
2108 }
2109
2110 /* Clear the rows array with its PS_INSNs, and create a new one with
2111    NEW_II rows.  */
2112 void
2113 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2114 {
2115   if (!ps)
2116     return;
2117   free_ps_insns (ps);
2118   if (new_ii == ps->ii)
2119     return;
2120   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2121                                                  * sizeof (ps_insn_ptr));
2122   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2123   ps->ii = new_ii;
2124   ps->min_cycle = INT_MAX;
2125   ps->max_cycle = INT_MIN;
2126 }
2127
2128 /* Prints the partial schedule as an ii rows array, for each rows
2129    print the ids of the insns in it.  */
2130 void
2131 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2132 {
2133   int i;
2134
2135   for (i = 0; i < ps->ii; i++)
2136     {
2137       ps_insn_ptr ps_i = ps->rows[i];
2138
2139       fprintf (dump, "\n[CYCLE %d ]: ", i);
2140       while (ps_i)
2141         {
2142           fprintf (dump, "%d, ",
2143                    INSN_UID (ps_i->node->insn));
2144           ps_i = ps_i->next_in_row;
2145         }
2146     }
2147 }
2148
2149 /* Creates an object of PS_INSN and initializes it to the given parameters.  */
2150 static ps_insn_ptr
2151 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2152 {
2153   ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
2154
2155   ps_i->node = node;
2156   ps_i->next_in_row = NULL;
2157   ps_i->prev_in_row = NULL;
2158   ps_i->row_rest_count = rest_count;
2159   ps_i->cycle = cycle;
2160
2161   return ps_i;
2162 }
2163
2164
2165 /* Removes the given PS_INSN from the partial schedule.  Returns false if the
2166    node is not found in the partial schedule, else returns true.  */
2167 static bool
2168 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2169 {
2170   int row;
2171
2172   if (!ps || !ps_i)
2173     return false;
2174
2175   row = SMODULO (ps_i->cycle, ps->ii);
2176   if (! ps_i->prev_in_row)
2177     {
2178       if (ps_i != ps->rows[row])
2179         return false;
2180
2181       ps->rows[row] = ps_i->next_in_row;
2182       if (ps->rows[row])
2183         ps->rows[row]->prev_in_row = NULL;
2184     }
2185   else
2186     {
2187       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2188       if (ps_i->next_in_row)
2189         ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2190     }
2191   free (ps_i);
2192   return true;
2193 }
2194
2195 /* Unlike what literature describes for modulo scheduling (which focuses
2196    on VLIW machines) the order of the instructions inside a cycle is
2197    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2198    where the current instruction should go relative to the already
2199    scheduled instructions in the given cycle.  Go over these
2200    instructions and find the first possible column to put it in.  */
2201 static bool
2202 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2203                      sbitmap must_precede, sbitmap must_follow)
2204 {
2205   ps_insn_ptr next_ps_i;
2206   ps_insn_ptr first_must_follow = NULL;
2207   ps_insn_ptr last_must_precede = NULL;
2208   int row;
2209
2210   if (! ps_i)
2211     return false;
2212
2213   row = SMODULO (ps_i->cycle, ps->ii);
2214
2215   /* Find the first must follow and the last must precede
2216      and insert the node immediately after the must precede
2217      but make sure that it there is no must follow after it.  */
2218   for (next_ps_i = ps->rows[row];
2219        next_ps_i;
2220        next_ps_i = next_ps_i->next_in_row)
2221     {
2222       if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2223           && ! first_must_follow)
2224         first_must_follow = next_ps_i;
2225       if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2226         {
2227           /* If we have already met a node that must follow, then
2228              there is no possible column.  */
2229           if (first_must_follow)
2230             return false;
2231           else
2232             last_must_precede = next_ps_i;
2233         }
2234     }
2235
2236   /* Now insert the node after INSERT_AFTER_PSI.  */
2237
2238   if (! last_must_precede)
2239     {
2240       ps_i->next_in_row = ps->rows[row];
2241       ps_i->prev_in_row = NULL;
2242       if (ps_i->next_in_row)
2243         ps_i->next_in_row->prev_in_row = ps_i;
2244       ps->rows[row] = ps_i;
2245     }
2246   else
2247     {
2248       ps_i->next_in_row = last_must_precede->next_in_row;
2249       last_must_precede->next_in_row = ps_i;
2250       ps_i->prev_in_row = last_must_precede;
2251       if (ps_i->next_in_row)
2252         ps_i->next_in_row->prev_in_row = ps_i;
2253     }
2254
2255   return true;
2256 }
2257
2258 /* Advances the PS_INSN one column in its current row; returns false
2259    in failure and true in success.  Bit N is set in MUST_FOLLOW if
2260    the node with cuid N must be come after the node pointed to by
2261    PS_I when scheduled in the same cycle.  */
2262 static int
2263 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2264                         sbitmap must_follow)
2265 {
2266   ps_insn_ptr prev, next;
2267   int row;
2268   ddg_node_ptr next_node;
2269
2270   if (!ps || !ps_i)
2271     return false;
2272
2273   row = SMODULO (ps_i->cycle, ps->ii);
2274
2275   if (! ps_i->next_in_row)
2276     return false;
2277
2278   next_node = ps_i->next_in_row->node;
2279
2280   /* Check if next_in_row is dependent on ps_i, both having same sched
2281      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
2282   if (TEST_BIT (must_follow, next_node->cuid))
2283     return false;
2284
2285   /* Advance PS_I over its next_in_row in the doubly linked list.  */
2286   prev = ps_i->prev_in_row;
2287   next = ps_i->next_in_row;
2288
2289   if (ps_i == ps->rows[row])
2290     ps->rows[row] = next;
2291
2292   ps_i->next_in_row = next->next_in_row;
2293
2294   if (next->next_in_row)
2295     next->next_in_row->prev_in_row = ps_i;
2296
2297   next->next_in_row = ps_i;
2298   ps_i->prev_in_row = next;
2299
2300   next->prev_in_row = prev;
2301   if (prev)
2302     prev->next_in_row = next;
2303
2304   return true;
2305 }
2306
2307 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2308    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
2309    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2310    before/after (respectively) the node pointed to by PS_I when scheduled
2311    in the same cycle.  */
2312 static ps_insn_ptr
2313 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2314                 sbitmap must_precede, sbitmap must_follow)
2315 {
2316   ps_insn_ptr ps_i;
2317   int rest_count = 1;
2318   int row = SMODULO (cycle, ps->ii);
2319
2320   if (ps->rows[row]
2321       && ps->rows[row]->row_rest_count >= issue_rate)
2322     return NULL;
2323
2324   if (ps->rows[row])
2325     rest_count += ps->rows[row]->row_rest_count;
2326
2327   ps_i = create_ps_insn (node, rest_count, cycle);
2328
2329   /* Finds and inserts PS_I according to MUST_FOLLOW and
2330      MUST_PRECEDE.  */
2331   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2332     {
2333       free (ps_i);
2334       return NULL;
2335     }
2336
2337   return ps_i;
2338 }
2339
2340 /* Advance time one cycle.  Assumes DFA is being used.  */
2341 static void
2342 advance_one_cycle (void)
2343 {
2344   if (targetm.sched.dfa_pre_cycle_insn)
2345     state_transition (curr_state,
2346                       targetm.sched.dfa_pre_cycle_insn ());
2347
2348   state_transition (curr_state, NULL);
2349
2350   if (targetm.sched.dfa_post_cycle_insn)
2351     state_transition (curr_state,
2352                       targetm.sched.dfa_post_cycle_insn ());
2353 }
2354
2355 /* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
2356    the number of cycles according to DFA that the kernel fits in,
2357    we use this to check if we done well with SMS after we add
2358    register moves.  In some cases register moves overhead makes
2359    it even worse than the original loop.  We want SMS to be performed
2360    when it gives less cycles after register moves are added.  */
2361 static int
2362 kernel_number_of_cycles (rtx first_insn, rtx last_insn)
2363 {
2364   int cycles = 0;
2365   rtx insn;
2366   int can_issue_more = issue_rate;
2367
2368   state_reset (curr_state);
2369
2370   for (insn = first_insn;
2371        insn != NULL_RTX && insn != last_insn;
2372        insn = NEXT_INSN (insn))
2373     {
2374       if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
2375         continue;
2376
2377       /* Check if there is room for the current insn.  */
2378       if (!can_issue_more || state_dead_lock_p (curr_state))
2379         {
2380           cycles ++;
2381           advance_one_cycle ();
2382           can_issue_more = issue_rate;
2383         }
2384
2385         /* Update the DFA state and return with failure if the DFA found
2386            recource conflicts.  */
2387       if (state_transition (curr_state, insn) >= 0)
2388         {
2389           cycles ++;
2390           advance_one_cycle ();
2391           can_issue_more = issue_rate;
2392         }
2393
2394       if (targetm.sched.variable_issue)
2395         can_issue_more =
2396           targetm.sched.variable_issue (sched_dump, sched_verbose,
2397                                         insn, can_issue_more);
2398       /* A naked CLOBBER or USE generates no instruction, so don't
2399          let them consume issue slots.  */
2400       else if (GET_CODE (PATTERN (insn)) != USE
2401                && GET_CODE (PATTERN (insn)) != CLOBBER)
2402         can_issue_more--;
2403     }
2404   return cycles;
2405 }
2406
2407 /* Checks if PS has resource conflicts according to DFA, starting from
2408    FROM cycle to TO cycle; returns true if there are conflicts and false
2409    if there are no conflicts.  Assumes DFA is being used.  */
2410 static int
2411 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2412 {
2413   int cycle;
2414
2415   state_reset (curr_state);
2416
2417   for (cycle = from; cycle <= to; cycle++)
2418     {
2419       ps_insn_ptr crr_insn;
2420       /* Holds the remaining issue slots in the current row.  */
2421       int can_issue_more = issue_rate;
2422
2423       /* Walk through the DFA for the current row.  */
2424       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2425            crr_insn;
2426            crr_insn = crr_insn->next_in_row)
2427         {
2428           rtx insn = crr_insn->node->insn;
2429
2430           if (!INSN_P (insn))
2431             continue;
2432
2433           /* Check if there is room for the current insn.  */
2434           if (!can_issue_more || state_dead_lock_p (curr_state))
2435             return true;
2436
2437           /* Update the DFA state and return with failure if the DFA found
2438              recource conflicts.  */
2439           if (state_transition (curr_state, insn) >= 0)
2440             return true;
2441
2442           if (targetm.sched.variable_issue)
2443             can_issue_more =
2444               targetm.sched.variable_issue (sched_dump, sched_verbose,
2445                                             insn, can_issue_more);
2446           /* A naked CLOBBER or USE generates no instruction, so don't
2447              let them consume issue slots.  */
2448           else if (GET_CODE (PATTERN (insn)) != USE
2449                    && GET_CODE (PATTERN (insn)) != CLOBBER)
2450             can_issue_more--;
2451         }
2452
2453       /* Advance the DFA to the next cycle.  */
2454       advance_one_cycle ();
2455     }
2456   return false;
2457 }
2458
2459 /* Checks if the given node causes resource conflicts when added to PS at
2460    cycle C.  If not the node is added to PS and returned; otherwise zero
2461    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2462    cuid N must be come before/after (respectively) the node pointed to by
2463    PS_I when scheduled in the same cycle.  */
2464 ps_insn_ptr
2465 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2466                              int c, sbitmap must_precede,
2467                              sbitmap must_follow)
2468 {
2469   int has_conflicts = 0;
2470   ps_insn_ptr ps_i;
2471
2472   /* First add the node to the PS, if this succeeds check for
2473      conflicts, trying different issue slots in the same row.  */
2474   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2475     return NULL; /* Failed to insert the node at the given cycle.  */
2476
2477   has_conflicts = ps_has_conflicts (ps, c, c)
2478                   || (ps->history > 0
2479                       && ps_has_conflicts (ps,
2480                                            c - ps->history,
2481                                            c + ps->history));
2482
2483   /* Try different issue slots to find one that the given node can be
2484      scheduled in without conflicts.  */
2485   while (has_conflicts)
2486     {
2487       if (! ps_insn_advance_column (ps, ps_i, must_follow))
2488         break;
2489       has_conflicts = ps_has_conflicts (ps, c, c)
2490                       || (ps->history > 0
2491                           && ps_has_conflicts (ps,
2492                                                c - ps->history,
2493                                                c + ps->history));
2494     }
2495
2496   if (has_conflicts)
2497     {
2498       remove_node_from_ps (ps, ps_i);
2499       return NULL;
2500     }
2501
2502   ps->min_cycle = MIN (ps->min_cycle, c);
2503   ps->max_cycle = MAX (ps->max_cycle, c);
2504   return ps_i;
2505 }
2506
2507 /* Rotate the rows of PS such that insns scheduled at time
2508    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
2509 void
2510 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2511 {
2512   int i, row, backward_rotates;
2513   int last_row = ps->ii - 1;
2514
2515   if (start_cycle == 0)
2516     return;
2517
2518   backward_rotates = SMODULO (start_cycle, ps->ii);
2519
2520   /* Revisit later and optimize this into a single loop.  */
2521   for (i = 0; i < backward_rotates; i++)
2522     {
2523       ps_insn_ptr first_row = ps->rows[0];
2524
2525       for (row = 0; row < last_row; row++)
2526         ps->rows[row] = ps->rows[row+1];
2527
2528       ps->rows[last_row] = first_row;
2529     }
2530
2531   ps->max_cycle -= start_cycle;
2532   ps->min_cycle -= start_cycle;
2533 }
2534
2535 /* Remove the node N from the partial schedule PS; because we restart the DFA
2536    each time we want to check for resource conflicts; this is equivalent to
2537    unscheduling the node N.  */
2538 static bool
2539 ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2540 {
2541   ps_insn_ptr ps_i;
2542   int row = SMODULO (SCHED_TIME (n), ps->ii);
2543
2544   if (row < 0 || row > ps->ii)
2545     return false;
2546
2547   for (ps_i = ps->rows[row];
2548        ps_i &&  ps_i->node != n;
2549        ps_i = ps_i->next_in_row);
2550   if (!ps_i)
2551     return false;
2552
2553   return remove_node_from_ps (ps, ps_i);
2554 }
2555 #endif /* INSN_SCHEDULING*/
2556