2009-07-17 Richard Guenther <rguenther@suse.de>

[pf3gnuchains/gcc-fork.git] / gcc / modulo-sched.c
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c

index a203664..5176880 100644 (file)
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -1,5 +1,5 @@
  /* Swing Modulo Scheduling implementation.
-   Copyright (C) 2004, 2005, 2006, 2007
+   Copyright (C) 2004, 2005, 2006, 2007, 2008
     Free Software Foundation, Inc.
     Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
  
@@ -187,13 +187,6 @@ static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
  /* This page defines constants and structures for the modulo scheduling
     driver.  */
  
-/* As in haifa-sched.c:  */
-/* issue_rate is the number of insns that can be scheduled in the same
-   machine cycle.  It can be defined in the config/mach/mach.h file,
-   otherwise we set it to 1.  */
-
-static int issue_rate;
-
  static int sms_order_nodes (ddg_ptr, int, int *, int *);
  static void set_node_sched_params (ddg_ptr);
  static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
@@ -242,7 +235,7 @@ typedef struct node_sched_params
     code in order to use sched_analyze() for computing the dependencies.
     They are used when initializing the sched_info structure.  */
  static const char *
-sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
+sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
  {
    static char tmp[80];
  
@@ -258,7 +251,17 @@ compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
  {
  }
  
-static struct sched_info sms_sched_info =
+static struct common_sched_info_def sms_common_sched_info;
+
+static struct sched_deps_info_def sms_sched_deps_info =
+  {
+    compute_jump_reg_dependencies,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL,
+    0, 0, 0
+  };
+
+static struct haifa_sched_info sms_sched_info =
  {
    NULL,
    NULL,
@@ -267,16 +270,15 @@ static struct sched_info sms_sched_info =
    NULL,
    sms_print_insn,
    NULL,
-  compute_jump_reg_dependencies,
+  NULL, /* insn_finishes_block_p */
    NULL, NULL,
    NULL, NULL,
-  0, 0, 0,
+  0, 0,
  
-  NULL, NULL, NULL, NULL, NULL,
+  NULL, NULL, NULL, 
    0
  };
  
-
  /* Given HEAD and TAIL which are the first and last insns in a loop;
     return the register which controls the loop.  Return zero if it has
     more than one occurrence in the loop besides the control part or the
@@ -352,7 +354,7 @@ const_iteration_count (rtx count_reg, basic_block pre_header,
        {
         rtx pat = single_set (insn);
  
-       if (GET_CODE (SET_SRC (pat)) == CONST_INT)
+       if (CONST_INT_P (SET_SRC (pat)))
           {
             *count = INTVAL (SET_SRC (pat));
             return insn;
@@ -856,6 +858,19 @@ canon_loop (struct loop *loop)
      }
  }
  
+/* Setup infos.  */
+static void
+setup_sched_infos (void)
+{
+  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
+         sizeof (sms_common_sched_info));
+  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
+  common_sched_info = &sms_common_sched_info;
+
+  sched_deps_info = &sms_sched_deps_info;
+  current_sched_info = &sms_sched_info;
+}
+
  /* Probability in % that the sms-ed loop rolls enough so that optimized
     version may be entered.  Just a guess.  */
  #define PROB_SMS_ENOUGH_ITERATIONS 80
@@ -901,16 +916,8 @@ sms_schedule (void)
      issue_rate = 1;
  
    /* Initialize the scheduler.  */
-  current_sched_info = &sms_sched_info;
-
-  /* Init Data Flow analysis, to be used in interloop dep calculation.  */
-  df_set_flags (DF_LR_RUN_DCE);
-  df_rd_add_problem ();
-  df_note_add_problem ();
-  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
-  df_analyze ();
-  regstat_compute_calls_crossed ();
-  sched_init ();
+  setup_sched_infos ();
+  haifa_sched_init ();
  
    /* Allocate memory to hold the DDG array one entry for each loop.
       We use loop->num as index into this array.  */
@@ -965,7 +972,7 @@ sms_schedule (void)
        if (single_exit (loop)->count)
         trip_count = latch_edge->count / single_exit (loop)->count;
  
-      /* Perfrom SMS only on loops that their average count is above threshold.  */
+      /* Perform SMS only on loops that their average count is above threshold.  */
  
        if ( latch_edge->count
            && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
@@ -1045,7 +1052,7 @@ sms_schedule (void)
        if (! (g = create_ddg (bb, 0)))
          {
            if (dump_file)
-           fprintf (dump_file, "SMS doloop\n");
+           fprintf (dump_file, "SMS create_ddg failed\n");
           continue;
          }
  
@@ -1150,12 +1157,14 @@ sms_schedule (void)
  
        ps = sms_schedule_by_order (g, mii, maxii, node_order);
  
-      if (ps)
+      if (ps){
         stage_count = PS_STAGE_COUNT (ps);
+        gcc_assert(stage_count >= 1);
+      }  
  
        /* Stage count of 1 means that there is no interleaving between
           iterations, let the scheduling passes do the job.  */
-      if (stage_count < 1
+      if (stage_count <= 1
           || (count_init && (loop_count <= stage_count))
           || (flag_branch_probabilities && (trip_count <= stage_count)))
         {
@@ -1242,11 +1251,10 @@ sms_schedule (void)
        free_ddg (g);
      }
  
-  regstat_free_calls_crossed ();
    free (g_arr);
  
    /* Release scheduler data, needed until now because of DFA.  */
-  sched_finish ();
+  haifa_sched_finish ();
    loop_optimizer_finalize ();
  }
  
@@ -1587,18 +1595,17 @@ get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
  
  /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
     node currently been scheduled.  At the end of the calculation
-   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
-   which are in SCHED_NODES (already scheduled nodes) and scheduled at
-   the same row as the first/last row of U_NODE's scheduling window.
+   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
+   U_NODE which are (1) already scheduled in the first/last row of
+   U_NODE's scheduling window, (2) whose dependence inequality with U
+   becomes an equality when U is scheduled in this same row, and (3)
+   whose dependence latency is zero.
+
     The first and last rows are calculated using the following parameters:
     START/END rows - The cycles that begins/ends the traversal on the window;
     searching for an empty cycle to schedule U_NODE.
     STEP - The direction in which we traverse the window.
-   II - The initiation interval.
-   TODO: We can add an insn to the must_precede/must_follow bitmap only
-   if it has tight dependence to U and they are both scheduled in the
-   same row.  The current check is more conservative and content with
-   the fact that both U and the insn are scheduled in the same row.  */
+   II - The initiation interval.  */
  
  static void
  calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
@@ -1607,7 +1614,6 @@ calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
  {
    ddg_edge_ptr e;
    int first_cycle_in_window, last_cycle_in_window;
-  int first_row_in_window, last_row_in_window;
  
    gcc_assert (must_precede && must_follow);
  
@@ -1621,18 +1627,27 @@ calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
    first_cycle_in_window = (step == 1) ? start : end - step;
    last_cycle_in_window = (step == 1) ? end - step : start;
  
-  first_row_in_window = SMODULO (first_cycle_in_window, ii);
-  last_row_in_window = SMODULO (last_cycle_in_window, ii);
-
    sbitmap_zero (must_precede);
    sbitmap_zero (must_follow);
  
    if (dump_file)
      fprintf (dump_file, "\nmust_precede: ");
  
+  /* Instead of checking if:
+      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
+      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
+             first_cycle_in_window)
+      && e->latency == 0
+     we use the fact that latency is non-negative:
+      SCHED_TIME (e->src) - (e->distance * ii) <=
+      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
+      first_cycle_in_window
+     and check only if
+      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
    for (e = u_node->in; e != 0; e = e->next_in)
      if (TEST_BIT (sched_nodes, e->src->cuid)
-       && (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window))
+       && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
+             first_cycle_in_window))
        {
         if (dump_file)
           fprintf (dump_file, "%d ", e->src->cuid);
@@ -1643,9 +1658,21 @@ calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
    if (dump_file)
      fprintf (dump_file, "\nmust_follow: ");
  
+  /* Instead of checking if:
+      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
+      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
+             last_cycle_in_window)
+      && e->latency == 0
+     we use the fact that latency is non-negative:
+      SCHED_TIME (e->dest) + (e->distance * ii) >=
+      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >= 
+      last_cycle_in_window
+     and check only if
+      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
    for (e = u_node->out; e != 0; e = e->next_out)
      if (TEST_BIT (sched_nodes, e->dest->cuid)
-       && (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window))
+       && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
+             last_cycle_in_window))
        {
         if (dump_file)
           fprintf (dump_file, "%d ", e->dest->cuid);
@@ -1661,7 +1688,7 @@ calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
     parameters to decide if that's possible:
     PS - The partial schedule.
     U - The serial number of U_NODE.
-   NUM_SPLITS - The number of row spilts made so far.
+   NUM_SPLITS - The number of row splits made so far.
     MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
     the first row of the scheduling window)
     MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
@@ -1669,7 +1696,7 @@ calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
  
  static bool
  try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
-                             int u, int row, sbitmap sched_nodes,
+                             int u, int cycle, sbitmap sched_nodes,
                               int *num_splits, sbitmap must_precede,
                               sbitmap must_follow)
  {
@@ -1677,16 +1704,16 @@ try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
    bool success = 0;
  
    verify_partial_schedule (ps, sched_nodes);
-  psi = ps_add_node_check_conflicts (ps, u_node, row,
+  psi = ps_add_node_check_conflicts (ps, u_node, cycle,
                                      must_precede, must_follow);
    if (psi)
      {
-      SCHED_TIME (u_node) = row;
+      SCHED_TIME (u_node) = cycle;
        SET_BIT (sched_nodes, u);
        success = 1;
        *num_splits = 0;
        if (dump_file)
-       fprintf (dump_file, "Scheduled w/o split in %d\n", row);
+       fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
  
      }
  
@@ -1808,11 +1835,14 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
                  }
  
                num_splits++;
+              /* The scheduling window is exclusive of 'end'
+                 whereas compute_split_window() expects an inclusive,
+                 ordered range.  */
                if (step == 1)
-                split_row = compute_split_row (sched_nodes, start, end,
+                split_row = compute_split_row (sched_nodes, start, end - 1,
                                                 ps->ii, u_node);
                else
-                split_row = compute_split_row (sched_nodes, end, start,
+                split_row = compute_split_row (sched_nodes, end + 1, start,
                                                 ps->ii, u_node);
  
                ps_insert_empty_row (ps, split_row, sched_nodes);
@@ -2106,7 +2136,7 @@ order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
    sbitmap_zero (prev_sccs);
    sbitmap_ones (ones);
  
-  /* Perfrom the node ordering starting from the SCC with the highest recMII.
+  /* Perform the node ordering starting from the SCC with the highest recMII.
       For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
    for (i = 0; i < all_sccs->num_sccs; i++)
      {
@@ -2472,7 +2502,7 @@ print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
      {
        ps_insn_ptr ps_i = ps->rows[i];
  
-      fprintf (dump, "\n[CYCLE %d ]: ", i);
+      fprintf (dump, "\n[ROW %d ]: ", i);
        while (ps_i)
         {
           fprintf (dump, "%d, ",
@@ -2721,7 +2751,7 @@ ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
             return true;
  
           /* Update the DFA state and return with failure if the DFA found
-            recource conflicts.  */
+            resource conflicts.  */
           if (state_transition (curr_state, insn) >= 0)
             return true;
  
@@ -2852,8 +2882,10 @@ rest_of_handle_sms (void)
    return 0;
  }
  
-struct tree_opt_pass pass_sms =
+struct rtl_opt_pass pass_sms =
  {
+ {
+  RTL_PASS,
    "sms",                                /* name */
    gate_handle_sms,                      /* gate */
    rest_of_handle_sms,                   /* execute */
@@ -2867,7 +2899,7 @@ struct tree_opt_pass pass_sms =
    TODO_dump_func,                       /* todo_flags_start */
    TODO_df_finish | TODO_verify_rtl_sharing |
    TODO_dump_func |
-  TODO_ggc_collect,                     /* todo_flags_finish */
-  'm'                                   /* letter */
+  TODO_ggc_collect                      /* todo_flags_finish */
+ }
  };