From: revitale Date: Wed, 8 Aug 2007 12:21:37 +0000 (+0000) Subject: Remove profitability check X-Git-Url: http://git.sourceforge.jp/view?a=commitdiff_plain;h=896dbaedeef7fd0bf7ea6682fc91678e5eb6023b;p=pf3gnuchains%2Fgcc-fork.git Remove profitability check git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127293 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 15deba017eb..55296107009 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2007-08-08 Vladimir Yanovsky + Revital Eres + + * ddg.c (print_ddg): Add dump information. + * modulo-sched.c (print_node_sched_params): Add parameter and + verbosity. + (calculate_maxii): Remove function. + (undo_generate_reg_moves): Likewise. + (undo_permute_partial_schedule): Likewise. + (kernel_number_of_cycles): Likewise. + (MAXII_FACTOR): New definition to calculate the upper bound of II. + (sms_schedule): Use it. Remove profitability checks. + (sms_schedule_by_order): Fix order of nodes within the cycle. + 2007-08-08 Samuel Thibault * gcc/config/i386/gnu.h (STARTFILE_SPEC): Use gcrt0.o in profile mode, add diff --git a/gcc/ddg.c b/gcc/ddg.c index def8e0ee6d9..73e37225a25 100644 --- a/gcc/ddg.c +++ b/gcc/ddg.c @@ -568,6 +568,7 @@ print_ddg (FILE *file, ddg_ptr g) { ddg_edge_ptr e; + fprintf (file, "Node num: %d\n", g->nodes[i].cuid); print_rtl_single (file, g->nodes[i].insn); fprintf (file, "OUT ARCS: "); for (e = g->nodes[i].out; e; e = e->next_out) diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c index a85a1095628..73c4adc84b0 100644 --- a/gcc/modulo-sched.c +++ b/gcc/modulo-sched.c @@ -159,7 +159,6 @@ static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int histor static void free_partial_schedule (partial_schedule_ptr); static void reset_partial_schedule (partial_schedule_ptr, int new_ii); void print_partial_schedule (partial_schedule_ptr, FILE *); -static int kernel_number_of_cycles (rtx first_insn, rtx last_insn); static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, ddg_node_ptr node, int cycle, sbitmap must_precede, @@ -365,7 +364,7 @@ set_node_sched_params (ddg_ptr g) } static void -print_node_sched_params (FILE * file, int num_nodes) +print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) { int i; @@ -377,7 +376,8 @@ print_node_sched_params (FILE * file, int num_nodes) rtx reg_move = nsp->first_reg_move; int j; - fprintf (file, "Node %d:\n", i); + fprintf (file, "Node = %d; INSN = %d\n", i, + (INSN_UID (g->nodes[i].insn))); fprintf (file, " asap = %d:\n", nsp->asap); fprintf (file, " time = %d:\n", nsp->time); fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); @@ -390,29 +390,6 @@ print_node_sched_params (FILE * file, int num_nodes) } } -/* Calculate an upper bound for II. SMS should not schedule the loop if it - requires more cycles than this bound. Currently set to the sum of the - longest latency edge for each node. Reset based on experiments. */ -static int -calculate_maxii (ddg_ptr g) -{ - int i; - int maxii = 0; - - for (i = 0; i < g->num_nodes; i++) - { - ddg_node_ptr u = &g->nodes[i]; - ddg_edge_ptr e; - int max_edge_latency = 0; - - for (e = u->out; e; e = e->next_out) - max_edge_latency = MAX (max_edge_latency, e->latency); - - maxii += max_edge_latency; - } - return maxii; -} - /* Breaking intra-loop register anti-dependences: Each intra-loop register anti-dependence implies a cross-iteration true @@ -533,40 +510,6 @@ generate_reg_moves (partial_schedule_ptr ps, bool rescan) return reg_move_replaces; } -/* We call this when we want to undo the SMS schedule for a given loop. - One of the things that we do is to delete the register moves generated - for the sake of SMS; this function deletes the register move instructions - recorded in the undo buffer. */ -static void -undo_generate_reg_moves (partial_schedule_ptr ps, - struct undo_replace_buff_elem *reg_move_replaces) -{ - int i,j; - - for (i = 0; i < ps->g->num_nodes; i++) - { - ddg_node_ptr u = &ps->g->nodes[i]; - rtx prev; - rtx crr = SCHED_FIRST_REG_MOVE (u); - - for (j = 0; j < SCHED_NREG_MOVES (u); j++) - { - prev = PREV_INSN (crr); - delete_insn (crr); - crr = prev; - } - SCHED_FIRST_REG_MOVE (u) = NULL_RTX; - } - - while (reg_move_replaces) - { - struct undo_replace_buff_elem *rep = reg_move_replaces; - - reg_move_replaces = reg_move_replaces->next; - replace_rtx (rep->insn, rep->new_reg, rep->orig_reg); - } -} - /* Free memory allocated for the undo buffer. */ static void free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) @@ -638,28 +581,6 @@ permute_partial_schedule (partial_schedule_ptr ps, rtx last) PREV_INSN (last)); } -/* As part of undoing SMS we return to the original ordering of the - instructions inside the loop kernel. Given the partial schedule PS, this - function returns the ordering of the instruction according to their CUID - in the DDG (PS->G), which is the original order of the instruction before - performing SMS. */ -static void -undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last) -{ - int i; - - for (i = 0 ; i < ps->g->num_nodes; i++) - if (last == ps->g->nodes[i].insn - || last == ps->g->nodes[i].first_note) - break; - else if (PREV_INSN (last) != ps->g->nodes[i].insn) - reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn, - PREV_INSN (last)); -} - -/* Used to generate the prologue & epilogue. Duplicate the subset of - nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive - of both), together with a prefix/suffix of their reg_moves. */ static void duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, int to_stage, int for_prolog) @@ -869,6 +790,9 @@ canon_loop (struct loop *loop) version may be entered. Just a guess. */ #define PROB_SMS_ENOUGH_ITERATIONS 80 +/* Used to calculate the upper bound of ii. */ +#define MAXII_FACTOR 2 + /* Main entry point, perform SMS scheduling on the loops of the function that consist of single basic blocks. */ static void @@ -1097,7 +1021,7 @@ sms_schedule (void) mii = 1; /* Need to pass some estimate of mii. */ rec_mii = sms_order_nodes (g, mii, node_order); mii = MAX (res_MII (g), rec_mii); - maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100; + maxii = MAXII_FACTOR * mii; if (dump_file) fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", @@ -1131,8 +1055,6 @@ sms_schedule (void) } else { - int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb)); - int new_cycles; struct undo_replace_buff_elem *reg_move_replaces; if (dump_file) @@ -1154,68 +1076,46 @@ sms_schedule (void) normalize_sched_times (ps); rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); set_columns_for_ps (ps); - - /* Generate the kernel just to be able to measure its cycles. */ + + canon_loop (loop); + + /* case the BCT count is not known , Do loop-versioning */ + if (count_reg && ! count_init) + { + rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg, + GEN_INT(stage_count)); + unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS + * REG_BR_PROB_BASE) / 100; + + loop_version (loop, comp_rtx, &condition_bb, + prob, prob, REG_BR_PROB_BASE - prob, + true); + } + + /* Set new iteration count of loop kernel. */ + if (count_reg && count_init) + SET_SRC (single_set (count_init)) = GEN_INT (loop_count + - stage_count + 1); + + /* Now apply the scheduled kernel to the RTL of the loop. */ permute_partial_schedule (ps, g->closing_branch->first_note); - reg_move_replaces = generate_reg_moves (ps, false); - /* Get the number of cycles the new kernel expect to execute in. */ - new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb)); + /* Mark this loop as software pipelined so the later + scheduling passes doesn't touch it. */ + if (! flag_resched_modulo_sched) + g->bb->flags |= BB_DISABLE_SCHEDULE; + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); - /* Get back to the original loop so we can do loop versioning. */ - undo_permute_partial_schedule (ps, g->closing_branch->first_note); - if (reg_move_replaces) - undo_generate_reg_moves (ps, reg_move_replaces); - - if ( new_cycles >= orig_cycles) - { - /* SMS is not profitable so undo the permutation and reg move generation - and return the kernel to its original state. */ - if (dump_file) - fprintf (dump_file, "Undoing SMS because it is not profitable.\n"); - - } + reg_move_replaces = generate_reg_moves (ps, true); + if (dump_file) + print_node_sched_params (dump_file, g->num_nodes, g); + /* Generate prolog and epilog. */ + if (count_reg && !count_init) + generate_prolog_epilog (ps, loop, count_reg); else - { - canon_loop (loop); - - /* case the BCT count is not known , Do loop-versioning */ - if (count_reg && ! count_init) - { - rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg, - GEN_INT(stage_count)); - unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS - * REG_BR_PROB_BASE) / 100; - - loop_version (loop, comp_rtx, &condition_bb, - prob, prob, REG_BR_PROB_BASE - prob, - true); - } - - /* Set new iteration count of loop kernel. */ - if (count_reg && count_init) - SET_SRC (single_set (count_init)) = GEN_INT (loop_count - - stage_count + 1); - - /* Now apply the scheduled kernel to the RTL of the loop. */ - permute_partial_schedule (ps, g->closing_branch->first_note); - - /* Mark this loop as software pipelined so the later - scheduling passes doesn't touch it. */ - if (! flag_resched_modulo_sched) - g->bb->flags |= BB_DISABLE_SCHEDULE; - /* The life-info is not valid any more. */ - df_set_bb_dirty (g->bb); - - reg_move_replaces = generate_reg_moves (ps, true); - if (dump_file) - print_node_sched_params (dump_file, g->num_nodes); - /* Generate prolog and epilog. */ - if (count_reg && !count_init) - generate_prolog_epilog (ps, loop, count_reg); - else - generate_prolog_epilog (ps, loop, NULL_RTX); - } + generate_prolog_epilog (ps, loop, NULL_RTX); + free_undo_replace_buff (reg_move_replaces); } @@ -1529,17 +1429,21 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order) of nodes within the cycle. */ sbitmap_zero (must_precede); sbitmap_zero (must_follow); - for (e = u_node->in; e != 0; e = e->next_in) + /* TODO: We can add an insn to the must_precede or must_follow + bitmaps only if it has tight dependence to U and they + both scheduled in the same row. The current check is less + conservative and content with the fact that both U and the + insn are scheduled in the same row. */ + for (e = u_node->in; e != 0; e = e->next_in) if (TEST_BIT (sched_nodes, e->src->cuid) - && e->latency == (ii * e->distance) - && start == SCHED_TIME (e->src)) - SET_BIT (must_precede, e->src->cuid); + && (SMODULO (SCHED_TIME (e->src), ii) == SMODULO (start, ii))) + SET_BIT (must_precede, e->src->cuid); - for (e = u_node->out; e != 0; e = e->next_out) + for (e = u_node->out; e != 0; e = e->next_out) if (TEST_BIT (sched_nodes, e->dest->cuid) - && e->latency == (ii * e->distance) - && end == SCHED_TIME (e->dest)) - SET_BIT (must_follow, e->dest->cuid); + && (SMODULO (SCHED_TIME (e->dest), ii) == + SMODULO ((end - step), ii))) + SET_BIT (must_follow, e->dest->cuid); success = 0; if ((step > 0 && start < end) || (step < 0 && start > end)) @@ -2259,57 +2163,7 @@ advance_one_cycle (void) targetm.sched.dfa_post_cycle_insn ()); } -/* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds - the number of cycles according to DFA that the kernel fits in, - we use this to check if we done well with SMS after we add - register moves. In some cases register moves overhead makes - it even worse than the original loop. We want SMS to be performed - when it gives less cycles after register moves are added. */ -static int -kernel_number_of_cycles (rtx first_insn, rtx last_insn) -{ - int cycles = 0; - rtx insn; - int can_issue_more = issue_rate; - - state_reset (curr_state); - - for (insn = first_insn; - insn != NULL_RTX && insn != last_insn; - insn = NEXT_INSN (insn)) - { - if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE) - continue; - /* Check if there is room for the current insn. */ - if (!can_issue_more || state_dead_lock_p (curr_state)) - { - cycles ++; - advance_one_cycle (); - can_issue_more = issue_rate; - } - - /* Update the DFA state and return with failure if the DFA found - recource conflicts. */ - if (state_transition (curr_state, insn) >= 0) - { - cycles ++; - advance_one_cycle (); - can_issue_more = issue_rate; - } - - if (targetm.sched.variable_issue) - can_issue_more = - targetm.sched.variable_issue (sched_dump, sched_verbose, - insn, can_issue_more); - /* A naked CLOBBER or USE generates no instruction, so don't - let them consume issue slots. */ - else if (GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER) - can_issue_more--; - } - return cycles; -} /* Checks if PS has resource conflicts according to DFA, starting from FROM cycle to TO cycle; returns true if there are conflicts and false diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a6f9d6ea5ea..851229a6bcf 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2007-08-08 Vladimir Yanovsky + Revital Eres + + * gfortran.dg/sms-1.f90: Add comment. + * gfortran.dg/sms-2.f90: New. + 2007-08-07 Ian Lance Taylor * gcc.dg/instrument-1.c: New test. diff --git a/gcc/testsuite/gfortran.dg/sms-1.f90 b/gcc/testsuite/gfortran.dg/sms-1.f90 index f9bfafe9208..754cb8caeb1 100644 --- a/gcc/testsuite/gfortran.dg/sms-1.f90 +++ b/gcc/testsuite/gfortran.dg/sms-1.f90 @@ -1,5 +1,7 @@ ! { dg-do run } -! { dg-options "-O2 -fmodulo-sched" } +! { dg-options "-O2 -fmodulo-sched" } +! This testcase related to INC instruction which is +! currently not supported in SMS. program main integer (kind = 8) :: i, l8, u8, step8 integer (kind = 4) :: l4, step4 diff --git a/gcc/testsuite/gfortran.dg/sms-2.f90 b/gcc/testsuite/gfortran.dg/sms-2.f90 new file mode 100644 index 00000000000..80ab9bf4915 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/sms-2.f90 @@ -0,0 +1,19 @@ +! { dg-do run } +! { dg-options "-O2 -fmodulo-sched" } +! This testcase related to wrong order within a cycle fix. +! +program foo + real, dimension (5, 5, 5, 5) :: a + + a (:, :, :, :) = 4 + a (:, 2, :, 4) = 10 + a (:, 2, :, 1) = 0 + + forall (i = 1:5, i == 3) + a(i, i, i, i) = -5 + end forall + + if (sum (a) .ne. 2541.0) call abort () +end + +