/* Loop autoparallelization.
- Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
Zdenek Dvorak <dvorakz@suse.cz>.
/*
Reduction handling:
- currently we use vect_is_simple_reduction() to detect reduction patterns.
+ currently we use vect_force_simple_reduction() to detect reduction patterns.
The code transformation will be introduced by an example.
in parallel). */
static bool
-loop_parallel_p (struct loop *loop)
+loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
VEC (ddr_p, heap) * dependence_relations;
VEC (data_reference_p, heap) *datarefs;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_data_dependence_relations (dump_file, dependence_relations);
- trans = lambda_trans_matrix_new (1, 1);
+ trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
LTM_MATRIX (trans)[0][0] = -1;
if (lambda_transform_legal_p (trans, 1, dependence_relations))
basic_block bb;
basic_block new_bb;
edge e;
- tree t, addr, addr_type, ref, x;
+ tree t, addr, ref, x;
tree tmp_load, name;
gimple load;
load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
- addr_type = build_pointer_type (type);
addr = build_addr (t, current_function_decl);
for (n = 0; bbs[n] != loop->latch; n++)
continue;
- n--;
nbbs = XNEWVEC (basic_block, n);
ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
bbs + 1, n, nbbs);
{
gimple_stmt_iterator gsi;
basic_block bb, paral_bb, for_bb, ex_bb;
- tree t, param, res;
+ tree t, param;
gimple stmt, for_stmt, phi, cond_stmt;
tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
edge exit, nexit, guard, end, e;
source_location locus;
tree def;
phi = gsi_stmt (gsi);
- res = PHI_RESULT (phi);
stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
unsigned n_threads, struct tree_niter_desc *niter)
{
- struct loop *nloop;
loop_iterator li;
tree many_iterations_cond, type, nit;
tree arg_struct, new_arg_struct;
/* We assume that the loop usually iterates a lot. */
prob = 4 * REG_BR_PROB_BASE / 5;
- nloop = loop_version (loop, many_iterations_cond, NULL,
- prob, prob, REG_BR_PROB_BASE - prob, true);
+ loop_version (loop, many_iterations_cond, NULL,
+ prob, prob, REG_BR_PROB_BASE - prob, true);
update_ssa (TODO_update_ssa);
free_original_copy_tables ();
/* Base all the induction variables in LOOP on a single control one. */
- canonicalize_loop_ivs (loop, &nit);
+ canonicalize_loop_ivs (loop, &nit, true);
/* Ensure that the exit condition is the first statement in the loop. */
transform_to_exit_first_loop (loop, reduction_list, nit);
if (!simple_iv (loop, loop, res, &iv, true)
&& simple_loop_info)
{
- gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
+ gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
+ phi, true,
+ &double_reduc);
if (reduc_stmt && !double_reduc)
build_new_reduction (reduction_list, reduc_stmt, phi);
}
struct tree_niter_desc niter_desc;
loop_iterator li;
htab_t reduction_list;
+ struct obstack parloop_obstack;
+ HOST_WIDE_INT estimated;
+ LOC loop_loc;
/* Do not parallelize loops in the functions created by parallelization. */
if (parallelized_function_p (cfun->decl))
return false;
+ if (cfun->has_nonlocal_label)
+ return false;
+ gcc_obstack_init (&parloop_obstack);
reduction_list = htab_create (10, reduction_info_hash,
reduction_info_eq, free);
init_stmt_vec_info_vec ();
if (/* And of course, the loop must be parallelizable. */
!can_duplicate_loop_p (loop)
|| loop_has_blocks_with_irreducible_flag (loop)
+ || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
/* FIXME: the check for vector phi nodes could be removed. */
|| loop_has_vector_phi_nodes (loop))
continue;
-
+ estimated = estimated_loop_iterations_int (loop, false);
/* FIXME: Bypass this check as graphite doesn't update the
count and frequency correctly now. */
if (!flag_loop_parallelize_all
- && ((estimated_loop_iterations_int (loop, false)
- <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
+ && ((estimated !=-1
+ && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
/* Do not bother with loops in cold areas. */
|| optimize_loop_nest_for_size_p (loop)))
continue;
if (!try_create_reduction_list (loop, reduction_list))
continue;
- if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
+ if (!flag_loop_parallelize_all
+ && !loop_parallel_p (loop, &parloop_obstack))
continue;
changed = true;
if (dump_file && (dump_flags & TDF_DETAILS))
{
- fprintf (dump_file, "parallelizing ");
if (loop->inner)
- fprintf (dump_file, "outer loop\n");
+ fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
else
- fprintf (dump_file, "inner loop\n");
+ fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
+ loop_loc = find_loop_location (loop);
+ if (loop_loc != UNKNOWN_LOC)
+ fprintf (dump_file, "\nloop at %s:%d: ",
+ LOC_FILE (loop_loc), LOC_LINE (loop_loc));
}
gen_parallel_loop (loop, reduction_list,
n_threads, &niter_desc);
verify_flow_info ();
verify_dominators (CDI_DOMINATORS);
verify_loop_structure ();
- verify_loop_closed_ssa ();
+ verify_loop_closed_ssa (true);
}
free_stmt_vec_info_vec ();
htab_delete (reduction_list);
+ obstack_free (&parloop_obstack, NULL);
/* Parallelization will cause new function calls to be inserted through
- which local variables will escape. Reset the points-to solutions
- for ESCAPED and CALLUSED. */
+ which local variables will escape. Reset the points-to solution
+ for ESCAPED. */
if (changed)
- {
- pt_solution_reset (&cfun->gimple_df->escaped);
- pt_solution_reset (&cfun->gimple_df->callused);
- }
+ pt_solution_reset (&cfun->gimple_df->escaped);
return changed;
}