/* Loop autoparallelization.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
Zdenek Dvorak <dvorakz@suse.cz>.
currently we use vect_is_simple_reduction() to detect reduction patterns.
The code transformation will be introduced by an example.
- source code:
-
+
parloop
{
int sum=1;
- for (i = 0; i < N/1000; i++)
+ for (i = 0; i < N; i++)
{
x[i] = i + 3;
sum+=x[i];
}
}
-gimple code:
-
+gimple-like code:
header_bb:
- # sum_24 = PHI <sum_14(3), 1(2)>;
- # i_21 = PHI <i_15(3), 0(2)>;
-<L0>:;
- D.2191_10 = i_21 + 3;
- x[i_21] = D.2191_10;
- sum_14 = D.2191_10 + sum_24;
- i_15 = i_21 + 1;
- if (N_8 > i_15) goto <L0>; else goto <L2>;
+ # sum_29 = PHI <sum_11(5), 1(3)>
+ # i_28 = PHI <i_12(5), 0(3)>
+ D.1795_8 = i_28 + 3;
+ x[i_28] = D.1795_8;
+ sum_11 = D.1795_8 + sum_29;
+ i_12 = i_28 + 1;
+ if (N_6(D) > i_12)
+ goto header_bb;
+
exit_bb:
- # sum_25 = PHI <sum_14(3)>;
-<L2>:;
+ # sum_21 = PHI <sum_11(4)>
+ printf (&"%d"[0], sum_21);
after reduction transformation (only relevant parts):
....
-<L16>:;
- D.2241_2 = (unsigned int) N_8;
- D.2242_26 = D.2241_2 - 1;
- if (D.2242_26 > 399) goto <L26>; else goto <L27>;
-
-#two new variables are created for each reduction:
-"reduction" is the variable holding the neutral element
-for the particular operation, e.g. 0 for PLUS_EXPR,
-1 for MULT_EXPR, etc.
-"reduction_initial" is the initial value given by the user.
-It is kept and will be used after the parallel computing
-is done.#
-
-<L26>:;
- reduction.38_42 = 0;
- reduction_initial.39_43 = 1;
- x.40_44 = &x;
- .paral_data_store.47.D.2261 = D.2242_26;
- .paral_data_store.47.reduction.38 = reduction.38_42;
- .paral_data_store.47.x.40 = x.40_44;
- __builtin_GOMP_parallel_start (parloop._loopfn.0, &.paral_data_store.47, 4);
- parloop._loopfn.0 (&.paral_data_store.47);
- __builtin_GOMP_parallel_end ();
-
-# collecting the result after the join of the threads is done at
- create_loads_for_reductions().
- a new variable "reduction_final" is created. It calculates the
- final value from the initial value and the value computed by
- the threads. #
-
- .paral_data_load.48_49 = &.paral_data_store.47;
- reduction_final.49_50 = .paral_data_load.48_49->reduction.38;
- reduction_final.49_51 = reduction_initial.39_43 + reduction_final.49_50;
- ivtmp.37_36 = D.2242_26;
- i_37 = (int) ivtmp.37_36;
- D.2191_38 = i_37 + 3;
- x[i_37] = D.2191_38;
- sum_40 = D.2191_38 + reduction_final.49_51;
- i_41 = i_37 + 1;
- goto <bb 8> (<L2>);
-
- # sum_25 = PHI <sum_40(4), sum_9(6)>;
-<L2>:;
- printf (&"sum is %d\n"[0], sum_25);
-...
+ # Storing the initial value given by the user. #
-}
+ .paral_data_store.32.sum.27 = 1;
+
+ #pragma omp parallel num_threads(4)
-parloop._loopfn.0 (.paral_data_param)
-{
- ...
-
-<L28>:;
- .paral_data_param_52 = .paral_data_param_75;
- .paral_data_load.48_48 = (struct .paral_data.46 *) .paral_data_param_52;
- D.2289_46 = .paral_data_load.48_48->D.2261;
- reduction.43_45 = .paral_data_load.48_48->reduction.38;
- x.45_47 = .paral_data_load.48_48->x.40;
- # SUCC: 23 [100.0%] (fallthru)
-
- # BLOCK 23
- # PRED: 21 [100.0%] (fallthru)
-<L30>:;
- D.2292_60 = __builtin_omp_get_num_threads ();
- D.2293_61 = (unsigned int) D.2292_60;
- D.2294_62 = __builtin_omp_get_thread_num ();
- D.2295_63 = (unsigned int) D.2294_62;
- D.2296_64 = D.2289_46 / D.2293_61;
- D.2297_65 = D.2293_61 * D.2296_64;
- D.2298_66 = D.2297_65 != D.2289_46;
- D.2299_67 = D.2296_64 + D.2298_66;
- D.2300_68 = D.2299_67 * D.2295_63;
- D.2301_69 = D.2299_67 + D.2300_68;
- D.2302_70 = MIN_EXPR <D.2301_69, D.2289_46>;
- ivtmp.41_54 = D.2300_68;
- if (D.2300_68 >= D.2302_70) goto <L31>; else goto <L32>;
- # SUCC: 26 [100.0%] (false) 24 (true)
-
- # BLOCK 26
- # PRED: 23 [100.0%] (false)
-<L32>:;
- # SUCC: 4 [100.0%] (fallthru)
-
- # BLOCK 4
- # PRED: 5 [100.0%] (true) 26 [100.0%] (fallthru)
- # ivtmp.41_31 = PHI <ivtmp.41_30(5), ivtmp.41_54(26)>;
- # sum.42_32 = PHI <sum.42_14(5), reduction.43_45(26)>;
-<L0>:;
- # SUCC: 19 [100.0%] (fallthru)
-
- # BLOCK 19
- # PRED: 4 [100.0%] (fallthru)
- # sum.42_24 = PHI <sum.42_32(4)>;
- # ivtmp.41_17 = PHI <ivtmp.41_31(4)>;
- i.44_21 = (int) ivtmp.41_17;
- D.2310_10 = i.44_21 + 3;
- (*x.45_47)[i.44_21] = D.2310_10;
- sum.42_14 = D.2310_10 + sum.42_24;
- i.44_15 = i.44_21 + 1;
- # SUCC: 5 [100.0%] (fallthru)
-
- # BLOCK 5
- # PRED: 19 [100.0%] (fallthru)
-<L17>:;
- ivtmp.41_30 = ivtmp.41_31 + 1;
- if (ivtmp.41_30 < D.2302_70) goto <L0>; else goto <L31>;
- # SUCC: 4 [100.0%] (true) 24 (false)
-
- # Adding this reduction phi is done at
- create_phi_for_local_result() #
-
- # BLOCK 24
- # PRED: 5 (false) 23 (true)
- # reduction.38_56 = PHI <sum.42_14(5), 0(23)>;
- <L31>:;
- __builtin_GOMP_barrier ();
- # SUCC: 25 [100.0%] (fallthru)
-
- # Creating the atomic operation is
- done at create_call_for_reduction_1() #
-
- # BLOCK 25
- # PRED: 24 [100.0%] (fallthru)
- D.2306_57 = &.paral_data_load.48_48->reduction.38;
- D.2307_58 = (unsigned int) reduction.38_56;
- D.2308_59 = __sync_fetch_and_add_4 (D.2306_57, D.2307_58);
- # SUCC: 22 [100.0%] (fallthru)
-
- # BLOCK 22
- # PRED: 25 [100.0%] (fallthru)
- <L29>:;
- return;
- # SUCC: EXIT
+ #pragma omp for schedule(static)
+
+ # The neutral element corresponding to the particular
+ reduction's operation, e.g. 0 for PLUS_EXPR,
+ 1 for MULT_EXPR, etc. replaces the user's initial value. #
+
+ # sum.27_29 = PHI <sum.27_11, 0>
+
+ sum.27_11 = D.1827_8 + sum.27_29;
+
+ OMP_CONTINUE
+
+ # Adding this reduction phi is done at create_phi_for_local_result() #
+ # sum.27_56 = PHI <sum.27_11, 0>
+ OMP_RETURN
+ # Creating the atomic operation is done at
+ create_call_for_reduction_1() #
+
+ #pragma omp atomic_load
+ D.1839_59 = *&.paral_data_load.33_51->reduction.23;
+ D.1840_60 = sum.27_56 + D.1839_59;
+ #pragma omp atomic_store (D.1840_60);
+
+ OMP_RETURN
+
+ # collecting the result after the join of the threads is done at
+ create_loads_for_reductions().
+ The value computed by the threads is loaded from the
+ shared struct. #
+
+
+ .paral_data_load.33_52 = &.paral_data_store.32;
+ sum_37 = .paral_data_load.33_52->sum.27;
+ sum_43 = D.1795_41 + sum_37;
+
+ exit bb:
+ # sum_21 = PHI <sum_43, sum_26>
+ printf (&"%d"[0], sum_21);
+
+...
+
}
*/
enum tree_code reduction_code; /* code for the reduction operation. */
tree keep_res; /* The PHI_RESULT of this phi is the resulting value
of the reduction variable when existing the loop. */
- tree initial_value; /* An ssa name representing a new variable holding
- the initial value of the reduction var before entering the loop. */
+ tree initial_value; /* The initial value of the reduction var before entering the loop. */
tree field; /* the name of the field in the parloop data structure intended for reduction. */
- tree reduction_init; /* An ssa name representing a new variable which will be
- assigned the proper reduction initialization value (init). */
tree init; /* reduction initialization value. */
tree new_phi; /* (helper field) Newly created phi node whose result
will be passed to the atomic operation. Represents
return ret;
}
-/* Assigns the address of VAR in TYPE to an ssa name, and returns this name.
+/* Return true when LOOP contains basic blocks marked with the
+ BB_IRREDUCIBLE_LOOP flag. */
+
+static inline bool
+loop_has_blocks_with_irreducible_flag (struct loop *loop)
+{
+ unsigned i;
+ basic_block *bbs = get_loop_body_in_dom_order (loop);
+ bool res = true;
+
+ for (i = 0; i < loop->num_nodes; i++)
+ if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
+ goto end;
+
+ res = false;
+ end:
+ free (bbs);
+ return res;
+}
+
+/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
The assignment statement is placed before LOOP. DECL_ADDRESS maps decls
- to their addresses that can be reused. */
+ to their addresses that can be reused. The address of OBJ is known to
+ be invariant in the whole function. */
static tree
-take_address_of (tree var, tree type, struct loop *loop, htab_t decl_address)
+take_address_of (tree obj, tree type, struct loop *loop, htab_t decl_address)
{
- int uid = DECL_UID (var);
+ int uid;
void **dslot;
struct int_tree_map ielt, *nielt;
- tree name, bvar, stmt;
+ tree *var_p, name, bvar, stmt, addr;
edge entry = loop_preheader_edge (loop);
+ /* Since the address of OBJ is invariant, the trees may be shared.
+ Avoid rewriting unrelated parts of the code. */
+ obj = unshare_expr (obj);
+ for (var_p = &obj;
+ handled_component_p (*var_p);
+ var_p = &TREE_OPERAND (*var_p, 0))
+ continue;
+ uid = DECL_UID (*var_p);
+
ielt.uid = uid;
dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
if (!*dslot)
{
- bvar = create_tmp_var (type, get_name (var));
+ addr = build_addr (*var_p, current_function_decl);
+ bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
add_referenced_var (bvar);
- stmt = build_gimple_modify_stmt (bvar,
- fold_convert (type,
- build_addr (var,
- current_function_decl)));
+ stmt = build_gimple_modify_stmt (bvar, addr);
name = make_ssa_name (bvar, stmt);
GIMPLE_STMT_OPERAND (stmt, 0) = name;
bsi_insert_on_edge_immediate (entry, stmt);
nielt->uid = uid;
nielt->to = name;
*dslot = nielt;
-
- return name;
}
+ else
+ name = ((struct int_tree_map *) *dslot)->to;
- name = ((struct int_tree_map *) *dslot)->to;
- if (TREE_TYPE (name) == type)
- return name;
+ if (var_p != &obj)
+ {
+ *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
+ name = force_gimple_operand (build_addr (obj, current_function_decl),
+ &stmt, true, NULL_TREE);
+ if (stmt)
+ bsi_insert_on_edge_immediate (entry, stmt);
+ }
- bvar = SSA_NAME_VAR (name);
- stmt = build_gimple_modify_stmt (bvar, fold_convert (type, name));
- name = make_ssa_name (bvar, stmt);
- GIMPLE_STMT_OPERAND (stmt, 0) = name;
- bsi_insert_on_edge_immediate (entry, stmt);
+ if (TREE_TYPE (name) != type)
+ {
+ name = force_gimple_operand (fold_convert (type, name), &stmt, true,
+ NULL_TREE);
+ if (stmt)
+ bsi_insert_on_edge_immediate (entry, stmt);
+ }
return name;
}
static int
initialize_reductions (void **slot, void *data)
{
- tree t, stmt;
tree init, c;
- tree name, name1;
tree bvar, type, arg;
edge e;
init = omp_reduction_init (c, TREE_TYPE (bvar));
reduc->init = init;
- t = build_gimple_modify_stmt (bvar, init);
- name = make_ssa_name (bvar, t);
-
- GIMPLE_STMT_OPERAND (t, 0) = name;
- SSA_NAME_DEF_STMT (name) = t;
-
- /* Replace the argument
- representing the initialization value. Keeping the old value
- in a new variable "reduction_initial", that will be taken in
- consideration after the parallel computing is done. */
+ /* Replace the argument representing the initialization value
+ with the initialization value for the reduction (neutral
+ element for the particular operation, e.g. 0 for PLUS_EXPR,
+ 1 for MULT_EXPR, etc).
+ The old value is kept in reduc->initial_value and will be
+ taken into consideration after the parallel computation
+ is done. */
e = loop_preheader_edge (loop);
arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
/* Create new variable to hold the initial value. */
- type = TREE_TYPE (bvar);
- bvar = create_tmp_var (type, "reduction_initial");
- add_referenced_var (bvar);
-
- stmt = build_gimple_modify_stmt (bvar, arg);
- name1 = make_ssa_name (bvar, stmt);
- GIMPLE_STMT_OPERAND (stmt, 0) = name1;
- SSA_NAME_DEF_STMT (name1) = stmt;
- bsi_insert_on_edge_immediate (e, stmt);
- bsi_insert_on_edge_immediate (e, t);
SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
- (reduc->reduc_phi, loop_preheader_edge (loop)), name);
- reduc->initial_value = name1;
- reduc->reduction_init = name;
+ (reduc->reduc_phi, loop_preheader_edge (loop)), init);
+ reduc->initial_value = arg;
return 1;
}
walk_tree. */
static tree
-eliminate_local_variables_1 (tree * tp, int *walk_subtrees, void *data)
+eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
struct elv_data *dta = data;
- tree t = *tp, var, addr, addr_type, type;
+ tree t = *tp, var, addr, addr_type, type, obj;
if (DECL_P (t))
{
if (TREE_CODE (t) == ADDR_EXPR)
{
- var = TREE_OPERAND (t, 0);
- if (!DECL_P (var))
+ /* ADDR_EXPR may appear in two contexts:
+ -- as a gimple operand, when the address taken is a function invariant
+ -- as gimple rhs, when the resulting address is not a function
+ invariant
+ We do not need to do anything special in the latter case (the base of
+ the memory reference whose address is taken may be replaced in the
+ DECL_P case). The former case is more complicated, as we need to
+ ensure that the new address is still a gimple operand. Thus, it
+ is not sufficient to replace just the base of the memory reference --
+ we need to move the whole computation of the address out of the
+ loop. */
+ if (!is_gimple_val (t))
return NULL_TREE;
*walk_subtrees = 0;
- if (!SSA_VAR_P (var) || DECL_EXTERNAL (var))
+ obj = TREE_OPERAND (t, 0);
+ var = get_base_address (obj);
+ if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
return NULL_TREE;
addr_type = TREE_TYPE (t);
- addr = take_address_of (var, addr_type, dta->loop, dta->decl_address);
+ addr = take_address_of (obj, addr_type, dta->loop, dta->decl_address);
*tp = addr;
dta->changed = true;
if (!*dslot)
{
var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
+ DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
add_referenced_var (var_copy);
nielt = XNEW (struct int_tree_map);
nielt->uid = uid;
}
}
-/* A helper structure for passing the TYPE and REDUCTION_LIST
- to the DATA parameter of add_field_for_name. */
-struct data_arg
+/* Callback for htab_traverse. Adds a field corresponding to the reduction
+ specified in SLOT. The type is passed in DATA. */
+
+static int
+add_field_for_reduction (void **slot, void *data)
{
- tree type;
- htab_t reduction_list;
-};
+
+ struct reduction_info *red = *slot;
+ tree type = data;
+ tree var = SSA_NAME_VAR (GIMPLE_STMT_OPERAND (red->reduc_stmt, 0));
+ tree field = build_decl (FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
+
+ insert_field_into_struct (type, field);
+
+ red->field = field;
+
+ return 1;
+}
/* Callback for htab_traverse. Adds a field corresponding to a ssa name
- described in SLOT. The type is passed in DATA. The Reduction list
- is also passes in DATA. */
+ described in SLOT. The type is passed in DATA. */
static int
add_field_for_name (void **slot, void *data)
{
- tree stmt;
- use_operand_p use_p = NULL;
-
struct name_to_copy_elt *elt = *slot;
- struct data_arg *data_arg = (struct data_arg *) data;
- tree type = data_arg->type;
+ tree type = data;
tree name = ssa_name (elt->version);
tree var = SSA_NAME_VAR (name);
tree field = build_decl (FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
insert_field_into_struct (type, field);
elt->field = field;
- /* Find uses of name to determine if this name is related to
- a reduction phi, and if so, record the field in the reduction struct. */
-
- if ((htab_elements (data_arg->reduction_list) > 0)
- && single_imm_use (elt->new_name, &use_p, &stmt)
- && TREE_CODE (stmt) == PHI_NODE)
- {
- /* check if STMT is a REDUC_PHI of some reduction. */
- struct reduction_info *red;
-
- red = reduction_phi (data_arg->reduction_list ,stmt);
- if (red)
- red->field = field;
- }
-
return 1;
}
e = EDGE_PRED (store_bb, 1);
else
e = EDGE_PRED (store_bb, 0);
- local_res = make_ssa_name (SSA_NAME_VAR (reduc->reduction_init), NULL_TREE);
+ local_res = make_ssa_name (SSA_NAME_VAR (GIMPLE_STMT_OPERAND (reduc->reduc_stmt, 0)), NULL_TREE);
new_phi = create_phi_node (local_res, store_bb);
SSA_NAME_DEF_STMT (local_res) = new_phi;
add_phi_arg (new_phi, reduc->init, e);
htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}
-/* Callback for htab_traverse. Create a new variable that loads the
- final reduction value at the
- join point of all threads, adds the initial value the reduction
- variable had before the parallel computation started, and
- inserts it in the right place. */
+/* Callback for htab_traverse. Loads the final reduction value at the
+ join point of all threads, and inserts it in the right place. */
static int
create_loads_for_reductions (void **slot, void *data)
struct clsn_data *clsn_data = data;
tree stmt;
block_stmt_iterator bsi;
- tree type = TREE_TYPE (red->reduction_init);
+ tree type = TREE_TYPE (GIMPLE_STMT_OPERAND (red->reduc_stmt, 0));
tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
tree load_struct;
- tree bvar, name;
+ tree name;
tree x;
bsi = bsi_after_labels (clsn_data->load_bb);
load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
NULL_TREE);
- bvar = create_tmp_var (type, "reduction_final");
- add_referenced_var (bvar);
-
- /* Apply operation between the new variable which is the result
- of computation all threads, and the initial value which is kept
- at reduction->inital_value. */
-
- stmt = build_gimple_modify_stmt (bvar, load_struct);
- name = make_ssa_name (bvar, stmt);
- GIMPLE_STMT_OPERAND (stmt, 0) = name;
- SSA_NAME_DEF_STMT (name) = stmt;
- bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
-
- x =
- fold_build2 (red->reduction_code, TREE_TYPE (load_struct),
- name, red->initial_value);
+ x = load_struct;
name = PHI_RESULT (red->keep_res);
stmt = build_gimple_modify_stmt (name, x);
GIMPLE_STMT_OPERAND (stmt, 0) = name;
}
+/* Callback for htab_traverse. Store the neutral value for the
+ particular reduction's operation, e.g. 0 for PLUS_EXPR,
+ 1 for MULT_EXPR, etc. into the reduction field.
+ The reduction is specified in SLOT. The store information is
+ passed in DATA. */
+
+static int
+create_stores_for_reduction (void **slot, void *data)
+{
+ struct reduction_info *red = *slot;
+ struct clsn_data *clsn_data = data;
+ tree stmt;
+ block_stmt_iterator bsi;
+ tree type = TREE_TYPE (GIMPLE_STMT_OPERAND (red->reduc_stmt, 0));
+
+ bsi = bsi_last (clsn_data->store_bb);
+ stmt =
+ build_gimple_modify_stmt (build3
+ (COMPONENT_REF, type, clsn_data->store,
+ red->field, NULL_TREE),
+ red->initial_value);
+ mark_virtual_ops_for_renaming (stmt);
+ bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
+
+ return 1;
+}
+
/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
store to a field of STORE in STORE_BB for the ssa name and its duplicate
specified in SLOT. */
}
else
{
- struct data_arg data_arg;
-
/* Create the type for the structure to store the ssa names to. */
type = lang_hooks.types.make_type (RECORD_TYPE);
type_name = build_decl (TYPE_DECL, create_tmp_var_name (".paral_data"),
type);
TYPE_NAME (type) = type_name;
- data_arg.type = type;
- data_arg.reduction_list = reduction_list;
- htab_traverse (name_copies, add_field_for_name, &data_arg);
+ htab_traverse (name_copies, add_field_for_name, type);
+ if (htab_elements (reduction_list) > 0)
+ {
+ /* Create the fields for reductions. */
+ htab_traverse (reduction_list, add_field_for_reduction,
+ type);
+ }
layout_type (type);
-
+
/* Create the loads and stores. */
*arg_struct = create_tmp_var (type, ".paral_data_store");
add_referenced_var (*arg_struct);
ld_st_data->load = *new_arg_struct;
ld_st_data->store_bb = bb0;
ld_st_data->load_bb = bb1;
+
htab_traverse (name_copies, create_loads_and_stores_for_name,
ld_st_data);
- /* Load the calculation from memory into a new
- reduction variable (after the join of the threads). */
+ /* Load the calculation from memory (after the join of the threads). */
+
if (htab_elements (reduction_list) > 0)
{
+ htab_traverse (reduction_list, create_stores_for_reduction,
+ ld_st_data);
clsn_data.load = make_ssa_name (nvar, NULL_TREE);
clsn_data.load_bb = single_dom_exit (loop)->dest;
clsn_data.store = ld_st_data->store;
TREE_USED (t) = 1;
DECL_ARGUMENTS (decl) = t;
- allocate_struct_function (decl);
+ allocate_struct_function (decl, false);
/* The call to allocate_struct_function clobbers CFUN, so we need to restore
it. */
- cfun = act_cfun;
+ set_cfun (act_cfun);
return decl;
}
canonicalize_loop_ivs (struct loop *loop, htab_t reduction_list, tree nit)
{
unsigned precision = TYPE_PRECISION (TREE_TYPE (nit));
- tree phi, prev, res, type, var_before, val, atype, t, next;
+ tree phi, prev, res, type, var_before, val, atype, mtype, t, next;
block_stmt_iterator bsi;
bool ok;
affine_iv iv;
remove_phi_node (phi, prev, false);
atype = TREE_TYPE (res);
- val = fold_build2 (PLUS_EXPR, atype,
- unshare_expr (iv.base),
- fold_build2 (MULT_EXPR, atype,
- unshare_expr (iv.step),
- fold_convert (atype, var_before)));
+ mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
+ val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
+ fold_convert (mtype, var_before));
+ val = fold_build2 (POINTER_TYPE_P (atype)
+ ? POINTER_PLUS_EXPR : PLUS_EXPR,
+ atype, unshare_expr (iv.base), val);
val = force_gimple_operand_bsi (&bsi, val, false, NULL_TREE, true,
BSI_SAME_STMT);
t = build_gimple_modify_stmt (res, val);
/* Initialize the control variable to NIT. */
bsi = bsi_after_labels (ex_bb);
+ nit = force_gimple_operand_bsi (&bsi,
+ fold_convert (TREE_TYPE (control_name), nit),
+ false, NULL_TREE, false, BSI_SAME_STMT);
t = build_gimple_modify_stmt (control_name, nit);
bsi_insert_before (&bsi, t, BSI_NEW_STMT);
SSA_NAME_DEF_STMT (control_name) = t;
unsigned n_threads, struct tree_niter_desc *niter)
{
struct loop *nloop;
+ loop_iterator li;
tree many_iterations_cond, type, nit;
tree stmts, arg_struct, new_arg_struct;
basic_block parallel_head;
expander to do it). */
cancel_loop_tree (loop);
+ /* Free loop bound estimations that could contain references to
+ removed statements. */
+ FOR_EACH_LOOP (li, loop, 0)
+ free_numbers_of_iterations_estimates_loop (loop);
+
/* Expand the parallel constructs. We do it directly here instead of running
a separate expand_omp pass, since it is more efficient, and less likely to
cause troubles with further analyses not being able to deal with the
|| expected_loop_iterations (loop) <= n_threads
/* And of course, the loop must be parallelizable. */
|| !can_duplicate_loop_p (loop)
+ || loop_has_blocks_with_irreducible_flag (loop)
|| !loop_parallel_p (loop, reduction_list, &niter_desc))
continue;