X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fomp-low.c;h=2f9aedeac39933f5a8b12aa2339fd7b22d32917f;hp=1ced1eaa8755bb2db1bb0cea90e68793c1cd9840;hb=36eba48c3119a0236a818f0bb51b11c56b4f351e;hpb=a0147880f21db35ccccac321894d9eb9cedbb28f diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 1ced1eaa875..2f9aedeac39 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3,13 +3,13 @@ marshalling to implement data sharing and copying clauses. Contributed by Diego Novillo - Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -18,9 +18,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" #include "system.h" @@ -41,7 +40,9 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tree-pass.h" #include "ggc.h" #include "except.h" - +#include "splay-tree.h" +#include "optabs.h" +#include "cfgloop.h" /* Lowering of OpenMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -76,6 +77,14 @@ typedef struct omp_context tree sender_decl; tree receiver_decl; + /* These are used just by task contexts, if task firstprivate fn is + needed. srecord_type is used to communicate from the thread + that encountered the task construct to task firstprivate fn, + record_type is allocated by GOMP_task, initialized by task firstprivate + fn and passed to the task body fn. */ + splay_tree sfield_map; + tree srecord_type; + /* A chain of variables to add to the top-level block surrounding the construct. In the case of a parallel, this is in the child function. */ tree block_vars; @@ -94,21 +103,30 @@ typedef struct omp_context } omp_context; +struct omp_for_data_loop +{ + tree v, n1, n2, step; + enum tree_code cond_code; +}; + /* A structure describing the main elements of a parallel loop. */ struct omp_for_data { - tree v, n1, n2, step, chunk_size, for_stmt; - enum tree_code cond_code; - tree pre; + struct omp_for_data_loop loop; + tree chunk_size, for_stmt; + tree pre, iter_type; + int collapse; bool have_nowait, have_ordered; enum omp_clause_schedule_kind sched_kind; + struct omp_for_data_loop *loops; }; static splay_tree all_contexts; -static int parallel_nesting_level; +static int taskreg_nesting_level; struct omp_region *root_omp_region; +static bitmap task_shared_vars; static void scan_omp (tree *, omp_context *); static void lower_omp (tree *, omp_context *); @@ -117,7 +135,7 @@ static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); /* Find an OpenMP clause of type KIND within CLAUSES. 
*/ -static tree +tree find_omp_clause (tree clauses, enum tree_code kind) { for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) @@ -136,6 +154,25 @@ is_parallel_ctx (omp_context *ctx) } +/* Return true if CTX is for an omp task. */ + +static inline bool +is_task_ctx (omp_context *ctx) +{ + return TREE_CODE (ctx->stmt) == OMP_TASK; +} + + +/* Return true if CTX is for an omp parallel or omp task. */ + +static inline bool +is_taskreg_ctx (omp_context *ctx) +{ + return TREE_CODE (ctx->stmt) == OMP_PARALLEL + || TREE_CODE (ctx->stmt) == OMP_TASK; +} + + /* Return true if REGION is a combined parallel+workshare region. */ static inline bool @@ -149,64 +186,28 @@ is_combined_parallel (struct omp_region *region) them into *FD. */ static void -extract_omp_for_data (tree for_stmt, struct omp_for_data *fd) +extract_omp_for_data (tree for_stmt, struct omp_for_data *fd, + struct omp_for_data_loop *loops) { - tree t; + tree t, var, *collapse_iter, *collapse_count; + tree count = NULL_TREE, iter_type = long_integer_type_node; + struct omp_for_data_loop *loop; + int i; + struct omp_for_data_loop dummy_loop; fd->for_stmt = for_stmt; fd->pre = NULL; - - t = OMP_FOR_INIT (for_stmt); - gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT); - fd->v = GIMPLE_STMT_OPERAND (t, 0); - gcc_assert (DECL_P (fd->v)); - gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE); - fd->n1 = GIMPLE_STMT_OPERAND (t, 1); - - t = OMP_FOR_COND (for_stmt); - fd->cond_code = TREE_CODE (t); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); - fd->n2 = TREE_OPERAND (t, 1); - switch (fd->cond_code) - { - case LT_EXPR: - case GT_EXPR: - break; - case LE_EXPR: - fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2, - build_int_cst (TREE_TYPE (fd->n2), 1)); - fd->cond_code = LT_EXPR; - break; - case GE_EXPR: - fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2, - build_int_cst (TREE_TYPE (fd->n2), 1)); - fd->cond_code = GT_EXPR; - break; - default: - gcc_unreachable (); - } - - t = OMP_FOR_INCR (fd->for_stmt); - gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT); - gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == fd->v); - t = GIMPLE_STMT_OPERAND (t, 1); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); - switch (TREE_CODE (t)) - { - case PLUS_EXPR: - fd->step = TREE_OPERAND (t, 1); - break; - case MINUS_EXPR: - fd->step = TREE_OPERAND (t, 1); - fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step); - break; - default: - gcc_unreachable (); - } + fd->collapse = TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); + if (fd->collapse > 1) + fd->loops = loops; + else + fd->loops = &fd->loop; fd->have_nowait = fd->have_ordered = false; fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; fd->chunk_size = NULL_TREE; + collapse_iter = NULL; + collapse_count = NULL; for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) switch (OMP_CLAUSE_CODE (t)) @@ -221,20 +222,223 @@ extract_omp_for_data (tree for_stmt, struct omp_for_data *fd) fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t); fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); break; + case OMP_CLAUSE_COLLAPSE: + if (fd->collapse > 1) + { + collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); + collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); + } default: break; } + /* FIXME: for now map schedule(auto) to schedule(static). + There should be analysis to determine whether all iterations + are approximately the same amount of work (then schedule(static) + is best) or if it varies (then schedule(dynamic,N) is better). 
*/ + if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) + { + fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; + gcc_assert (fd->chunk_size == NULL); + } + gcc_assert (fd->collapse == 1 || collapse_iter != NULL); if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) gcc_assert (fd->chunk_size == NULL); else if (fd->chunk_size == NULL) { /* We only need to compute a default chunk size for ordered static loops and dynamic loops. */ - if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered) + if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC + || fd->have_ordered + || fd->collapse > 1) fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) ? integer_zero_node : integer_one_node; } + + for (i = 0; i < fd->collapse; i++) + { + if (fd->collapse == 1) + loop = &fd->loop; + else if (loops != NULL) + loop = loops + i; + else + loop = &dummy_loop; + + t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i); + gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT); + loop->v = GIMPLE_STMT_OPERAND (t, 0); + gcc_assert (SSA_VAR_P (loop->v)); + gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE + || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); + var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v; + loop->n1 = GIMPLE_STMT_OPERAND (t, 1); + + t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i); + loop->cond_code = TREE_CODE (t); + gcc_assert (TREE_OPERAND (t, 0) == var); + loop->n2 = TREE_OPERAND (t, 1); + switch (loop->cond_code) + { + case LT_EXPR: + case GT_EXPR: + break; + case LE_EXPR: + if (POINTER_TYPE_P (TREE_TYPE (loop->n2))) + loop->n2 = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (loop->n2), + loop->n2, size_one_node); + else + loop->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2, + build_int_cst (TREE_TYPE (loop->n2), 1)); + loop->cond_code = LT_EXPR; + break; + case GE_EXPR: + if (POINTER_TYPE_P (TREE_TYPE (loop->n2))) + loop->n2 = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (loop->n2), + loop->n2, size_int (-1)); + else + loop->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (loop->n2), loop->n2, + build_int_cst (TREE_TYPE (loop->n2), 1)); + loop->cond_code = GT_EXPR; + break; + default: + gcc_unreachable (); + } + + t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i); + gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT); + gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == var); + t = GIMPLE_STMT_OPERAND (t, 1); + gcc_assert (TREE_OPERAND (t, 0) == var); + switch (TREE_CODE (t)) + { + case PLUS_EXPR: + case POINTER_PLUS_EXPR: + loop->step = TREE_OPERAND (t, 1); + break; + case MINUS_EXPR: + loop->step = TREE_OPERAND (t, 1); + loop->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (loop->step), + loop->step); + break; + default: + gcc_unreachable (); + } + + if (iter_type != long_long_unsigned_type_node) + { + if (POINTER_TYPE_P (TREE_TYPE (loop->v))) + iter_type = long_long_unsigned_type_node; + else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) + && TYPE_PRECISION (TREE_TYPE (loop->v)) + >= TYPE_PRECISION (iter_type)) + { + tree n; + + if (loop->cond_code == LT_EXPR) + n = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->v), + loop->n2, loop->step); + else + n = loop->n1; + if (TREE_CODE (n) != INTEGER_CST + || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) + iter_type = long_long_unsigned_type_node; + } + else if (TYPE_PRECISION (TREE_TYPE (loop->v)) + > TYPE_PRECISION (iter_type)) + { + tree n1, n2; + + if (loop->cond_code == LT_EXPR) + { + n1 = loop->n1; + n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->v), + loop->n2, loop->step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, TREE_TYPE 
(loop->v), + loop->n2, loop->step); + n2 = loop->n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) + || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type))) + iter_type = long_long_unsigned_type_node; + } + } + + if (collapse_count && *collapse_count == NULL) + { + if ((i == 0 || count != NULL_TREE) + && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE + && TREE_CONSTANT (loop->n1) + && TREE_CONSTANT (loop->n2) + && TREE_CODE (loop->step) == INTEGER_CST) + { + tree itype = TREE_TYPE (loop->v); + + if (POINTER_TYPE_P (itype)) + itype + = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0); + t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, loop->step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, loop->n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, loop->n1)); + if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, + fold_convert (itype, + loop->step))); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, + fold_convert (itype, loop->step)); + t = fold_convert (long_long_unsigned_type_node, t); + if (count != NULL_TREE) + count = fold_build2 (MULT_EXPR, long_long_unsigned_type_node, + count, t); + else + count = t; + if (TREE_CODE (count) != INTEGER_CST) + count = NULL_TREE; + } + else + count = NULL_TREE; + } + } + + if (count) + { + if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node))) + iter_type = long_long_unsigned_type_node; + else + iter_type = long_integer_type_node; + } + else if (collapse_iter && *collapse_iter != NULL) + iter_type = TREE_TYPE (*collapse_iter); + fd->iter_type = iter_type; + if (collapse_iter && *collapse_iter == NULL) + *collapse_iter = create_tmp_var (iter_type, ".iter"); + if (collapse_count && *collapse_count == NULL) + { + if (count) + *collapse_count = fold_convert (iter_type, count); + else + *collapse_count = create_tmp_var (iter_type, ".count"); + } + + if (fd->collapse > 1) + { + fd->loop.v = *collapse_iter; + fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); + fd->loop.n2 = *collapse_count; + fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); + fd->loop.cond_code = LT_EXPR; + } } @@ -294,16 +498,21 @@ workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb) gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR); - extract_omp_for_data (ws_stmt, &fd); + extract_omp_for_data (ws_stmt, &fd, NULL); + + if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) + return false; + if (fd.iter_type != long_integer_type_node) + return false; /* FIXME. We give up too easily here. If any of these arguments are not constants, they will likely involve variables that have been mapped into fields of .omp_data_s for sharing with the child function. With appropriate data flow, it would be possible to see through this. 
*/ - if (!is_gimple_min_invariant (fd.n1) - || !is_gimple_min_invariant (fd.n2) - || !is_gimple_min_invariant (fd.step) + if (!is_gimple_min_invariant (fd.loop.n1) + || !is_gimple_min_invariant (fd.loop.n2) + || !is_gimple_min_invariant (fd.loop.step) || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) return false; @@ -325,7 +534,7 @@ get_ws_args_for (tree ws_stmt) struct omp_for_data fd; tree ws_args; - extract_omp_for_data (ws_stmt, &fd); + extract_omp_for_data (ws_stmt, &fd, NULL); ws_args = NULL_TREE; if (fd.chunk_size) @@ -334,21 +543,24 @@ get_ws_args_for (tree ws_stmt) ws_args = tree_cons (NULL, t, ws_args); } - t = fold_convert (long_integer_type_node, fd.step); + t = fold_convert (long_integer_type_node, fd.loop.step); ws_args = tree_cons (NULL, t, ws_args); - t = fold_convert (long_integer_type_node, fd.n2); + t = fold_convert (long_integer_type_node, fd.loop.n2); ws_args = tree_cons (NULL, t, ws_args); - t = fold_convert (long_integer_type_node, fd.n1); + t = fold_convert (long_integer_type_node, fd.loop.n1); ws_args = tree_cons (NULL, t, ws_args); return ws_args; } else if (TREE_CODE (ws_stmt) == OMP_SECTIONS) { - basic_block bb = bb_for_stmt (ws_stmt); - t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs)); + /* Number of sections is equal to the number of edges from the + OMP_SECTIONS_SWITCH statement, except for the one to the exit + of the sections region. */ + basic_block bb = single_succ (bb_for_stmt (ws_stmt)); + t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); t = tree_cons (NULL, t, NULL); return t; } @@ -366,7 +578,8 @@ determine_parallel_type (struct omp_region *region) basic_block ws_entry_bb, ws_exit_bb; if (region == NULL || region->inner == NULL - || region->exit == NULL || region->inner->exit == NULL) + || region->exit == NULL || region->inner->exit == NULL + || region->inner->cont == NULL) return; /* We only support parallel+for and parallel+sections. */ @@ -384,9 +597,12 @@ determine_parallel_type (struct omp_region *region) if (single_succ (par_entry_bb) == ws_entry_bb && single_succ (ws_exit_bb) == par_exit_bb - && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb)) + && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb) + && (OMP_PARALLEL_COMBINED (last_stmt (par_entry_bb)) + || (last_and_only_stmt (ws_entry_bb) + && last_and_only_stmt (par_exit_bb)))) { - tree ws_stmt = last_stmt (region->inner->entry); + tree ws_stmt = last_stmt (ws_entry_bb); if (region->inner->type == OMP_FOR) { @@ -421,7 +637,7 @@ determine_parallel_type (struct omp_region *region) /* Return true if EXPR is variable sized. */ static inline bool -is_variable_sized (tree expr) +is_variable_sized (const_tree expr) { return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr))); } @@ -441,17 +657,17 @@ is_reference (tree decl) static inline tree lookup_decl (tree var, omp_context *ctx) { - splay_tree_node n; - n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var); - return (tree) n->value; + tree *n; + n = (tree *) pointer_map_contains (ctx->cb.decl_map, var); + return *n; } static inline tree -maybe_lookup_decl (tree var, omp_context *ctx) +maybe_lookup_decl (const_tree var, omp_context *ctx) { - splay_tree_node n; - n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var); - return n ? (tree) n->value : NULL_TREE; + tree *n; + n = (tree *) pointer_map_contains (ctx->cb.decl_map, var); + return n ? 
*n : NULL_TREE; } static inline tree @@ -463,6 +679,16 @@ lookup_field (tree var, omp_context *ctx) } static inline tree +lookup_sfield (tree var, omp_context *ctx) +{ + splay_tree_node n; + n = splay_tree_lookup (ctx->sfield_map + ? ctx->sfield_map : ctx->field_map, + (splay_tree_key) var); + return (tree) n->value; +} + +static inline tree maybe_lookup_field (tree var, omp_context *ctx) { splay_tree_node n; @@ -470,18 +696,18 @@ maybe_lookup_field (tree var, omp_context *ctx) return n ? (tree) n->value : NULL_TREE; } -/* Return true if DECL should be copied by pointer. SHARED_P is true - if DECL is to be shared. */ +/* Return true if DECL should be copied by pointer. SHARED_CTX is + the parallel context if DECL is to be shared. */ static bool -use_pointer_for_field (tree decl, bool shared_p) +use_pointer_for_field (tree decl, omp_context *shared_ctx) { if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) return true; /* We can only use copy-in/copy-out semantics for shared variables when we know the value is not accessible from an outer scope. */ - if (shared_p) + if (shared_ctx) { /* ??? Trivially accessible from anywhere. But why would we even be passing an address in this case? Should we simply assert @@ -501,26 +727,88 @@ use_pointer_for_field (tree decl, bool shared_p) address taken. */ if (TREE_ADDRESSABLE (decl)) return true; + + /* Disallow copy-in/out in nested parallel if + decl is shared in outer parallel, otherwise + each thread could store the shared variable + in its own copy-in location, making the + variable no longer really shared. */ + if (!TREE_READONLY (decl) && shared_ctx->is_nested) + { + omp_context *up; + + for (up = shared_ctx->outer; up; up = up->outer) + if (maybe_lookup_decl (decl, up)) + break; + + if (up && is_taskreg_ctx (up)) + { + tree c; + + for (c = OMP_TASKREG_CLAUSES (up->stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED + && OMP_CLAUSE_DECL (c) == decl) + break; + + if (c) + return true; + } + } + + /* For tasks avoid using copy-in/out, unless they are readonly + (in which case just copy-in is used). As tasks can be + deferred or executed in different thread, when GOMP_task + returns, the task hasn't necessarily terminated. */ + if (!TREE_READONLY (decl) && is_task_ctx (shared_ctx)) + { + tree outer = maybe_lookup_decl_in_outer_ctx (decl, shared_ctx); + if (is_gimple_reg (outer)) + { + /* Taking address of OUTER in lower_send_shared_vars + might need regimplification of everything that uses the + variable. */ + if (!task_shared_vars) + task_shared_vars = BITMAP_ALLOC (NULL); + bitmap_set_bit (task_shared_vars, DECL_UID (outer)); + TREE_ADDRESSABLE (outer) = 1; + } + return true; + } } return false; } -/* Construct a new automatic decl similar to VAR. */ +/* Create a new VAR_DECL and copy information from VAR to it. 
*/ -static tree -omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx) +tree +copy_var_decl (tree var, tree name, tree type) { tree copy = build_decl (VAR_DECL, name, type); TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var); + TREE_THIS_VOLATILE (copy) = TREE_THIS_VOLATILE (var); DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (var); + DECL_NO_TBAA_P (copy) = DECL_NO_TBAA_P (var); DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var); DECL_IGNORED_P (copy) = DECL_IGNORED_P (var); + DECL_CONTEXT (copy) = DECL_CONTEXT (var); + DECL_SOURCE_LOCATION (copy) = DECL_SOURCE_LOCATION (var); TREE_USED (copy) = 1; - DECL_CONTEXT (copy) = current_function_decl; DECL_SEEN_IN_BIND_EXPR_P (copy) = 1; + return copy; +} + +/* Construct a new automatic decl similar to VAR. */ + +static tree +omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx) +{ + tree copy = copy_var_decl (var, name, type); + + DECL_CONTEXT (copy) = current_function_decl; TREE_CHAIN (copy) = ctx->block_vars; ctx->block_vars = copy; @@ -571,9 +859,9 @@ build_outer_var_ref (tree var, omp_context *ctx) x = build_outer_var_ref (x, ctx); x = build_fold_indirect_ref (x); } - else if (is_parallel_ctx (ctx)) + else if (is_taskreg_ctx (ctx)) { - bool by_ref = use_pointer_for_field (var, false); + bool by_ref = use_pointer_for_field (var, NULL); x = build_receiver_ref (var, by_ref, ctx); } else if (ctx->outer) @@ -596,7 +884,7 @@ build_outer_var_ref (tree var, omp_context *ctx) static tree build_sender_ref (tree var, omp_context *ctx) { - tree field = lookup_field (var, ctx); + tree field = lookup_sfield (var, ctx); return build3 (COMPONENT_REF, TREE_TYPE (field), ctx->sender_decl, field, NULL); } @@ -604,15 +892,20 @@ build_sender_ref (tree var, omp_context *ctx) /* Add a new field for VAR inside the structure CTX->SENDER_DECL. */ static void -install_var_field (tree var, bool by_ref, omp_context *ctx) +install_var_field (tree var, bool by_ref, int mask, omp_context *ctx) { - tree field, type; + tree field, type, sfield = NULL_TREE; - gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var)); + gcc_assert ((mask & 1) == 0 + || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var)); + gcc_assert ((mask & 2) == 0 || !ctx->sfield_map + || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var)); type = TREE_TYPE (var); if (by_ref) type = build_pointer_type (type); + else if ((mask & 3) == 1 && is_reference (var)) + type = TREE_TYPE (type); field = build_decl (FIELD_DECL, DECL_NAME (var), type); @@ -620,11 +913,57 @@ install_var_field (tree var, bool by_ref, omp_context *ctx) side effect of making dwarf2out ignore this member, so for helpful debugging we clear it later in delete_omp_context. 
*/ DECL_ABSTRACT_ORIGIN (field) = var; + if (type == TREE_TYPE (var)) + { + DECL_ALIGN (field) = DECL_ALIGN (var); + DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var); + TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var); + } + else + DECL_ALIGN (field) = TYPE_ALIGN (type); + + if ((mask & 3) == 3) + { + insert_field_into_struct (ctx->record_type, field); + if (ctx->srecord_type) + { + sfield = build_decl (FIELD_DECL, DECL_NAME (var), type); + DECL_ABSTRACT_ORIGIN (sfield) = var; + DECL_ALIGN (sfield) = DECL_ALIGN (field); + DECL_USER_ALIGN (sfield) = DECL_USER_ALIGN (field); + TREE_THIS_VOLATILE (sfield) = TREE_THIS_VOLATILE (field); + insert_field_into_struct (ctx->srecord_type, sfield); + } + } + else + { + if (ctx->srecord_type == NULL_TREE) + { + tree t; - insert_field_into_struct (ctx->record_type, field); + ctx->srecord_type = lang_hooks.types.make_type (RECORD_TYPE); + ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); + for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t)) + { + sfield = build_decl (FIELD_DECL, DECL_NAME (t), TREE_TYPE (t)); + DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t); + insert_field_into_struct (ctx->srecord_type, sfield); + splay_tree_insert (ctx->sfield_map, + (splay_tree_key) DECL_ABSTRACT_ORIGIN (t), + (splay_tree_value) sfield); + } + } + sfield = field; + insert_field_into_struct ((mask & 1) ? ctx->record_type + : ctx->srecord_type, field); + } - splay_tree_insert (ctx->field_map, (splay_tree_key) var, - (splay_tree_value) field); + if (mask & 1) + splay_tree_insert (ctx->field_map, (splay_tree_key) var, + (splay_tree_value) field); + if ((mask & 2) && ctx->sfield_map) + splay_tree_insert (ctx->sfield_map, (splay_tree_key) var, + (splay_tree_value) sfield); } static tree @@ -689,7 +1028,7 @@ omp_copy_decl (tree var, copy_body_data *cb) return new_var; } - while (!is_parallel_ctx (ctx)) + while (!is_taskreg_ctx (ctx)) { ctx = ctx->outer; if (ctx == NULL) @@ -758,7 +1097,7 @@ debug_all_omp_regions (void) struct omp_region * new_omp_region (basic_block bb, enum tree_code type, struct omp_region *parent) { - struct omp_region *region = xcalloc (1, sizeof (*region)); + struct omp_region *region = XCNEW (struct omp_region); region->outer = parent; region->entry = bb; @@ -844,11 +1183,42 @@ new_omp_context (tree stmt, omp_context *outer_ctx) ctx->depth = 1; } - ctx->cb.decl_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); + ctx->cb.decl_map = pointer_map_create (); return ctx; } +static void maybe_catch_exception (tree *stmt_p); + +/* Finalize task copyfn. */ + +static void +finalize_task_copyfn (tree task_stmt) +{ + struct function *child_cfun; + tree child_fn, old_fn; + + child_fn = OMP_TASK_COPYFN (task_stmt); + if (child_fn == NULL_TREE) + return; + + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + + /* Inform the callgraph about the new function. */ + DECL_STRUCT_FUNCTION (child_fn)->curr_properties + = cfun->curr_properties; + + old_fn = current_function_decl; + push_cfun (child_cfun); + current_function_decl = child_fn; + gimplify_body (&DECL_SAVED_TREE (child_fn), child_fn, false); + maybe_catch_exception (&BIND_EXPR_BODY (DECL_SAVED_TREE (child_fn))); + pop_cfun (); + current_function_decl = old_fn; + + cgraph_add_new_function (child_fn, false); +} + /* Destroy a omp_context data structures. Called through the splay tree value delete callback. 
*/ @@ -857,10 +1227,12 @@ delete_omp_context (splay_tree_value value) { omp_context *ctx = (omp_context *) value; - splay_tree_delete (ctx->cb.decl_map); + pointer_map_destroy (ctx->cb.decl_map); if (ctx->field_map) splay_tree_delete (ctx->field_map); + if (ctx->sfield_map) + splay_tree_delete (ctx->sfield_map); /* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before it produces corrupt debug information. */ @@ -870,6 +1242,15 @@ delete_omp_context (splay_tree_value value) for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t)) DECL_ABSTRACT_ORIGIN (t) = NULL; } + if (ctx->srecord_type) + { + tree t; + for (t = TYPE_FIELDS (ctx->srecord_type); t ; t = TREE_CHAIN (t)) + DECL_ABSTRACT_ORIGIN (t) = NULL; + } + + if (is_task_ctx (ctx)) + finalize_task_copyfn (ctx->stmt); XDELETE (ctx); } @@ -904,6 +1285,9 @@ fixup_child_record_type (omp_context *ctx) DECL_CONTEXT (new_f) = type; TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb); TREE_CHAIN (new_f) = new_fields; + walk_tree (&DECL_SIZE (new_f), copy_body_r, &ctx->cb, NULL); + walk_tree (&DECL_SIZE_UNIT (new_f), copy_body_r, &ctx->cb, NULL); + walk_tree (&DECL_FIELD_OFFSET (new_f), copy_body_r, &ctx->cb, NULL); new_fields = new_f; /* Arrange to be able to look up the receiver field @@ -935,25 +1319,28 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) { case OMP_CLAUSE_PRIVATE: decl = OMP_CLAUSE_DECL (c); - if (!is_variable_sized (decl)) + if (OMP_CLAUSE_PRIVATE_OUTER_REF (c)) + goto do_private; + else if (!is_variable_sized (decl)) install_var_local (decl, ctx); break; case OMP_CLAUSE_SHARED: - gcc_assert (is_parallel_ctx (ctx)); + gcc_assert (is_taskreg_ctx (ctx)); decl = OMP_CLAUSE_DECL (c); - gcc_assert (!is_variable_sized (decl)); - by_ref = use_pointer_for_field (decl, true); + gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl)) + || !is_variable_sized (decl)); /* Global variables don't need to be copied, the receiver side will use them directly. */ if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))) break; + by_ref = use_pointer_for_field (decl, ctx); if (! TREE_READONLY (decl) || TREE_ADDRESSABLE (decl) || by_ref || is_reference (decl)) { - install_var_field (decl, by_ref, ctx); + install_var_field (decl, by_ref, 3, ctx); install_var_local (decl, ctx); break; } @@ -973,13 +1360,26 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) decl = OMP_CLAUSE_DECL (c); do_private: if (is_variable_sized (decl)) - break; - else if (is_parallel_ctx (ctx) - && ! 
is_global_var (maybe_lookup_decl_in_outer_ctx (decl, - ctx))) { - by_ref = use_pointer_for_field (decl, false); - install_var_field (decl, by_ref, ctx); + if (is_task_ctx (ctx)) + install_var_field (decl, false, 1, ctx); + break; + } + else if (is_taskreg_ctx (ctx)) + { + bool global + = is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)); + by_ref = use_pointer_for_field (decl, NULL); + + if (is_task_ctx (ctx) + && (global || by_ref || is_reference (decl))) + { + install_var_field (decl, false, 1, ctx); + if (!global) + install_var_field (decl, by_ref, 2, ctx); + } + else if (!global) + install_var_field (decl, by_ref, 3, ctx); } install_var_local (decl, ctx); break; @@ -991,8 +1391,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_COPYIN: decl = OMP_CLAUSE_DECL (c); - by_ref = use_pointer_for_field (decl, false); - install_var_field (decl, by_ref, ctx); + by_ref = use_pointer_for_field (decl, NULL); + install_var_field (decl, by_ref, 3, ctx); break; case OMP_CLAUSE_DEFAULT: @@ -1008,6 +1408,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: + case OMP_CLAUSE_COLLAPSE: + case OMP_CLAUSE_UNTIED: break; default: @@ -1022,6 +1424,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_LASTPRIVATE: /* Let the corresponding firstprivate clause create the variable. */ + if (OMP_CLAUSE_LASTPRIVATE_STMT (c)) + scan_array_reductions = true; if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)) break; /* FALLTHRU */ @@ -1054,6 +1458,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_SCHEDULE: case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: + case OMP_CLAUSE_COLLAPSE: + case OMP_CLAUSE_UNTIED: break; default: @@ -1069,6 +1475,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx); scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx); } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && OMP_CLAUSE_LASTPRIVATE_STMT (c)) + scan_omp (&OMP_CLAUSE_LASTPRIVATE_STMT (c), ctx); } /* Create a new name for omp child function. Returns an identifier. */ @@ -1076,15 +1485,17 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) static GTY(()) unsigned int tmp_ompfn_id_num; static tree -create_omp_child_function_name (void) +create_omp_child_function_name (bool task_copy) { tree name = DECL_ASSEMBLER_NAME (current_function_decl); size_t len = IDENTIFIER_LENGTH (name); char *tmp_name, *prefix; + const char *suffix; - prefix = alloca (len + sizeof ("_omp_fn")); + suffix = task_copy ? "_omp_cpyfn" : "_omp_fn"; + prefix = XALLOCAVEC (char, len + strlen (suffix) + 1); memcpy (prefix, IDENTIFIER_POINTER (name), len); - strcpy (prefix + len, "_omp_fn"); + strcpy (prefix + len, suffix); #ifndef NO_DOT_IN_LABEL prefix[len] = '.'; #elif !defined NO_DOLLAR_IN_LABEL @@ -1098,17 +1509,24 @@ create_omp_child_function_name (void) yet, just the bare decl. 
*/ static void -create_omp_child_function (omp_context *ctx) +create_omp_child_function (omp_context *ctx, bool task_copy) { tree decl, type, name, t; - name = create_omp_child_function_name (); - type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + name = create_omp_child_function_name (task_copy); + if (task_copy) + type = build_function_type_list (void_type_node, ptr_type_node, + ptr_type_node, NULL_TREE); + else + type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); decl = build_decl (FUNCTION_DECL, name, type); decl = lang_hooks.decls.pushdecl (decl); - ctx->cb.dst_fn = decl; + if (!task_copy) + ctx->cb.dst_fn = decl; + else + OMP_TASK_COPYFN (ctx->stmt) = decl; TREE_STATIC (decl) = 1; TREE_USED (decl) = 1; @@ -1131,15 +1549,27 @@ create_omp_child_function (omp_context *ctx) DECL_CONTEXT (t) = current_function_decl; TREE_USED (t) = 1; DECL_ARGUMENTS (decl) = t; - ctx->receiver_decl = t; + if (!task_copy) + ctx->receiver_decl = t; + else + { + t = build_decl (PARM_DECL, get_identifier (".omp_data_o"), + ptr_type_node); + DECL_ARTIFICIAL (t) = 1; + DECL_ARG_TYPE (t) = ptr_type_node; + DECL_CONTEXT (t) = current_function_decl; + TREE_USED (t) = 1; + TREE_CHAIN (t) = DECL_ARGUMENTS (decl); + DECL_ARGUMENTS (decl) = t; + } /* Allocate memory for the function structure. The call to allocate_struct_function clobbers CFUN, so we need to restore it afterward. */ - allocate_struct_function (decl); + push_struct_function (decl); DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt); cfun->function_end_locus = EXPR_LOCATION (ctx->stmt); - cfun = ctx->cb.src_cfun; + pop_cfun (); } @@ -1162,7 +1592,7 @@ scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx) } ctx = new_omp_context (*stmt_p, outer_ctx); - if (parallel_nesting_level > 1) + if (taskreg_nesting_level > 1) ctx->is_nested = true; ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED; @@ -1170,7 +1600,7 @@ scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx) name = create_tmp_var_name (".omp_data_s"); name = build_decl (TYPE_DECL, name, ctx->record_type); TYPE_NAME (ctx->record_type) = name; - create_omp_child_function (ctx); + create_omp_child_function (ctx, false); OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn; scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx); @@ -1185,6 +1615,84 @@ scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx) } } +/* Scan an OpenMP task directive. */ + +static void +scan_omp_task (tree *stmt_p, omp_context *outer_ctx) +{ + omp_context *ctx; + tree name; + + /* Ignore task directives with empty bodies. 
*/ + if (optimize > 0 + && empty_body_p (OMP_TASK_BODY (*stmt_p))) + { + *stmt_p = build_empty_stmt (); + return; + } + + ctx = new_omp_context (*stmt_p, outer_ctx); + if (taskreg_nesting_level > 1) + ctx->is_nested = true; + ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); + ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED; + ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE); + name = create_tmp_var_name (".omp_data_s"); + name = build_decl (TYPE_DECL, name, ctx->record_type); + TYPE_NAME (ctx->record_type) = name; + create_omp_child_function (ctx, false); + OMP_TASK_FN (*stmt_p) = ctx->cb.dst_fn; + + scan_sharing_clauses (OMP_TASK_CLAUSES (*stmt_p), ctx); + + if (ctx->srecord_type) + { + name = create_tmp_var_name (".omp_data_a"); + name = build_decl (TYPE_DECL, name, ctx->srecord_type); + TYPE_NAME (ctx->srecord_type) = name; + create_omp_child_function (ctx, true); + } + + scan_omp (&OMP_TASK_BODY (*stmt_p), ctx); + + if (TYPE_FIELDS (ctx->record_type) == NULL) + { + ctx->record_type = ctx->receiver_decl = NULL; + OMP_TASK_ARG_SIZE (*stmt_p) + = build_int_cst (long_integer_type_node, 0); + OMP_TASK_ARG_ALIGN (*stmt_p) + = build_int_cst (long_integer_type_node, 1); + } + else + { + tree *p, vla_fields = NULL_TREE, *q = &vla_fields; + /* Move VLA fields to the end. */ + p = &TYPE_FIELDS (ctx->record_type); + while (*p) + if (!TYPE_SIZE_UNIT (TREE_TYPE (*p)) + || ! TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (*p)))) + { + *q = *p; + *p = TREE_CHAIN (*p); + TREE_CHAIN (*q) = NULL_TREE; + q = &TREE_CHAIN (*q); + } + else + p = &TREE_CHAIN (*p); + *p = vla_fields; + layout_type (ctx->record_type); + fixup_child_record_type (ctx); + if (ctx->srecord_type) + layout_type (ctx->srecord_type); + OMP_TASK_ARG_SIZE (*stmt_p) + = fold_convert (long_integer_type_node, + TYPE_SIZE_UNIT (ctx->record_type)); + OMP_TASK_ARG_ALIGN (*stmt_p) + = build_int_cst (long_integer_type_node, + TYPE_ALIGN_UNIT (ctx->record_type)); + } +} + /* Scan an OpenMP loop directive. 
*/ @@ -1193,6 +1701,7 @@ scan_omp_for (tree *stmt_p, omp_context *outer_ctx) { omp_context *ctx; tree stmt; + int i; stmt = *stmt_p; ctx = new_omp_context (stmt, outer_ctx); @@ -1200,9 +1709,12 @@ scan_omp_for (tree *stmt_p, omp_context *outer_ctx) scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx); scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx); - scan_omp (&OMP_FOR_INIT (stmt), ctx); - scan_omp (&OMP_FOR_COND (stmt), ctx); - scan_omp (&OMP_FOR_INCR (stmt), ctx); + for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (stmt)); i++) + { + scan_omp (&TREE_VEC_ELT (OMP_FOR_INIT (stmt), i), ctx); + scan_omp (&TREE_VEC_ELT (OMP_FOR_COND (stmt), i), ctx); + scan_omp (&TREE_VEC_ELT (OMP_FOR_INCR (stmt), i), ctx); + } scan_omp (&OMP_FOR_BODY (stmt), ctx); } @@ -1255,6 +1767,7 @@ check_omp_nesting_restrictions (tree t, omp_context *ctx) case OMP_FOR: case OMP_SECTIONS: case OMP_SINGLE: + case CALL_EXPR: for (; ctx != NULL; ctx = ctx->outer) switch (TREE_CODE (ctx->stmt)) { @@ -1263,8 +1776,17 @@ check_omp_nesting_restrictions (tree t, omp_context *ctx) case OMP_SINGLE: case OMP_ORDERED: case OMP_MASTER: + case OMP_TASK: + if (TREE_CODE (t) == CALL_EXPR) + { + warning (0, "barrier region may not be closely nested inside " + "of work-sharing, critical, ordered, master or " + "explicit task region"); + return; + } warning (0, "work-sharing region may not be closely nested inside " - "of work-sharing, critical, ordered or master region"); + "of work-sharing, critical, ordered, master or explicit " + "task region"); return; case OMP_PARALLEL: return; @@ -1279,8 +1801,9 @@ check_omp_nesting_restrictions (tree t, omp_context *ctx) case OMP_FOR: case OMP_SECTIONS: case OMP_SINGLE: + case OMP_TASK: warning (0, "master region may not be closely nested inside " - "of work-sharing region"); + "of work-sharing or explicit task region"); return; case OMP_PARALLEL: return; @@ -1293,8 +1816,9 @@ check_omp_nesting_restrictions (tree t, omp_context *ctx) switch (TREE_CODE (ctx->stmt)) { case OMP_CRITICAL: + case OMP_TASK: warning (0, "ordered region may not be closely nested inside " - "of critical region"); + "of critical or explicit task region"); return; case OMP_FOR: if (find_omp_clause (OMP_CLAUSES (ctx->stmt), @@ -1329,24 +1853,40 @@ check_omp_nesting_restrictions (tree t, omp_context *ctx) static tree scan_omp_1 (tree *tp, int *walk_subtrees, void *data) { - struct walk_stmt_info *wi = data; - omp_context *ctx = wi->info; + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + omp_context *ctx = (omp_context *) wi->info; tree t = *tp; if (EXPR_HAS_LOCATION (t)) input_location = EXPR_LOCATION (t); /* Check the OpenMP nesting restrictions. */ - if (OMP_DIRECTIVE_P (t) && ctx != NULL) - check_omp_nesting_restrictions (t, ctx); + if (ctx != NULL) + { + if (OMP_DIRECTIVE_P (t)) + check_omp_nesting_restrictions (t, ctx); + else if (TREE_CODE (t) == CALL_EXPR) + { + tree fndecl = get_callee_fndecl (t); + if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL + && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER) + check_omp_nesting_restrictions (t, ctx); + } + } *walk_subtrees = 0; switch (TREE_CODE (t)) { case OMP_PARALLEL: - parallel_nesting_level++; + taskreg_nesting_level++; scan_omp_parallel (tp, ctx); - parallel_nesting_level--; + taskreg_nesting_level--; + break; + + case OMP_TASK: + taskreg_nesting_level++; + scan_omp_task (tp, ctx); + taskreg_nesting_level--; break; case OMP_FOR: @@ -1424,11 +1964,10 @@ scan_omp (tree *stmt_p, omp_context *ctx) /* Build a call to GOMP_barrier. 
*/ -static void -build_omp_barrier (tree *stmt_list) +static tree +build_omp_barrier (void) { - tree t = build_call_expr (built_in_decls[BUILT_IN_GOMP_BARRIER], 0); - gimplify_and_add (t, stmt_list); + return build_call_expr (built_in_decls[BUILT_IN_GOMP_BARRIER], 0); } /* If a context was created for STMT when it was scanned, return it. */ @@ -1497,14 +2036,12 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) tree t; omp_context *up; - gcc_assert (ctx->is_nested); - for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) t = maybe_lookup_decl (decl, up); - gcc_assert (t); + gcc_assert (!ctx->is_nested || t || is_global_var (decl)); - return t; + return t ? t : decl; } @@ -1517,9 +2054,8 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) tree t = NULL; omp_context *up; - if (ctx->is_nested) - for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) - t = maybe_lookup_decl (decl, up); + for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) + t = maybe_lookup_decl (decl, up); return t ? t : decl; } @@ -1667,16 +2203,18 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, if (pass == 0) continue; - ptr = DECL_VALUE_EXPR (new_var); - gcc_assert (TREE_CODE (ptr) == INDIRECT_REF); - ptr = TREE_OPERAND (ptr, 0); - gcc_assert (DECL_P (ptr)); - - x = TYPE_SIZE_UNIT (TREE_TYPE (new_var)); - x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x); - x = fold_convert (TREE_TYPE (ptr), x); - x = build_gimple_modify_stmt (ptr, x); - gimplify_and_add (x, ilist); + if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx)) + { + ptr = DECL_VALUE_EXPR (new_var); + gcc_assert (TREE_CODE (ptr) == INDIRECT_REF); + ptr = TREE_OPERAND (ptr, 0); + gcc_assert (DECL_P (ptr)); + x = TYPE_SIZE_UNIT (TREE_TYPE (new_var)); + x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x); + x = fold_convert (TREE_TYPE (ptr), x); + x = build_gimple_modify_stmt (ptr, x); + gimplify_and_add (x, ilist); + } } else if (is_reference (var)) { @@ -1692,7 +2230,12 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, continue; x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var))); - if (TREE_CONSTANT (x)) + if (c_kind == OMP_CLAUSE_FIRSTPRIVATE && is_task_ctx (ctx)) + { + x = build_receiver_ref (var, false, ctx); + x = build_fold_addr_expr (x); + } + else if (TREE_CONSTANT (x)) { const char *name = NULL; if (DECL_NAME (var)) @@ -1732,7 +2275,7 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, /* Set up the DECL_VALUE_EXPR for shared variables now. This needs to be delayed until after fixup_child_record_type so that we get the correct type during the dereference. 
*/ - by_ref = use_pointer_for_field (var, true); + by_ref = use_pointer_for_field (var, ctx); x = build_receiver_ref (var, by_ref, ctx); SET_DECL_VALUE_EXPR (new_var, x); DECL_HAS_VALUE_EXPR_P (new_var) = 1; @@ -1752,7 +2295,18 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, /* FALLTHRU */ case OMP_CLAUSE_PRIVATE: - x = lang_hooks.decls.omp_clause_default_ctor (c, new_var); + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE) + x = build_outer_var_ref (var, ctx); + else if (OMP_CLAUSE_PRIVATE_OUTER_REF (c)) + { + if (is_task_ctx (ctx)) + x = build_receiver_ref (var, false, ctx); + else + x = build_outer_var_ref (var, ctx); + } + else + x = NULL; + x = lang_hooks.decls.omp_clause_default_ctor (c, new_var, x); if (x) gimplify_and_add (x, ilist); /* FALLTHRU */ @@ -1768,6 +2322,20 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, break; case OMP_CLAUSE_FIRSTPRIVATE: + if (is_task_ctx (ctx)) + { + if (is_reference (var) || is_variable_sized (var)) + goto do_dtor; + else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, + ctx)) + || use_pointer_for_field (var, NULL)) + { + x = build_receiver_ref (var, false, ctx); + SET_DECL_VALUE_EXPR (new_var, x); + DECL_HAS_VALUE_EXPR_P (new_var) = 1; + goto do_dtor; + } + } x = build_outer_var_ref (var, ctx); x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x); gimplify_and_add (x, ilist); @@ -1775,7 +2343,7 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, break; case OMP_CLAUSE_COPYIN: - by_ref = use_pointer_for_field (var, false); + by_ref = use_pointer_for_field (var, NULL); x = build_receiver_ref (var, by_ref, ctx); x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x); append_to_statement_list (x, ©in_seq); @@ -1785,8 +2353,16 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, case OMP_CLAUSE_REDUCTION: if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) { + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); + x = build_outer_var_ref (var, ctx); + + if (is_reference (var)) + x = build_fold_addr_expr (x); + SET_DECL_VALUE_EXPR (placeholder, x); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist); OMP_CLAUSE_REDUCTION_INIT (c) = NULL; + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; } else { @@ -1821,7 +2397,7 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, lastprivate clauses we need to ensure the lastprivate copying happens after firstprivate copying in all threads. */ if (copyin_by_ref || lastprivate_firstprivate) - build_omp_barrier (ilist); + gimplify_and_add (build_omp_barrier (), ilist); } @@ -1831,9 +2407,10 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, static void lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list, - omp_context *ctx) + omp_context *ctx) { tree sub_list, x, c; + bool par_clauses = false; /* Early exit if there are no lastprivate clauses. 
*/ clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE); @@ -1853,25 +2430,47 @@ lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list, OMP_CLAUSE_LASTPRIVATE); if (clauses == NULL) return; + par_clauses = true; } sub_list = alloc_stmt_list (); - for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) + for (c = clauses; c ;) { tree var, new_var; - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE) - continue; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) + { + var = OMP_CLAUSE_DECL (c); + new_var = lookup_decl (var, ctx); - var = OMP_CLAUSE_DECL (c); - new_var = lookup_decl (var, ctx); + if (OMP_CLAUSE_LASTPRIVATE_STMT (c)) + gimplify_and_add (OMP_CLAUSE_LASTPRIVATE_STMT (c), &sub_list); + OMP_CLAUSE_LASTPRIVATE_STMT (c) = NULL; - x = build_outer_var_ref (var, ctx); - if (is_reference (var)) - new_var = build_fold_indirect_ref (new_var); - x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var); - append_to_statement_list (x, &sub_list); + x = build_outer_var_ref (var, ctx); + if (is_reference (var)) + new_var = build_fold_indirect_ref (new_var); + x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var); + append_to_statement_list (x, &sub_list); + } + c = OMP_CLAUSE_CHAIN (c); + if (c == NULL && !par_clauses) + { + /* If this was a workshare clause, see if it had been combined + with its parallel. In that case, continue looking for the + clauses also on the parallel statement itself. */ + if (is_parallel_ctx (ctx)) + break; + + ctx = ctx->outer; + if (ctx == NULL || !is_parallel_ctx (ctx)) + break; + + c = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt), + OMP_CLAUSE_LASTPRIVATE); + par_clauses = true; + } } if (predicate) @@ -1988,10 +2587,10 @@ lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist, continue; var = OMP_CLAUSE_DECL (c); - by_ref = use_pointer_for_field (var, false); + by_ref = use_pointer_for_field (var, NULL); ref = build_sender_ref (var, ctx); - x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var; + x = lookup_decl_in_outer_ctx (var, ctx); x = by_ref ? build_fold_addr_expr (x) : x; x = build_gimple_modify_stmt (ref, x); gimplify_and_add (x, slist); @@ -2023,6 +2622,10 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) switch (OMP_CLAUSE_CODE (c)) { + case OMP_CLAUSE_PRIVATE: + if (OMP_CLAUSE_PRIVATE_OUTER_REF (c)) + break; + continue; case OMP_CLAUSE_FIRSTPRIVATE: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_LASTPRIVATE: @@ -2032,19 +2635,19 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) continue; } - var = val = OMP_CLAUSE_DECL (c); - if (ctx->is_nested) - var = lookup_decl_in_outer_ctx (val, ctx); + val = OMP_CLAUSE_DECL (c); + var = lookup_decl_in_outer_ctx (val, ctx); if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN && is_global_var (var)) continue; if (is_variable_sized (val)) continue; - by_ref = use_pointer_for_field (val, false); + by_ref = use_pointer_for_field (val, NULL); switch (OMP_CLAUSE_CODE (c)) { + case OMP_CLAUSE_PRIVATE: case OMP_CLAUSE_FIRSTPRIVATE: case OMP_CLAUSE_COPYIN: do_in = true; @@ -2058,7 +2661,11 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) do_in = true; } else - do_out = true; + { + do_out = true; + if (lang_hooks.decls.omp_private_outer_ref (val)) + do_in = true; + } break; case OMP_CLAUSE_REDUCTION: @@ -2076,6 +2683,8 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) x = by_ref ? 
build_fold_addr_expr (var) : var; x = build_gimple_modify_stmt (ref, x); gimplify_and_add (x, ilist); + if (is_task_ctx (ctx)) + DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL; } if (do_out) @@ -2094,27 +2703,25 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) static void lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx) { - tree var, ovar, nvar, f, x; + tree var, ovar, nvar, f, x, record_type; if (ctx->record_type == NULL) return; - for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f)) + record_type = ctx->srecord_type ? ctx->srecord_type : ctx->record_type; + for (f = TYPE_FIELDS (record_type); f ; f = TREE_CHAIN (f)) { ovar = DECL_ABSTRACT_ORIGIN (f); nvar = maybe_lookup_decl (ovar, ctx); if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar)) continue; - var = ovar; - /* If CTX is a nested parallel directive. Find the immediately enclosing parallel or workshare construct that contains a mapping for OVAR. */ - if (ctx->is_nested) - var = lookup_decl_in_outer_ctx (ovar, ctx); + var = lookup_decl_in_outer_ctx (ovar, ctx); - if (use_pointer_for_field (ovar, true)) + if (use_pointer_for_field (ovar, ctx)) { x = build_sender_ref (ovar, ctx); var = build_fold_addr_expr (var); @@ -2127,9 +2734,12 @@ lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx) x = build_gimple_modify_stmt (x, var); gimplify_and_add (x, ilist); - x = build_sender_ref (ovar, ctx); - x = build_gimple_modify_stmt (var, x); - gimplify_and_add (x, olist); + if (!TREE_READONLY (var)) + { + x = build_sender_ref (ovar, ctx); + x = build_gimple_modify_stmt (var, x); + gimplify_and_add (x, olist); + } } } } @@ -2145,12 +2755,11 @@ static void expand_parallel_call (struct omp_region *region, basic_block bb, tree entry_stmt, tree ws_args) { - tree t, t1, t2, val, cond, c, list, clauses; + tree t, t1, t2, val, cond, c, clauses; block_stmt_iterator si; int start_ix; clauses = OMP_PARALLEL_CLAUSES (entry_stmt); - push_gimplify_context (); /* Determine what flavor of GOMP_parallel_start we will be emitting. */ @@ -2160,8 +2769,11 @@ expand_parallel_call (struct omp_region *region, basic_block bb, switch (region->inner->type) { case OMP_FOR: + gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START - + region->inner->sched_kind; + + (region->inner->sched_kind + == OMP_CLAUSE_SCHEDULE_RUNTIME + ? 
3 : region->inner->sched_kind); break; case OMP_SECTIONS: start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START; @@ -2196,15 +2808,28 @@ expand_parallel_call (struct omp_region *region, basic_block bb, cond = gimple_boolify (cond); if (integer_zerop (val)) - val = build2 (EQ_EXPR, unsigned_type_node, cond, - build_int_cst (TREE_TYPE (cond), 0)); + val = fold_build2 (EQ_EXPR, unsigned_type_node, cond, + build_int_cst (TREE_TYPE (cond), 0)); else { basic_block cond_bb, then_bb, else_bb; - edge e; - tree t, then_lab, else_lab, tmp; + edge e, e_then, e_else; + tree t, tmp_then, tmp_else, tmp_join, tmp_var; + + tmp_var = create_tmp_var (TREE_TYPE (val), NULL); + if (gimple_in_ssa_p (cfun)) + { + tmp_then = make_ssa_name (tmp_var, NULL_TREE); + tmp_else = make_ssa_name (tmp_var, NULL_TREE); + tmp_join = make_ssa_name (tmp_var, NULL_TREE); + } + else + { + tmp_then = tmp_var; + tmp_else = tmp_var; + tmp_join = tmp_var; + } - tmp = create_tmp_var (TREE_TYPE (val), NULL); e = split_block (bb, NULL); cond_bb = e->src; bb = e->dest; @@ -2212,45 +2837,50 @@ expand_parallel_call (struct omp_region *region, basic_block bb, then_bb = create_empty_bb (cond_bb); else_bb = create_empty_bb (then_bb); - then_lab = create_artificial_label (); - else_lab = create_artificial_label (); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); t = build3 (COND_EXPR, void_type_node, - cond, - build_and_jump (&then_lab), - build_and_jump (&else_lab)); + cond, NULL_TREE, NULL_TREE); si = bsi_start (cond_bb); bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); si = bsi_start (then_bb); - t = build1 (LABEL_EXPR, void_type_node, then_lab); - bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - t = build_gimple_modify_stmt (tmp, val); + t = build_gimple_modify_stmt (tmp_then, val); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (tmp_then) = t; bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); si = bsi_start (else_bb); - t = build1 (LABEL_EXPR, void_type_node, else_lab); - bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - t = build_gimple_modify_stmt (tmp, + t = build_gimple_modify_stmt (tmp_else, build_int_cst (unsigned_type_node, 1)); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (tmp_else) = t; bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); - make_edge (then_bb, bb, EDGE_FALLTHRU); - make_edge (else_bb, bb, EDGE_FALLTHRU); + e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); + e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); - val = tmp; + if (gimple_in_ssa_p (cfun)) + { + tree phi = create_phi_node (tmp_join, bb); + SSA_NAME_DEF_STMT (tmp_join) = phi; + add_phi_arg (phi, tmp_then, e_then); + add_phi_arg (phi, tmp_else, e_else); + } + + val = tmp_join; } - list = NULL_TREE; - val = get_formal_tmp_var (val, &list); si = bsi_start (bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + val = force_gimple_operand_bsi (&si, val, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); } - list = NULL_TREE; + si = bsi_last (bb); t = OMP_PARALLEL_DATA_ARG (entry_stmt); if (t == NULL) t1 = null_pointer_node; @@ -2268,7 +2898,8 @@ expand_parallel_call (struct omp_region *region, basic_block bb, else t = build_call_expr (built_in_decls[start_ix], 3, t2, t1, val); - gimplify_and_add (t, &list); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); t = OMP_PARALLEL_DATA_ARG (entry_stmt); if (t == NULL) @@ -2276,15 +2907,54 @@ expand_parallel_call 
(struct omp_region *region, basic_block bb, else t = build_fold_addr_expr (t); t = build_call_expr (OMP_PARALLEL_FN (entry_stmt), 1, t); - gimplify_and_add (t, &list); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); t = build_call_expr (built_in_decls[BUILT_IN_GOMP_PARALLEL_END], 0); - gimplify_and_add (t, &list); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); +} + + +/* Build the function call to GOMP_task to actually + generate the task operation. BB is the block where to insert the code. */ + +static void +expand_task_call (basic_block bb, tree entry_stmt) +{ + tree t, t1, t2, t3, flags, cond, c, clauses; + block_stmt_iterator si; + + clauses = OMP_TASK_CLAUSES (entry_stmt); + + c = find_omp_clause (clauses, OMP_CLAUSE_IF); + if (c) + cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (c)); + else + cond = boolean_true_node; + + c = find_omp_clause (clauses, OMP_CLAUSE_UNTIED); + flags = build_int_cst (unsigned_type_node, (c ? 1 : 0)); si = bsi_last (bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + t = OMP_TASK_DATA_ARG (entry_stmt); + if (t == NULL) + t2 = null_pointer_node; + else + t2 = build_fold_addr_expr (t); + t1 = build_fold_addr_expr (OMP_TASK_FN (entry_stmt)); + t = OMP_TASK_COPYFN (entry_stmt); + if (t == NULL) + t3 = null_pointer_node; + else + t3 = build_fold_addr_expr (t); - pop_gimplify_context (NULL_TREE); + t = build_call_expr (built_in_decls[BUILT_IN_GOMP_TASK], 7, t1, t2, t3, + OMP_TASK_ARG_SIZE (entry_stmt), + OMP_TASK_ARG_ALIGN (entry_stmt), cond, flags); + + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); } @@ -2397,22 +3067,90 @@ remove_exit_barriers (struct omp_region *region) } } -/* Expand the OpenMP parallel directive starting at REGION. */ +/* Optimize omp_get_thread_num () and omp_get_num_threads () + calls. These can't be declared as const functions, but + within one parallel body they are constant, so they can be + transformed there into __builtin_omp_get_{thread_num,num_threads} () + which are declared const. Similarly for task body, except + that in untied task omp_get_thread_num () can change at any task + scheduling point. */ static void -expand_omp_parallel (struct omp_region *region) +optimize_omp_library_calls (tree entry_stmt) +{ + basic_block bb; + block_stmt_iterator bsi; + tree thr_num_id + = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM]); + tree num_thr_id + = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS]); + bool untied_task = (TREE_CODE (entry_stmt) == OMP_TASK + && find_omp_clause (OMP_TASK_CLAUSES (entry_stmt), + OMP_CLAUSE_UNTIED) != NULL); + + FOR_EACH_BB (bb) + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + tree stmt = bsi_stmt (bsi); + tree call = get_call_expr_in (stmt); + tree decl; + + if (call + && (decl = get_callee_fndecl (call)) + && DECL_EXTERNAL (decl) + && TREE_PUBLIC (decl) + && DECL_INITIAL (decl) == NULL) + { + tree built_in; + + if (DECL_NAME (decl) == thr_num_id) + { + /* In #pragma omp task untied omp_get_thread_num () can change + during the execution of the task region. 
*/ + if (untied_task) + continue; + built_in = built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM]; + } + else if (DECL_NAME (decl) == num_thr_id) + built_in = built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS]; + else + continue; + + if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) + || call_expr_nargs (call) != 0) + continue; + + if (flag_exceptions && !TREE_NOTHROW (decl)) + continue; + + if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE + || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (decl))) + != TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (built_in)))) + continue; + + CALL_EXPR_FN (call) = build_fold_addr_expr (built_in); + } + } +} + +/* Expand the OpenMP parallel or task directive starting at REGION. */ + +static void +expand_omp_taskreg (struct omp_region *region) { basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun, *saved_cfun; - tree child_fn, block, t, ws_args; + struct function *child_cfun; + tree child_fn, block, t, ws_args, *tp; block_stmt_iterator si; tree entry_stmt; edge e; entry_stmt = last_stmt (region->entry); - child_fn = OMP_PARALLEL_FN (entry_stmt); + child_fn = OMP_TASKREG_FN (entry_stmt); child_cfun = DECL_STRUCT_FUNCTION (child_fn); - saved_cfun = cfun; + /* If this function has been already instrumented, make sure + the child function isn't instrumented again. */ + child_cfun->after_tree_profile = cfun->after_tree_profile; entry_bb = region->entry; exit_bb = region->exit; @@ -2433,16 +3171,17 @@ expand_omp_parallel (struct omp_region *region) entry_succ_e = single_succ_edge (entry_bb); si = bsi_last (entry_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL + || TREE_CODE (bsi_stmt (si)) == OMP_TASK); bsi_remove (&si, true); new_bb = entry_bb; - remove_edge (entry_succ_e); if (exit_bb) { exit_succ_e = single_succ_edge (exit_bb); make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); } + remove_edge_and_dominated_blocks (entry_succ_e); } else { @@ -2459,10 +3198,11 @@ expand_omp_parallel (struct omp_region *region) a function call that has been inlined, the original PARM_DECL .OMP_DATA_I may have been converted into a different local variable. In which case, we need to keep the assignment. */ - if (OMP_PARALLEL_DATA_ARG (entry_stmt)) + if (OMP_TASKREG_DATA_ARG (entry_stmt)) { basic_block entry_succ_bb = single_succ (entry_bb); block_stmt_iterator si; + tree parcopy_stmt = NULL_TREE, arg, narg; for (si = bsi_start (entry_succ_bb); ; bsi_next (&si)) { @@ -2477,51 +3217,57 @@ expand_omp_parallel (struct omp_region *region) STRIP_NOPS (arg); if (TREE_CODE (arg) == ADDR_EXPR && TREE_OPERAND (arg, 0) - == OMP_PARALLEL_DATA_ARG (entry_stmt)) + == OMP_TASKREG_DATA_ARG (entry_stmt)) { - if (GIMPLE_STMT_OPERAND (stmt, 0) - == DECL_ARGUMENTS (child_fn)) - bsi_remove (&si, true); - else - GIMPLE_STMT_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn); + parcopy_stmt = stmt; break; } } + + gcc_assert (parcopy_stmt != NULL_TREE); + arg = DECL_ARGUMENTS (child_fn); + + if (!gimple_in_ssa_p (cfun)) + { + if (GIMPLE_STMT_OPERAND (parcopy_stmt, 0) == arg) + bsi_remove (&si, true); + else + GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = arg; + } + else + { + /* If we are in ssa form, we must load the value from the default + definition of the argument. That should not be defined now, + since the argument is not used uninitialized. 
*/ + gcc_assert (gimple_default_def (cfun, arg) == NULL); + narg = make_ssa_name (arg, build_empty_stmt ()); + set_default_def (arg, narg); + GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = narg; + update_stmt (parcopy_stmt); + } } /* Declare local variables needed in CHILD_CFUN. */ block = DECL_INITIAL (child_fn); - BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list); - DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list; - - /* Reset DECL_CONTEXT on locals and function arguments. */ - for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t)) - DECL_CONTEXT (t) = child_fn; + BLOCK_VARS (block) = list2chain (child_cfun->local_decls); + DECL_SAVED_TREE (child_fn) = bb_stmt_list (single_succ (entry_bb)); + TREE_USED (block) = 1; + /* Reset DECL_CONTEXT on function arguments. */ for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t)) DECL_CONTEXT (t) = child_fn; - /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the - child function. */ + /* Split ENTRY_BB at OMP_PARALLEL or OMP_TASK, so that it can be + moved to the child function. */ si = bsi_last (entry_bb); t = bsi_stmt (si); - gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL); + gcc_assert (t && (TREE_CODE (t) == OMP_PARALLEL + || TREE_CODE (t) == OMP_TASK)); bsi_remove (&si, true); e = split_block (entry_bb, t); entry_bb = e->dest; single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - /* Move the parallel region into CHILD_CFUN. We need to reset - dominance information because the expansion of the inner - regions has invalidated it. */ - free_dominance_info (CDI_DOMINATORS); - new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb); - if (exit_bb) - single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; - DECL_STRUCT_FUNCTION (child_fn)->curr_properties - = cfun->curr_properties; - cgraph_add_new_function (child_fn, true); - /* Convert OMP_RETURN into a RETURN_EXPR. */ if (exit_bb) { @@ -2532,10 +3278,70 @@ expand_omp_parallel (struct omp_region *region) bsi_insert_after (&si, t, BSI_SAME_STMT); bsi_remove (&si, true); } - } + /* Move the parallel region into CHILD_CFUN. */ + + if (gimple_in_ssa_p (cfun)) + { + push_cfun (child_cfun); + init_tree_ssa (child_cfun); + init_ssa_operands (); + cfun->gimple_df->in_ssa_p = true; + pop_cfun (); + block = NULL_TREE; + } + else + block = TREE_BLOCK (entry_stmt); + + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); + if (exit_bb) + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + + /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ + for (tp = &child_cfun->local_decls; *tp; ) + if (DECL_CONTEXT (TREE_VALUE (*tp)) != cfun->decl) + tp = &TREE_CHAIN (*tp); + else + *tp = TREE_CHAIN (*tp); + + /* Inform the callgraph about the new function. */ + DECL_STRUCT_FUNCTION (child_fn)->curr_properties + = cfun->curr_properties; + cgraph_add_new_function (child_fn, true); + + /* Fix the callgraph edges for child_cfun. Those for cfun will be + fixed in a following pass. */ + push_cfun (child_cfun); + if (optimize) + optimize_omp_library_calls (entry_stmt); + rebuild_cgraph_edges (); + + /* Some EH regions might become dead, see PR34608. If + pass_cleanup_cfg isn't the first pass to happen with the + new child, these dead EH edges might cause problems. + Clean them up now. 
*/ + if (flag_exceptions) + { + basic_block bb; + tree save_current = current_function_decl; + bool changed = false; + + current_function_decl = child_fn; + FOR_EACH_BB (bb) + changed |= tree_purge_dead_eh_edges (bb); + if (changed) + cleanup_tree_cfg (); + current_function_decl = save_current; + } + pop_cfun (); + } + /* Emit a library call to launch the children threads. */ - expand_parallel_call (region, new_bb, entry_stmt, ws_args); + if (TREE_CODE (entry_stmt) == OMP_PARALLEL) + expand_parallel_call (region, new_bb, entry_stmt, ws_args); + else + expand_task_call (new_bb, entry_stmt); + update_ssa (TODO_update_ssa_only_virtuals); } @@ -2560,7 +3366,64 @@ expand_omp_parallel (struct omp_region *region) L3: If this is a combined omp parallel loop, instead of the call to - GOMP_loop_foo_start, we emit 'goto L3'. */ + GOMP_loop_foo_start, we call GOMP_loop_foo_next. + + For collapsed loops, given parameters: + collapse(3) + for (V1 = N11; V1 cond1 N12; V1 += STEP1) + for (V2 = N21; V2 cond2 N22; V2 += STEP2) + for (V3 = N31; V3 cond3 N32; V3 += STEP3) + BODY; + + we generate pseudocode + + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); + if (more) goto L0; else goto L3; + L0: + V = istart0; + T = V; + V3 = N31 + (T % count3) * STEP3; + T = T / count3; + V2 = N21 + (T % count2) * STEP2; + T = T / count2; + V1 = N11 + T * STEP1; + iend = iend0; + L1: + BODY; + V += 1; + if (V < iend) goto L10; else goto L2; + L10: + V3 += STEP3; + if (V3 cond3 N32) goto L1; else goto L11; + L11: + V3 = N31; + V2 += STEP2; + if (V2 cond2 N22) goto L1; else goto L12; + L12: + V2 = N21; + V1 += STEP1; + goto L1; + L2: + if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; + L3: + + */ static void expand_omp_for_generic (struct omp_region *region, @@ -2568,131 +3431,351 @@ expand_omp_for_generic (struct omp_region *region, enum built_in_function start_fn, enum built_in_function next_fn) { - tree l0, l1, l2 = NULL, l3 = NULL; - tree type, istart0, iend0, iend; - tree t, list; - basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb; + tree type, istart0, iend0, iend, phi; + tree t, vmain, vback, bias = NULL_TREE; + basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; basic_block l2_bb = NULL, l3_bb = NULL; block_stmt_iterator si; bool in_combined_parallel = is_combined_parallel (region); - - type = TREE_TYPE (fd->v); - - istart0 = create_tmp_var (long_integer_type_node, ".istart0"); - iend0 = create_tmp_var (long_integer_type_node, ".iend0"); - iend = create_tmp_var (type, NULL); + bool broken_loop = region->cont == NULL; + edge e, ne; + tree *counts = NULL; + int i; + + gcc_assert (!broken_loop || !in_combined_parallel); + gcc_assert (fd->iter_type == long_integer_type_node + || !in_combined_parallel); + + type = TREE_TYPE (fd->loop.v); + istart0 = create_tmp_var (fd->iter_type, ".istart0"); + iend0 = create_tmp_var (fd->iter_type, ".iend0"); TREE_ADDRESSABLE (istart0) = 1; TREE_ADDRESSABLE (iend0) = 1; + if (gimple_in_ssa_p (cfun)) + { + add_referenced_var (istart0); + add_referenced_var (iend0); + } - gcc_assert ((region->cont != NULL) ^ (region->exit == NULL)); - - entry_bb = region->entry; - l0_bb = create_empty_bb (entry_bb); - 
l1_bb = single_succ (entry_bb); + /* See if we need to bias by LLONG_MIN. */ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type)) + { + tree n1, n2; - l0 = tree_block_label (l0_bb); - l1 = tree_block_label (l1_bb); + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + entry_bb = region->entry; cont_bb = region->cont; - exit_bb = region->exit; - if (cont_bb) + collapse_bb = NULL; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + l1_bb = single_succ (l0_bb); + if (!broken_loop) { l2_bb = create_empty_bb (cont_bb); - l3_bb = single_succ (cont_bb); - - l2 = tree_block_label (l2_bb); - l3 = tree_block_label (l3_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); } + else + l2_bb = NULL; + l3_bb = BRANCH_EDGE (entry_bb)->dest; + exit_bb = region->exit; si = bsi_last (entry_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - if (!in_combined_parallel) + if (fd->collapse > 1) + { + /* collapsed loops need work for expansion in SSA form. */ + gcc_assert (!gimple_in_ssa_p (cfun)); + counts = (tree *) alloca (fd->collapse * sizeof (tree)); + for (i = 0; i < fd->collapse; i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + + if (POINTER_TYPE_P (itype)) + itype = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0); + t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR + ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n1)); + if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, + fold_convert (itype, + fd->loops[i].step))); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, + fold_convert (itype, fd->loops[i].step)); + t = fold_convert (type, t); + if (TREE_CODE (t) == INTEGER_CST) + counts[i] = t; + else + { + counts[i] = create_tmp_var (type, ".count"); + t = build_gimple_modify_stmt (counts[i], t); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + } + if (SSA_VAR_P (fd->loop.n2)) + { + if (i == 0) + t = build_gimple_modify_stmt (fd->loop.n2, counts[0]); + else + { + t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); + t = build_gimple_modify_stmt (fd->loop.n2, t); + } + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + } + } + } + if (in_combined_parallel) + { + /* In a combined parallel loop, emit a call to + GOMP_loop_foo_next. */ + t = build_call_expr (built_in_decls[next_fn], 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + } + else { tree t0, t1, t2, t3, t4; /* If this is not a combined parallel loop, emit a call to GOMP_loop_foo_start in ENTRY_BB. 
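     For a long-typed loop with schedule(dynamic,4), for instance, this
     amounts to roughly
       more = GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0);
     where a nonzero return value means a first chunk of iterations has
     been obtained in [istart0, iend0).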
*/ - list = alloc_stmt_list (); t4 = build_fold_addr_expr (iend0); t3 = build_fold_addr_expr (istart0); - t2 = fold_convert (long_integer_type_node, fd->step); - t1 = fold_convert (long_integer_type_node, fd->n2); - t0 = fold_convert (long_integer_type_node, fd->n1); - if (fd->chunk_size) + t2 = fold_convert (fd->iter_type, fd->loop.step); + t1 = fold_convert (fd->iter_type, fd->loop.n2); + t0 = fold_convert (fd->iter_type, fd->loop.n1); + if (bias) + { + t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); + t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); + } + if (fd->iter_type == long_integer_type_node) { - t = fold_convert (long_integer_type_node, fd->chunk_size); - t = build_call_expr (built_in_decls[start_fn], 6, - t0, t1, t2, t, t3, t4); + if (fd->chunk_size) + { + t = fold_convert (fd->iter_type, fd->chunk_size); + t = build_call_expr (built_in_decls[start_fn], 6, + t0, t1, t2, t, t3, t4); + } + else + t = build_call_expr (built_in_decls[start_fn], 5, + t0, t1, t2, t3, t4); } else - t = build_call_expr (built_in_decls[start_fn], 5, - t0, t1, t2, t3, t4); - t = get_formal_tmp_var (t, &list); - if (cont_bb) { - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), - build_and_jump (&l3)); - append_to_statement_list (t, &list); + tree t5; + tree c_bool_type; + + /* The GOMP_loop_ull_*start functions have additional boolean + argument, true for < loops and false for > loops. + In Fortran, the C bool type can be different from + boolean_type_node. */ + c_bool_type = TREE_TYPE (TREE_TYPE (built_in_decls[start_fn])); + t5 = build_int_cst (c_bool_type, + fd->loop.cond_code == LT_EXPR ? 1 : 0); + if (fd->chunk_size) + { + t = fold_convert (fd->iter_type, fd->chunk_size); + t = build_call_expr (built_in_decls[start_fn], 7, + t5, t0, t1, t2, t, t3, t4); + } + else + t = build_call_expr (built_in_decls[start_fn], 6, + t5, t0, t1, t2, t3, t4); } - bsi_insert_after (&si, list, BSI_SAME_STMT); } + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + t = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_SAME_STMT); + + /* Remove the OMP_FOR statement. */ bsi_remove (&si, true); /* Iteration setup for sequential loop goes in L0_BB. */ - list = alloc_stmt_list (); - t = fold_convert (type, istart0); - t = build_gimple_modify_stmt (fd->v, t); - gimplify_and_add (t, &list); - - t = fold_convert (type, iend0); - t = build_gimple_modify_stmt (iend, t); - gimplify_and_add (t, &list); - si = bsi_start (l0_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + if (bias) + t = fold_convert (type, fold_build2 (MINUS_EXPR, fd->iter_type, + istart0, bias)); + else + t = fold_convert (type, istart0); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build_gimple_modify_stmt (fd->loop.v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (fd->loop.v) = t; - /* Handle the rare case where BODY doesn't ever return. 
*/ - if (cont_bb == NULL) + if (bias) + t = fold_convert (type, fold_build2 (MINUS_EXPR, fd->iter_type, + iend0, bias)); + else + t = fold_convert (type, iend0); + iend = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + if (fd->collapse > 1) { - remove_edge (single_succ_edge (entry_bb)); - make_edge (entry_bb, l0_bb, EDGE_FALLTHRU); - make_edge (l0_bb, l1_bb, EDGE_FALLTHRU); - return; + tree tem = create_tmp_var (type, ".tem"); + + t = build_gimple_modify_stmt (tem, fd->loop.v); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + for (i = fd->collapse - 1; i >= 0; i--) + { + tree vtype = TREE_TYPE (fd->loops[i].v), itype; + itype = vtype; + if (POINTER_TYPE_P (vtype)) + itype = lang_hooks.types.type_for_size (TYPE_PRECISION (vtype), 0); + t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); + t = fold_convert (itype, t); + t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].step); + if (POINTER_TYPE_P (vtype)) + t = fold_build2 (POINTER_PLUS_EXPR, vtype, + fd->loops[i].n1, fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); + t = build_gimple_modify_stmt (fd->loops[i].v, t); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + if (i != 0) + { + t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); + t = build_gimple_modify_stmt (tem, t); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + } + } } - /* Code to control the increment and predicate for the sequential - loop goes in the first half of EXIT_BB (we split EXIT_BB so - that we can inherit all the edges going out of the loop - body). */ - list = alloc_stmt_list (); + if (!broken_loop) + { + /* Code to control the increment and predicate for the sequential + loop goes in the CONT_BB. */ + si = bsi_last (cont_bb); + t = bsi_stmt (si); + gcc_assert (TREE_CODE (t) == OMP_CONTINUE); + vmain = TREE_OPERAND (t, 1); + vback = TREE_OPERAND (t, 0); - t = build2 (PLUS_EXPR, type, fd->v, fd->step); - t = build_gimple_modify_stmt (fd->v, t); - gimplify_and_add (t, &list); + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, vmain, + fold_convert (sizetype, fd->loop.step)); + else + t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + true, BSI_SAME_STMT); + t = build_gimple_modify_stmt (vback, t); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vback) = t; - t = build2 (fd->cond_code, boolean_type_node, fd->v, iend); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), - build_and_jump (&l2)); - append_to_statement_list (t, &list); + t = build2 (fd->loop.cond_code, boolean_type_node, vback, iend); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); - si = bsi_last (cont_bb); - bsi_insert_after (&si, list, BSI_SAME_STMT); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - bsi_remove (&si, true); + /* Remove OMP_CONTINUE. */ + bsi_remove (&si, true); - /* Emit code to get the next parallel iteration in L2_BB. 
*/ - list = alloc_stmt_list (); + if (fd->collapse > 1) + { + basic_block last_bb, bb; - t = build_call_expr (built_in_decls[next_fn], 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), - build_and_jump (&l3)); - append_to_statement_list (t, &list); - - si = bsi_start (l2_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + last_bb = cont_bb; + for (i = fd->collapse - 1; i >= 0; i--) + { + tree vtype = TREE_TYPE (fd->loops[i].v); + + bb = create_empty_bb (last_bb); + si = bsi_start (bb); + + if (i < fd->collapse - 1) + { + e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); + e->probability = REG_BR_PROB_BASE / 8; + + t = build_gimple_modify_stmt (fd->loops[i + 1].v, + fd->loops[i + 1].n1); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + } + else + collapse_bb = bb; + + set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); + + if (POINTER_TYPE_P (vtype)) + t = fold_build2 (POINTER_PLUS_EXPR, vtype, + fd->loops[i].v, + fold_convert (sizetype, fd->loops[i].step)); + else + t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, + fd->loops[i].step); + t = build_gimple_modify_stmt (fd->loops[i].v, t); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + + if (i > 0) + { + t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, + fd->loops[i].v, fd->loops[i].n2); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build3 (COND_EXPR, void_type_node, t, + NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + e = make_edge (bb, l1_bb, EDGE_TRUE_VALUE); + e->probability = REG_BR_PROB_BASE * 7 / 8; + } + else + make_edge (bb, l1_bb, EDGE_FALLTHRU); + last_bb = bb; + } + } + + /* Emit code to get the next parallel iteration in L2_BB. */ + si = bsi_start (l2_bb); + + t = build_call_expr (built_in_decls[next_fn], 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + t = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + } /* Add the loop cleanup function. */ si = bsi_last (exit_bb); @@ -2705,23 +3788,44 @@ expand_omp_for_generic (struct omp_region *region, bsi_remove (&si, true); /* Connect the new blocks. 
*/ - remove_edge (single_succ_edge (entry_bb)); - if (in_combined_parallel) - make_edge (entry_bb, l2_bb, EDGE_FALLTHRU); - else - { - make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE); - make_edge (entry_bb, l3_bb, EDGE_FALSE_VALUE); - } + find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; + find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; - make_edge (l0_bb, l1_bb, EDGE_FALLTHRU); + if (!broken_loop) + { + e = find_edge (cont_bb, l3_bb); + ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); - remove_edge (single_succ_edge (cont_bb)); - make_edge (cont_bb, l1_bb, EDGE_TRUE_VALUE); - make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); + for (phi = phi_nodes (l3_bb); phi; phi = PHI_CHAIN (phi)) + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), + PHI_ARG_DEF_FROM_EDGE (phi, e)); + remove_edge (e); - make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); - make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); + make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); + if (fd->collapse > 1) + { + e = find_edge (cont_bb, l1_bb); + remove_edge (e); + e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + { + e = find_edge (cont_bb, l1_bb); + e->flags = EDGE_TRUE_VALUE; + } + e->probability = REG_BR_PROB_BASE * 7 / 8; + find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + + set_immediate_dominator (CDI_DOMINATORS, l2_bb, + recompute_dominator (CDI_DOMINATORS, l2_bb)); + set_immediate_dominator (CDI_DOMINATORS, l3_bb, + recompute_dominator (CDI_DOMINATORS, l3_bb)); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, + recompute_dominator (CDI_DOMINATORS, l0_bb)); + set_immediate_dominator (CDI_DOMINATORS, l1_bb, + recompute_dominator (CDI_DOMINATORS, l1_bb)); + } } @@ -2737,14 +3841,17 @@ expand_omp_for_generic (struct omp_region *region, adj = STEP - 1; else adj = STEP + 1; - n = (adj + N2 - N1) / STEP; + if ((__typeof (V)) -1 > 0 && cond is >) + n = -(adj + N2 - N1) / -STEP; + else + n = (adj + N2 - N1) / STEP; q = n / nthreads; q += (q * nthreads != n); s0 = q * threadid; e0 = min(s0 + q, n); + V = s0 * STEP + N1; if (s0 >= e0) goto L2; else goto L0; L0: - V = s0 * STEP + N1; e = e0 * STEP + N1; L1: BODY; @@ -2757,138 +3864,158 @@ static void expand_omp_for_static_nochunk (struct omp_region *region, struct omp_for_data *fd) { - tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid; - tree type, list; + tree n, q, s0, e0, e, t, nthreads, threadid; + tree type, itype, vmain, vback; basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb; basic_block fin_bb; block_stmt_iterator si; - type = TREE_TYPE (fd->v); + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0); entry_bb = region->entry; - seq_start_bb = create_empty_bb (entry_bb); - body_bb = single_succ (entry_bb); cont_bb = region->cont; - fin_bb = single_succ (cont_bb); + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + body_bb = single_succ (seq_start_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + fin_bb = FALLTHRU_EDGE (cont_bb)->dest; exit_bb = region->exit; - l0 = tree_block_label (seq_start_bb); - l1 = tree_block_label (body_bb); - l2 = tree_block_label (fin_bb); - /* Iteration space partitioning goes in ENTRY_BB. 
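     For example, with n = 10 iterations and nthreads = 4 the per-thread
     chunk is q = 3, so the threads are assigned the ranges [0,3), [3,6),
     [6,9) and [9,10); a thread whose s0 >= e0 branches straight to the
     exit and runs no iterations.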
*/ - list = alloc_stmt_list (); + si = bsi_last (entry_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0); - t = fold_convert (type, t); - nthreads = get_formal_tmp_var (t, &list); + t = fold_convert (itype, t); + nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0); - t = fold_convert (type, t); - threadid = get_formal_tmp_var (t, &list); - - fd->n1 = fold_convert (type, fd->n1); - if (!is_gimple_val (fd->n1)) - fd->n1 = get_formal_tmp_var (fd->n1, &list); - - fd->n2 = fold_convert (type, fd->n2); - if (!is_gimple_val (fd->n2)) - fd->n2 = get_formal_tmp_var (fd->n2, &list); - - fd->step = fold_convert (type, fd->step); - if (!is_gimple_val (fd->step)) - fd->step = get_formal_tmp_var (fd->step, &list); - - t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, type, fd->step, t); - t = fold_build2 (PLUS_EXPR, type, t, fd->n2); - t = fold_build2 (MINUS_EXPR, type, t, fd->n1); - t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step); - t = fold_convert (type, t); - if (is_gimple_val (t)) - n = t; + t = fold_convert (itype, t); + threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + fd->loop.n1 + = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n1), + true, NULL_TREE, true, BSI_SAME_STMT); + fd->loop.n2 + = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.n2), + true, NULL_TREE, true, BSI_SAME_STMT); + fd->loop.step + = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.step), + true, NULL_TREE, true, BSI_SAME_STMT); + + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t); + t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, fd->loop.step)); else - n = get_formal_tmp_var (t, &list); + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step); + t = fold_convert (itype, t); + n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (TRUNC_DIV_EXPR, type, n, nthreads); - q = get_formal_tmp_var (t, &list); + t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); + q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (MULT_EXPR, type, q, nthreads); - t = build2 (NE_EXPR, type, t, n); - t = build2 (PLUS_EXPR, type, q, t); - q = get_formal_tmp_var (t, &list); + t = fold_build2 (MULT_EXPR, itype, q, nthreads); + t = fold_build2 (NE_EXPR, itype, t, n); + t = fold_build2 (PLUS_EXPR, itype, q, t); + q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (MULT_EXPR, type, q, threadid); - s0 = get_formal_tmp_var (t, &list); + t = build2 (MULT_EXPR, itype, q, threadid); + s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (PLUS_EXPR, type, s0, q); - t = build2 (MIN_EXPR, type, t, n); - e0 = get_formal_tmp_var (t, &list); + t = fold_build2 (PLUS_EXPR, itype, s0, q); + t = fold_build2 (MIN_EXPR, itype, t, n); + e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); t = build2 (GE_EXPR, boolean_type_node, s0, e0); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2), - build_and_jump (&l0)); - append_to_statement_list (t, &list); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); - si = bsi_last (entry_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - bsi_insert_after (&si, list, BSI_SAME_STMT); + /* Remove the OMP_FOR statement. */ bsi_remove (&si, true); /* Setup code for sequential iteration goes in SEQ_START_BB. */ - list = alloc_stmt_list (); - - t = fold_convert (type, s0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - t = build_gimple_modify_stmt (fd->v, t); - gimplify_and_add (t, &list); - - t = fold_convert (type, e0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - e = get_formal_tmp_var (t, &list); - si = bsi_start (seq_start_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); - - /* The code controlling the sequential loop replaces the OMP_CONTINUE. 
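     The OMP_CONTINUE statement carries two copies of the iteration
     variable: operand 1 (vmain) is the value coming out of the loop body
     and operand 0 (vback) is the name used on the back edge, so the
     increment computed from vmain is stored into vback before the exit
     test below.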
*/ - list = alloc_stmt_list (); - - t = build2 (PLUS_EXPR, type, fd->v, fd->step); - t = build_gimple_modify_stmt (fd->v, t); - gimplify_and_add (t, &list); - t = build2 (fd->cond_code, boolean_type_node, fd->v, e); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), - build_and_jump (&l2)); - append_to_statement_list (t, &list); + t = fold_convert (itype, s0); + t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step); + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1, + fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build_gimple_modify_stmt (fd->loop.v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (fd->loop.v) = t; + + t = fold_convert (itype, e0); + t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step); + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1, + fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1); + e = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + /* The code controlling the sequential loop replaces the OMP_CONTINUE. */ si = bsi_last (cont_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - bsi_insert_after (&si, list, BSI_SAME_STMT); + t = bsi_stmt (si); + gcc_assert (TREE_CODE (t) == OMP_CONTINUE); + vmain = TREE_OPERAND (t, 1); + vback = TREE_OPERAND (t, 0); + + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, vmain, + fold_convert (sizetype, fd->loop.step)); + else + t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + true, BSI_SAME_STMT); + t = build_gimple_modify_stmt (vback, t); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vback) = t; + + t = build2 (fd->loop.cond_code, boolean_type_node, vback, e); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); + + /* Remove the OMP_CONTINUE statement. */ bsi_remove (&si, true); /* Replace the OMP_RETURN with a barrier, or nothing. */ si = bsi_last (exit_bb); if (!OMP_RETURN_NOWAIT (bsi_stmt (si))) - { - list = alloc_stmt_list (); - build_omp_barrier (&list); - bsi_insert_after (&si, list, BSI_SAME_STMT); - } + force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE, + false, BSI_SAME_STMT); bsi_remove (&si, true); /* Connect all the blocks. 
*/ - make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU); + find_edge (entry_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; + find_edge (entry_bb, fin_bb)->flags = EDGE_TRUE_VALUE; - remove_edge (single_succ_edge (entry_bb)); - make_edge (entry_bb, fin_bb, EDGE_TRUE_VALUE); - make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE); - - make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE); + find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE; find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE; + + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); } @@ -2904,8 +4031,14 @@ expand_omp_for_static_nochunk (struct omp_region *region, adj = STEP - 1; else adj = STEP + 1; - n = (adj + N2 - N1) / STEP; + if ((__typeof (V)) -1 > 0 && cond is >) + n = -(adj + N2 - N1) / -STEP; + else + n = (adj + N2 - N1) / STEP; trip = 0; + V = threadid * CHUNK * STEP + N1; -- this extra definition of V is + here so that V is defined + if the loop is not entered L0: s0 = (trip * nthreads + threadid) * CHUNK; e0 = min(s0 + CHUNK, n); @@ -2924,170 +4057,255 @@ expand_omp_for_static_nochunk (struct omp_region *region, */ static void -expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd) +expand_omp_for_static_chunk (struct omp_region *region, + struct omp_for_data *fd) { - tree l0, l1, l2, l3, l4, n, s0, e0, e, t; - tree trip, nthreads, threadid; - tree type; + tree n, s0, e0, e, t, phi, nphi, args; + tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; + tree type, itype, cont, v_main, v_back, v_extra; basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; basic_block trip_update_bb, cont_bb, fin_bb; - tree list; block_stmt_iterator si; + edge se, re, ene; - type = TREE_TYPE (fd->v); + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0); entry_bb = region->entry; - iter_part_bb = create_empty_bb (entry_bb); - seq_start_bb = create_empty_bb (iter_part_bb); - body_bb = single_succ (entry_bb); + se = split_block (entry_bb, last_stmt (entry_bb)); + entry_bb = se->src; + iter_part_bb = se->dest; cont_bb = region->cont; - trip_update_bb = create_empty_bb (cont_bb); - fin_bb = single_succ (cont_bb); + gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); + gcc_assert (BRANCH_EDGE (iter_part_bb)->dest + == FALLTHRU_EDGE (cont_bb)->dest); + seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); + body_bb = single_succ (seq_start_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + fin_bb = FALLTHRU_EDGE (cont_bb)->dest; + trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); exit_bb = region->exit; - l0 = tree_block_label (iter_part_bb); - l1 = tree_block_label (seq_start_bb); - l2 = tree_block_label (body_bb); - l3 = tree_block_label (trip_update_bb); - l4 = tree_block_label (fin_bb); - /* Trip and adjustment setup goes in ENTRY_BB. 
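     With nthreads = 4 and CHUNK = 2, for example, thread 1 starts its
     successive trips at s0 = 2, 10, 18, ..., i.e. each round of
     nthreads * CHUNK iterations is split into per-thread CHUNK-sized
     slices selected by the trip counter.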
*/ - list = alloc_stmt_list (); + si = bsi_last (entry_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0); - t = fold_convert (type, t); - nthreads = get_formal_tmp_var (t, &list); + t = fold_convert (itype, t); + nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0); - t = fold_convert (type, t); - threadid = get_formal_tmp_var (t, &list); - - fd->n1 = fold_convert (type, fd->n1); - if (!is_gimple_val (fd->n1)) - fd->n1 = get_formal_tmp_var (fd->n1, &list); - - fd->n2 = fold_convert (type, fd->n2); - if (!is_gimple_val (fd->n2)) - fd->n2 = get_formal_tmp_var (fd->n2, &list); - - fd->step = fold_convert (type, fd->step); - if (!is_gimple_val (fd->step)) - fd->step = get_formal_tmp_var (fd->step, &list); - - fd->chunk_size = fold_convert (type, fd->chunk_size); - if (!is_gimple_val (fd->chunk_size)) - fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list); - - t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, type, fd->step, t); - t = fold_build2 (PLUS_EXPR, type, t, fd->n2); - t = fold_build2 (MINUS_EXPR, type, t, fd->n1); - t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step); - t = fold_convert (type, t); - if (is_gimple_val (t)) - n = t; + t = fold_convert (itype, t); + threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + fd->loop.n1 + = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n1), + true, NULL_TREE, true, BSI_SAME_STMT); + fd->loop.n2 + = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.n2), + true, NULL_TREE, true, BSI_SAME_STMT); + fd->loop.step + = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.step), + true, NULL_TREE, true, BSI_SAME_STMT); + fd->chunk_size + = force_gimple_operand_bsi (&si, fold_convert (itype, fd->chunk_size), + true, NULL_TREE, true, BSI_SAME_STMT); + + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t); + t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, fd->loop.step)); else - n = get_formal_tmp_var (t, &list); + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step); + t = fold_convert (itype, t); + n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); - t = build_int_cst (type, 0); - trip = get_initialized_tmp_var (t, &list, NULL); + trip_var = create_tmp_var (itype, ".trip"); + if (gimple_in_ssa_p (cfun)) + { + add_referenced_var (trip_var); + trip_init = make_ssa_name (trip_var, NULL_TREE); + trip_main = make_ssa_name (trip_var, NULL_TREE); + trip_back = make_ssa_name (trip_var, NULL_TREE); + } + else + { + trip_init = trip_var; + trip_main = trip_var; + trip_back = trip_var; + } - si = bsi_last (entry_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - bsi_insert_after (&si, list, BSI_SAME_STMT); + t = build_gimple_modify_stmt (trip_init, build_int_cst (itype, 0)); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (trip_init) = t; + + t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size); + t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step); + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1, + fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1); + v_extra = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + /* Remove the OMP_FOR. */ bsi_remove (&si, true); /* Iteration space partitioning goes in ITER_PART_BB. */ - list = alloc_stmt_list (); + si = bsi_last (iter_part_bb); - t = build2 (MULT_EXPR, type, trip, nthreads); - t = build2 (PLUS_EXPR, type, t, threadid); - t = build2 (MULT_EXPR, type, t, fd->chunk_size); - s0 = get_formal_tmp_var (t, &list); + t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); + t = fold_build2 (PLUS_EXPR, itype, t, threadid); + t = fold_build2 (MULT_EXPR, itype, t, fd->chunk_size); + s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); - t = build2 (PLUS_EXPR, type, s0, fd->chunk_size); - t = build2 (MIN_EXPR, type, t, n); - e0 = get_formal_tmp_var (t, &list); + t = fold_build2 (PLUS_EXPR, itype, s0, fd->chunk_size); + t = fold_build2 (MIN_EXPR, itype, t, n); + e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); t = build2 (LT_EXPR, boolean_type_node, s0, n); - t = build3 (COND_EXPR, void_type_node, t, - build_and_jump (&l1), build_and_jump (&l4)); - append_to_statement_list (t, &list); - - si = bsi_start (iter_part_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); /* Setup code for sequential iteration goes in SEQ_START_BB. 
*/ - list = alloc_stmt_list (); - - t = fold_convert (type, s0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - t = build_gimple_modify_stmt (fd->v, t); - gimplify_and_add (t, &list); - - t = fold_convert (type, e0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - e = get_formal_tmp_var (t, &list); - si = bsi_start (seq_start_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + t = fold_convert (itype, s0); + t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step); + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1, + fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build_gimple_modify_stmt (fd->loop.v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (fd->loop.v) = t; + + t = fold_convert (itype, e0); + t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step); + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1, + fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1); + e = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); /* The code controlling the sequential loop goes in CONT_BB, replacing the OMP_CONTINUE. */ - list = alloc_stmt_list (); - - t = build2 (PLUS_EXPR, type, fd->v, fd->step); - t = build_gimple_modify_stmt (fd->v, t); - gimplify_and_add (t, &list); - - t = build2 (fd->cond_code, boolean_type_node, fd->v, e); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, - build_and_jump (&l2), build_and_jump (&l3)); - append_to_statement_list (t, &list); - si = bsi_last (cont_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - bsi_insert_after (&si, list, BSI_SAME_STMT); + cont = bsi_stmt (si); + gcc_assert (TREE_CODE (cont) == OMP_CONTINUE); + v_main = TREE_OPERAND (cont, 1); + v_back = TREE_OPERAND (cont, 0); + + if (POINTER_TYPE_P (type)) + t = fold_build2 (POINTER_PLUS_EXPR, type, v_main, + fold_convert (sizetype, fd->loop.step)); + else + t = build2 (PLUS_EXPR, type, v_main, fd->loop.step); + t = build_gimple_modify_stmt (v_back, t); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (v_back) = t; + + t = build2 (fd->loop.cond_code, boolean_type_node, v_back, e); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); + + /* Remove OMP_CONTINUE. */ bsi_remove (&si, true); /* Trip update code goes into TRIP_UPDATE_BB. */ - list = alloc_stmt_list (); - - t = build_int_cst (type, 1); - t = build2 (PLUS_EXPR, type, trip, t); - t = build_gimple_modify_stmt (trip, t); - gimplify_and_add (t, &list); - si = bsi_start (trip_update_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + t = build_int_cst (itype, 1); + t = build2 (PLUS_EXPR, itype, trip_main, t); + t = build_gimple_modify_stmt (trip_back, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (trip_back) = t; /* Replace the OMP_RETURN with a barrier, or nothing. 
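     Unlike the GOMP_loop_*_start/next schedules, the static expansions
     make no library call at the end of the loop, so an explicit
     GOMP_barrier is emitted here unless the loop had a nowait clause
     (recorded on the OMP_RETURN as OMP_RETURN_NOWAIT).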
*/ si = bsi_last (exit_bb); if (!OMP_RETURN_NOWAIT (bsi_stmt (si))) - { - list = alloc_stmt_list (); - build_omp_barrier (&list); - bsi_insert_after (&si, list, BSI_SAME_STMT); - } + force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE, + false, BSI_SAME_STMT); bsi_remove (&si, true); /* Connect the new blocks. */ - remove_edge (single_succ_edge (entry_bb)); - make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU); + find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; + find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; - make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE); - make_edge (iter_part_bb, fin_bb, EDGE_FALSE_VALUE); + find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE; - make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU); + redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb); - remove_edge (single_succ_edge (cont_bb)); - make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE); - make_edge (cont_bb, trip_update_bb, EDGE_FALSE_VALUE); + if (gimple_in_ssa_p (cfun)) + { + /* When we redirect the edge from trip_update_bb to iter_part_bb, we + remove arguments of the phi nodes in fin_bb. We need to create + appropriate phi nodes in iter_part_bb instead. */ + se = single_pred_edge (fin_bb); + re = single_succ_edge (trip_update_bb); + ene = single_succ_edge (entry_bb); + + args = PENDING_STMT (re); + PENDING_STMT (re) = NULL_TREE; + for (phi = phi_nodes (fin_bb); + phi && args; + phi = PHI_CHAIN (phi), args = TREE_CHAIN (args)) + { + t = PHI_RESULT (phi); + gcc_assert (t == TREE_PURPOSE (args)); + nphi = create_phi_node (t, iter_part_bb); + SSA_NAME_DEF_STMT (t) = nphi; + + t = PHI_ARG_DEF_FROM_EDGE (phi, se); + /* A special case -- fd->loop.v is not yet computed in + iter_part_bb, we need to use v_extra instead. */ + if (t == fd->loop.v) + t = v_extra; + add_phi_arg (nphi, t, ene); + add_phi_arg (nphi, TREE_VALUE (args), re); + } + gcc_assert (!phi && !args); + while ((phi = phi_nodes (fin_bb)) != NULL_TREE) + remove_phi_node (phi, NULL_TREE, false); + + /* Make phi node for trip. 
*/ + phi = create_phi_node (trip_main, iter_part_bb); + SSA_NAME_DEF_STMT (trip_main) = phi; + add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb)); + add_phi_arg (phi, trip_init, single_succ_edge (entry_bb)); + } - make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); + set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, + recompute_dominator (CDI_DOMINATORS, iter_part_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, + recompute_dominator (CDI_DOMINATORS, seq_start_bb)); + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); } @@ -3097,16 +4315,30 @@ static void expand_omp_for (struct omp_region *region) { struct omp_for_data fd; + struct omp_for_data_loop *loops; - push_gimplify_context (); + loops + = (struct omp_for_data_loop *) + alloca (TREE_VEC_LENGTH (OMP_FOR_INIT (last_stmt (region->entry))) + * sizeof (struct omp_for_data_loop)); - extract_omp_for_data (last_stmt (region->entry), &fd); + extract_omp_for_data (last_stmt (region->entry), &fd, loops); region->sched_kind = fd.sched_kind; + gcc_assert (EDGE_COUNT (region->entry->succs) == 2); + BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; + FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; + if (region->cont) + { + gcc_assert (EDGE_COUNT (region->cont->succs) == 2); + BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; + FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; + } + if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !fd.have_ordered - && region->cont - && region->exit) + && fd.collapse == 1 + && region->cont != NULL) { if (fd.chunk_size == NULL) expand_omp_for_static_nochunk (region, &fd); @@ -3115,13 +4347,25 @@ expand_omp_for (struct omp_region *region) } else { - int fn_index = fd.sched_kind + fd.have_ordered * 4; - int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index; - int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index; + int fn_index, start_ix, next_ix; + + gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); + fn_index = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) + ? 3 : fd.sched_kind; + fn_index += fd.have_ordered * 4; + start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index; + next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index; + if (fd.iter_type == long_long_unsigned_type_node) + { + start_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_START + - BUILT_IN_GOMP_LOOP_STATIC_START; + next_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT + - BUILT_IN_GOMP_LOOP_STATIC_NEXT; + } expand_omp_for_generic (region, &fd, start_ix, next_ix); } - pop_gimplify_context (NULL); + update_ssa (TODO_update_ssa_only_virtuals); } @@ -3150,88 +4394,142 @@ expand_omp_for (struct omp_region *region) reduction; If this is a combined parallel sections, replace the call to - GOMP_sections_start with 'goto L1'. */ + GOMP_sections_start with call to GOMP_sections_next. 
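+   In that case GOMP_parallel_sections_start, emitted together with the
+   enclosing parallel, has already set up the work-sharing construct, so
+   inside the region each thread only needs to fetch its first section
+   number with GOMP_sections_next.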
*/ static void expand_omp_sections (struct omp_region *region) { - tree label_vec, l0, l1, l2, t, u, v, sections_stmt; - unsigned i, len; - basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb, default_bb; + tree label_vec, l1, l2, t, u, sections_stmt, vin, vmain, vnext, cont; + unsigned i, casei, len; + basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; block_stmt_iterator si; - struct omp_region *inner; + edge_iterator ei; edge e; + struct omp_region *inner; + bool exit_reachable = region->cont != NULL; + gcc_assert (exit_reachable == (region->exit != NULL)); entry_bb = region->entry; - l0_bb = create_empty_bb (entry_bb); - l0 = tree_block_label (l0_bb); - - gcc_assert ((region->cont != NULL) ^ (region->exit == NULL)); + l0_bb = single_succ (entry_bb); l1_bb = region->cont; - if (l1_bb) + l2_bb = region->exit; + if (exit_reachable) { - l2_bb = single_succ (l1_bb); + if (single_pred (l2_bb) == l0_bb) + l2 = tree_block_label (l2_bb); + else + { + /* This can happen if there are reductions. */ + len = EDGE_COUNT (l0_bb->succs); + gcc_assert (len > 0); + e = EDGE_SUCC (l0_bb, len - 1); + si = bsi_last (e->dest); + l2 = NULL_TREE; + if (bsi_end_p (si) || TREE_CODE (bsi_stmt (si)) != OMP_SECTION) + l2 = tree_block_label (e->dest); + else + FOR_EACH_EDGE (e, ei, l0_bb->succs) + { + si = bsi_last (e->dest); + if (bsi_end_p (si) || TREE_CODE (bsi_stmt (si)) != OMP_SECTION) + { + l2 = tree_block_label (e->dest); + break; + } + } + } default_bb = create_empty_bb (l1_bb->prev_bb); - l1 = tree_block_label (l1_bb); } else { - l2_bb = create_empty_bb (l0_bb); - default_bb = l2_bb; - - l1 = NULL; + default_bb = create_empty_bb (l0_bb); + l1 = NULL_TREE; + l2 = tree_block_label (default_bb); } - l2 = tree_block_label (l2_bb); - - exit_bb = region->exit; - - v = create_tmp_var (unsigned_type_node, ".section"); /* We will build a switch() with enough cases for all the OMP_SECTION regions, a '0' case to handle the end of more work and a default case to abort if something goes wrong. */ - len = EDGE_COUNT (entry_bb->succs); - label_vec = make_tree_vec (len + 2); + len = EDGE_COUNT (l0_bb->succs); + label_vec = make_tree_vec (len + 1); /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the OMP_SECTIONS statement. */ si = bsi_last (entry_bb); sections_stmt = bsi_stmt (si); gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS); + vin = OMP_SECTIONS_CONTROL (sections_stmt); if (!is_combined_parallel (region)) { /* If we are not inside a combined parallel+sections region, call GOMP_sections_start. */ - t = build_int_cst (unsigned_type_node, len); + t = build_int_cst (unsigned_type_node, + exit_reachable ? len - 1 : len); u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START]; t = build_call_expr (u, 1, t); - t = build_gimple_modify_stmt (v, t); - bsi_insert_after (&si, t, BSI_SAME_STMT); } + else + { + /* Otherwise, call GOMP_sections_next. */ + u = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT]; + t = build_call_expr (u, 0); + } + t = build_gimple_modify_stmt (vin, t); + bsi_insert_after (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vin) = t; bsi_remove (&si, true); - /* The switch() statement replacing OMP_SECTIONS goes in L0_BB. */ - si = bsi_start (l0_bb); + /* The switch() statement replacing OMP_SECTIONS_SWITCH goes in L0_BB. 
*/ + si = bsi_last (l0_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTIONS_SWITCH); + if (exit_reachable) + { + cont = last_stmt (l1_bb); + gcc_assert (TREE_CODE (cont) == OMP_CONTINUE); + vmain = TREE_OPERAND (cont, 1); + vnext = TREE_OPERAND (cont, 0); + } + else + { + vmain = vin; + vnext = NULL_TREE; + } - t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec); - bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + t = build3 (SWITCH_EXPR, void_type_node, vmain, NULL, label_vec); + bsi_insert_after (&si, t, BSI_SAME_STMT); + bsi_remove (&si, true); - t = build3 (CASE_LABEL_EXPR, void_type_node, - build_int_cst (unsigned_type_node, 0), NULL, l2); - TREE_VEC_ELT (label_vec, 0) = t; - make_edge (l0_bb, l2_bb, 0); + i = 0; + if (exit_reachable) + { + t = build3 (CASE_LABEL_EXPR, void_type_node, + build_int_cst (unsigned_type_node, 0), NULL, l2); + TREE_VEC_ELT (label_vec, 0) = t; + i++; + } /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */ - for (inner = region->inner, i = 1; inner; inner = inner->next, ++i) + for (inner = region->inner, casei = 1; + inner; + inner = inner->next, i++, casei++) { basic_block s_entry_bb, s_exit_bb; + /* Skip optional reduction region. */ + if (inner->type == OMP_ATOMIC_LOAD) + { + --i; + --casei; + continue; + } + s_entry_bb = inner->entry; s_exit_bb = inner->exit; t = tree_block_label (s_entry_bb); - u = build_int_cst (unsigned_type_node, i); + u = build_int_cst (unsigned_type_node, casei); u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t); TREE_VEC_ELT (label_vec, i) = u; @@ -3239,11 +4537,6 @@ expand_omp_sections (struct omp_region *region) gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION); gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si))); bsi_remove (&si, true); - - e = single_pred_edge (s_entry_bb); - e->flags = 0; - redirect_edge_pred (e, l0_bb); - single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; if (s_exit_bb == NULL) @@ -3259,29 +4552,30 @@ expand_omp_sections (struct omp_region *region) /* Error handling code goes in DEFAULT_BB. */ t = tree_block_label (default_bb); u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t); - TREE_VEC_ELT (label_vec, len + 1) = u; + TREE_VEC_ELT (label_vec, len) = u; make_edge (l0_bb, default_bb, 0); si = bsi_start (default_bb); t = build_call_expr (built_in_decls[BUILT_IN_TRAP], 0); bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - /* Code to get the next section goes in L1_BB. */ - if (l1_bb) + if (exit_reachable) { + /* Code to get the next section goes in L1_BB. */ si = bsi_last (l1_bb); gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT], 0); - t = build_gimple_modify_stmt (v, t); + t = build_gimple_modify_stmt (vnext, t); bsi_insert_after (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vnext) = t; bsi_remove (&si, true); - } - /* Cleanup function replaces OMP_RETURN in EXIT_BB. */ - if (exit_bb) - { - si = bsi_last (exit_bb); + single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; + + /* Cleanup function replaces OMP_RETURN in EXIT_BB. */ + si = bsi_last (l2_bb); if (OMP_RETURN_NOWAIT (bsi_stmt (si))) t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT]; else @@ -3291,23 +4585,7 @@ expand_omp_sections (struct omp_region *region) bsi_remove (&si, true); } - /* Connect the new blocks. 
*/ - if (is_combined_parallel (region)) - { - /* If this was a combined parallel+sections region, we did not - emit a GOMP_sections_start in the entry block, so we just - need to jump to L1_BB to get the next section. */ - make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); - } - else - make_edge (entry_bb, l0_bb, EDGE_FALLTHRU); - - if (l1_bb) - { - e = single_succ_edge (l1_bb); - redirect_edge_succ (e, l0_bb); - e->flags = EDGE_FALLTHRU; - } + set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); } @@ -3337,11 +4615,8 @@ expand_omp_single (struct omp_region *region) si = bsi_last (exit_bb); if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier) - { - tree t = alloc_stmt_list (); - build_omp_barrier (&t); - bsi_insert_after (&si, t, BSI_SAME_STMT); - } + force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE, + false, BSI_SAME_STMT); bsi_remove (&si, true); single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; } @@ -3377,6 +4652,384 @@ expand_omp_synch (struct omp_region *region) } } +/* A subroutine of expand_omp_atomic. Attempt to implement the atomic + operation as a __sync_fetch_and_op builtin. INDEX is log2 of the + size of the data type, and thus usable to find the index of the builtin + decl. Returns false if the expression is not of the proper form. */ + +static bool +expand_omp_atomic_fetch_op (basic_block load_bb, + tree addr, tree loaded_val, + tree stored_val, int index) +{ + enum built_in_function base; + tree decl, itype, call; + enum insn_code *optab; + tree rhs; + basic_block store_bb = single_succ (load_bb); + block_stmt_iterator bsi; + tree stmt; + + /* We expect to find the following sequences: + + load_bb: + OMP_ATOMIC_LOAD (tmp, mem) + + store_bb: + val = tmp OP something; (or: something OP tmp) + OMP_STORE (val) + + ???FIXME: Allow a more flexible sequence. + Perhaps use data flow to pick the statements. + + */ + + bsi = bsi_after_labels (store_bb); + stmt = bsi_stmt (bsi); + if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) + return false; + bsi_next (&bsi); + if (TREE_CODE (bsi_stmt (bsi)) != OMP_ATOMIC_STORE) + return false; + + if (!operand_equal_p (GIMPLE_STMT_OPERAND (stmt, 0), stored_val, 0)) + return false; + + rhs = GIMPLE_STMT_OPERAND (stmt, 1); + + /* Check for one of the supported fetch-op operations. */ + switch (TREE_CODE (rhs)) + { + case PLUS_EXPR: + case POINTER_PLUS_EXPR: + base = BUILT_IN_FETCH_AND_ADD_N; + optab = sync_add_optab; + break; + case MINUS_EXPR: + base = BUILT_IN_FETCH_AND_SUB_N; + optab = sync_add_optab; + break; + case BIT_AND_EXPR: + base = BUILT_IN_FETCH_AND_AND_N; + optab = sync_and_optab; + break; + case BIT_IOR_EXPR: + base = BUILT_IN_FETCH_AND_OR_N; + optab = sync_ior_optab; + break; + case BIT_XOR_EXPR: + base = BUILT_IN_FETCH_AND_XOR_N; + optab = sync_xor_optab; + break; + default: + return false; + } + /* Make sure the expression is of the proper form. 
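+     I.e. for  #pragma omp atomic  x = x + 1;  the statement in STORE_BB
+     is  val = tmp + 1  (or  1 + tmp, PLUS_EXPR being commutative), and
+     the load/store pair collapses into a single __sync_fetch_and_add
+     style builtin call; any other shape makes us return false so that
+     the more general expansions are used instead.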
*/ + if (operand_equal_p (TREE_OPERAND (rhs, 0), loaded_val, 0)) + rhs = TREE_OPERAND (rhs, 1); + else if (commutative_tree_code (TREE_CODE (rhs)) + && operand_equal_p (TREE_OPERAND (rhs, 1), loaded_val, 0)) + rhs = TREE_OPERAND (rhs, 0); + else + return false; + + decl = built_in_decls[base + index + 1]; + itype = TREE_TYPE (TREE_TYPE (decl)); + + if (optab[TYPE_MODE (itype)] == CODE_FOR_nothing) + return false; + + bsi = bsi_last (load_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD); + call = build_call_expr (decl, 2, addr, fold_convert (itype, rhs)); + force_gimple_operand_bsi (&bsi, call, true, NULL_TREE, true, BSI_SAME_STMT); + bsi_remove (&bsi, true); + + bsi = bsi_last (store_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE); + bsi_remove (&bsi, true); + bsi = bsi_last (store_bb); + bsi_remove (&bsi, true); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + + return true; +} + +/* A subroutine of expand_omp_atomic. Implement the atomic operation as: + + oldval = *addr; + repeat: + newval = rhs; // with oldval replacing *addr in rhs + oldval = __sync_val_compare_and_swap (addr, oldval, newval); + if (oldval != newval) + goto repeat; + + INDEX is log2 of the size of the data type, and thus usable to find the + index of the builtin decl. */ + +static bool +expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, + tree addr, tree loaded_val, tree stored_val, + int index) +{ + tree loadedi, storedi, initial, new_storedi, old_vali; + tree type, itype, cmpxchg, iaddr; + block_stmt_iterator bsi; + basic_block loop_header = single_succ (load_bb); + tree phi, x; + edge e; + + cmpxchg = built_in_decls[BUILT_IN_VAL_COMPARE_AND_SWAP_N + index + 1]; + type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); + itype = TREE_TYPE (TREE_TYPE (cmpxchg)); + + if (sync_compare_and_swap[TYPE_MODE (itype)] == CODE_FOR_nothing) + return false; + + /* Load the initial value, replacing the OMP_ATOMIC_LOAD. */ + bsi = bsi_last (load_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD); + /* For floating-point values, we'll need to view-convert them to integers + so that we can perform the atomic compare and swap. Simplify the + following code by always setting up the "i"ntegral variables. */ + if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) + { + iaddr = create_tmp_var (build_pointer_type (itype), NULL); + x = build_gimple_modify_stmt (iaddr, + fold_convert (TREE_TYPE (iaddr), addr)); + force_gimple_operand_bsi (&bsi, x, true, NULL_TREE, + true, BSI_SAME_STMT); + DECL_NO_TBAA_P (iaddr) = 1; + DECL_POINTER_ALIAS_SET (iaddr) = 0; + loadedi = create_tmp_var (itype, NULL); + if (gimple_in_ssa_p (cfun)) + { + add_referenced_var (iaddr); + add_referenced_var (loadedi); + loadedi = make_ssa_name (loadedi, NULL); + } + } + else + { + iaddr = addr; + loadedi = loaded_val; + } + initial = force_gimple_operand_bsi (&bsi, build_fold_indirect_ref (iaddr), + true, NULL_TREE, true, BSI_SAME_STMT); + + /* Move the value to the LOADEDI temporary. 
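+
+     A rough picture of the control flow built below (SSA names are
+     illustrative only):
+
+	load_bb:      initial = *iaddr;
+	loop_header:  loadedi = PHI <initial (load_bb), new_storedi (store_bb)>
+		      ... statements computing storedi from loadedi ...
+	store_bb:     new_storedi
+			= __sync_val_compare_and_swap (iaddr, loadedi, storedi);
+		      if (new_storedi != loadedi) goto loop_header;
+
+     so LOADEDI must hold the initial load on the first iteration and the
+     value returned by the compare and swap on every later one.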
*/ + if (gimple_in_ssa_p (cfun)) + { + gcc_assert (phi_nodes (loop_header) == NULL_TREE); + phi = create_phi_node (loadedi, loop_header); + SSA_NAME_DEF_STMT (loadedi) = phi; + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), + initial); + } + else + bsi_insert_before (&bsi, + build_gimple_modify_stmt (loadedi, initial), + BSI_SAME_STMT); + if (loadedi != loaded_val) + { + block_stmt_iterator bsi2; + + x = build1 (VIEW_CONVERT_EXPR, type, loadedi); + bsi2 = bsi_start (loop_header); + if (gimple_in_ssa_p (cfun)) + { + x = force_gimple_operand_bsi (&bsi2, x, true, NULL_TREE, + true, BSI_SAME_STMT); + x = build_gimple_modify_stmt (loaded_val, x); + bsi_insert_before (&bsi2, x, BSI_SAME_STMT); + SSA_NAME_DEF_STMT (loaded_val) = x; + } + else + { + x = build_gimple_modify_stmt (loaded_val, x); + force_gimple_operand_bsi (&bsi2, x, true, NULL_TREE, + true, BSI_SAME_STMT); + } + } + bsi_remove (&bsi, true); + + bsi = bsi_last (store_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE); + + if (iaddr == addr) + storedi = stored_val; + else + storedi = + force_gimple_operand_bsi (&bsi, + build1 (VIEW_CONVERT_EXPR, itype, + stored_val), true, NULL_TREE, true, + BSI_SAME_STMT); + + /* Build the compare&swap statement. */ + new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); + new_storedi = force_gimple_operand_bsi (&bsi, + fold_convert (itype, new_storedi), + true, NULL_TREE, + true, BSI_SAME_STMT); + + if (gimple_in_ssa_p (cfun)) + old_vali = loadedi; + else + { + old_vali = create_tmp_var (itype, NULL); + if (gimple_in_ssa_p (cfun)) + add_referenced_var (old_vali); + x = build_gimple_modify_stmt (old_vali, loadedi); + force_gimple_operand_bsi (&bsi, x, true, NULL_TREE, + true, BSI_SAME_STMT); + + x = build_gimple_modify_stmt (loadedi, new_storedi); + force_gimple_operand_bsi (&bsi, x, true, NULL_TREE, + true, BSI_SAME_STMT); + } + + /* Note that we always perform the comparison as an integer, even for + floating point. This allows the atomic operation to properly + succeed even with NaNs and -0.0. */ + x = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali); + x = build3 (COND_EXPR, void_type_node, x, NULL_TREE, NULL_TREE); + bsi_insert_before (&bsi, x, BSI_SAME_STMT); + + /* Update cfg. */ + e = single_succ_edge (store_bb); + e->flags &= ~EDGE_FALLTHRU; + e->flags |= EDGE_FALSE_VALUE; + + e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); + + /* Copy the new value to loadedi (we already did that before the condition + if we are not in SSA). */ + if (gimple_in_ssa_p (cfun)) + { + phi = phi_nodes (loop_header); + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); + } + + /* Remove OMP_ATOMIC_STORE. */ + bsi_remove (&bsi, true); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + + return true; +} + +/* A subroutine of expand_omp_atomic. Implement the atomic operation as: + + GOMP_atomic_start (); + *addr = rhs; + GOMP_atomic_end (); + + The result is not globally atomic, but works so long as all parallel + references are within #pragma omp atomic directives. According to + responses received from omp@openmp.org, appears to be within spec. + Which makes sense, since that's how several other compilers handle + this situation as well. + LOADED_VAL and ADDR are the operands of OMP_ATOMIC_LOAD we're expanding. + STORED_VAL is the operand of the matching OMP_ATOMIC_STORE. 
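+
+   As a concrete sketch (x and y are made-up names), a directive such as
+
+     #pragma omp atomic
+     x *= y;
+
+   therefore ends up, roughly, as
+
+     GOMP_atomic_start ();
+     loaded_val = *addr;
+     stored_val = loaded_val * y;
+     *addr = stored_val;
+     GOMP_atomic_end ();
+
+   with the multiplication supplied by the statements already present
+   between the load and the store.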
+ + We replace + OMP_ATOMIC_LOAD (loaded_val, addr) with + loaded_val = *addr; + + and replace + OMP_ATOMIC_ATORE (stored_val) with + *addr = stored_val; +*/ + +static bool +expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, + tree addr, tree loaded_val, tree stored_val) +{ + block_stmt_iterator bsi; + tree t; + + bsi = bsi_last (load_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD); + + t = built_in_decls[BUILT_IN_GOMP_ATOMIC_START]; + t = build_function_call_expr (t, 0); + force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT); + + t = build_gimple_modify_stmt (loaded_val, build_fold_indirect_ref (addr)); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (loaded_val) = t; + bsi_insert_before (&bsi, t, BSI_SAME_STMT); + bsi_remove (&bsi, true); + + bsi = bsi_last (store_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE); + + t = build_gimple_modify_stmt (build_fold_indirect_ref (unshare_expr (addr)), + stored_val); + bsi_insert_before (&bsi, t, BSI_SAME_STMT); + + t = built_in_decls[BUILT_IN_GOMP_ATOMIC_END]; + t = build_function_call_expr (t, 0); + force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT); + bsi_remove (&bsi, true); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + return true; +} + +/* Expand an OMP_ATOMIC statement. We try to expand + using expand_omp_atomic_fetch_op. If it failed, we try to + call expand_omp_atomic_pipeline, and if it fails too, the + ultimate fallback is wrapping the operation in a mutex + (expand_omp_atomic_mutex). REGION is the atomic region built + by build_omp_regions_1(). */ + +static void +expand_omp_atomic (struct omp_region *region) +{ + basic_block load_bb = region->entry, store_bb = region->exit; + tree load = last_stmt (load_bb), store = last_stmt (store_bb); + tree loaded_val = TREE_OPERAND (load, 0); + tree addr = TREE_OPERAND (load, 1); + tree stored_val = TREE_OPERAND (store, 0); + tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); + HOST_WIDE_INT index; + + /* Make sure the type is one of the supported sizes. */ + index = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + index = exact_log2 (index); + if (index >= 0 && index <= 4) + { + unsigned int align = TYPE_ALIGN_UNIT (type); + + /* __sync builtins require strict data alignment. */ + if (exact_log2 (align) >= index) + { + /* When possible, use specialized atomic update functions. */ + if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + && store_bb == single_succ (load_bb)) + { + if (expand_omp_atomic_fetch_op (load_bb, addr, + loaded_val, stored_val, index)) + return; + } + + /* If we don't have specialized __sync builtins, try and implement + as a compare and swap loop. */ + if (expand_omp_atomic_pipeline (load_bb, store_bb, addr, + loaded_val, stored_val, index)) + return; + } + } + + /* The ultimate fallback is wrapping the operation in a mutex. */ + expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val); +} + /* Expand the parallel region tree rooted at REGION. Expansion proceeds in depth-first order. Innermost regions are expanded @@ -3389,13 +5042,28 @@ expand_omp (struct omp_region *region) { while (region) { + location_t saved_location; + + /* First, determine whether this is a combined parallel+workshare + region. 
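+     A typical candidate (illustrative source) is
+
+	#pragma omp parallel for
+	for (i = 0; i < n; i++)
+	  ...
+
+     where the parallel body contains nothing but the loop, so the two
+     directives can later be expanded through a single combined runtime
+     entry point rather than a separate parallel start-up followed by a
+     worksharing call.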
*/ + if (region->type == OMP_PARALLEL) + determine_parallel_type (region); + if (region->inner) expand_omp (region->inner); + saved_location = input_location; + if (EXPR_HAS_LOCATION (last_stmt (region->entry))) + input_location = EXPR_LOCATION (last_stmt (region->entry)); + switch (region->type) { case OMP_PARALLEL: - expand_omp_parallel (region); + expand_omp_taskreg (region); + break; + + case OMP_TASK: + expand_omp_taskreg (region); break; case OMP_FOR: @@ -3421,20 +5089,28 @@ expand_omp (struct omp_region *region) expand_omp_synch (region); break; + case OMP_ATOMIC_LOAD: + expand_omp_atomic (region); + break; + default: gcc_unreachable (); } + input_location = saved_location; region = region->next; } } /* Helper for build_omp_regions. Scan the dominator tree starting at - block BB. PARENT is the region that contains BB. */ + block BB. PARENT is the region that contains BB. If SINGLE_TREE is + true, the function ends once a single tree is built (otherwise, whole + forest of OMP constructs may be built). */ static void -build_omp_regions_1 (basic_block bb, struct omp_region *parent) +build_omp_regions_1 (basic_block bb, struct omp_region *parent, + bool single_tree) { block_stmt_iterator si; tree stmt; @@ -3448,7 +5124,6 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent) stmt = bsi_stmt (si); code = TREE_CODE (stmt); - if (code == OMP_RETURN) { /* STMT is the return point out of region PARENT. Mark it @@ -3458,17 +5133,28 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent) region = parent; region->exit = bb; parent = parent->outer; - - /* If REGION is a parallel region, determine whether it is - a combined parallel+workshare region. */ - if (region->type == OMP_PARALLEL) - determine_parallel_type (region); } + else if (code == OMP_ATOMIC_STORE) + { + /* OMP_ATOMIC_STORE is analogous to OMP_RETURN, but matches with + OMP_ATOMIC_LOAD. */ + gcc_assert (parent); + gcc_assert (parent->type == OMP_ATOMIC_LOAD); + region = parent; + region->exit = bb; + parent = parent->outer; + } + else if (code == OMP_CONTINUE) { gcc_assert (parent); parent->cont = bb; } + else if (code == OMP_SECTIONS_SWITCH) + { + /* OMP_SECTIONS_SWITCH is part of OMP_SECTIONS, and we do nothing for + it. */ ; + } else { /* Otherwise, this directive becomes the parent for a new @@ -3478,12 +5164,44 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent) } } + if (single_tree && !parent) + return; + for (son = first_dom_son (CDI_DOMINATORS, bb); son; son = next_dom_son (CDI_DOMINATORS, son)) - build_omp_regions_1 (son, parent); + build_omp_regions_1 (son, parent, single_tree); +} + +/* Builds the tree of OMP regions rooted at ROOT, storing it to + root_omp_region. */ + +static void +build_omp_regions_root (basic_block root) +{ + gcc_assert (root_omp_region == NULL); + build_omp_regions_1 (root, NULL, true); + gcc_assert (root_omp_region != NULL); } +/* Expands omp construct (and its subconstructs) starting in HEAD. */ + +void +omp_expand_local (basic_block head) +{ + build_omp_regions_root (head); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + expand_omp (root_omp_region); + + free_omp_regions (); +} /* Scan the CFG and build a tree of OMP regions. Return the root of the OMP region tree. 
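
   For instance (illustrative source), a function containing

	#pragma omp parallel
	  {
	    #pragma omp for
	    for (...) ...
	  }

   yields a root OMP_PARALLEL region with one inner OMP_FOR region; each
   region records its entry block, its OMP_RETURN (exit) block and, where
   applicable, its OMP_CONTINUE block as they are discovered by
   build_omp_regions_1.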
*/ @@ -3493,7 +5211,7 @@ build_omp_regions (void) { gcc_assert (root_omp_region == NULL); calculate_dominance_info (CDI_DOMINATORS); - build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL); + build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false); } @@ -3518,8 +5236,6 @@ execute_expand_omp (void) expand_omp (root_omp_region); - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); cleanup_tree_cfg (); free_omp_regions (); @@ -3527,14 +5243,18 @@ execute_expand_omp (void) return 0; } +/* OMP expansion -- the default pass, run before creation of SSA form. */ + static bool gate_expand_omp (void) { - return flag_openmp != 0 && errorcount == 0; + return (flag_openmp != 0 && errorcount == 0); } -struct tree_opt_pass pass_expand_omp = +struct gimple_opt_pass pass_expand_omp = { + { + GIMPLE_PASS, "ompexp", /* name */ gate_expand_omp, /* gate */ execute_expand_omp, /* execute */ @@ -3546,8 +5266,8 @@ struct tree_opt_pass pass_expand_omp = PROP_gimple_lomp, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_dump_func, /* todo_flags_finish */ - 0 /* letter */ + TODO_dump_func /* todo_flags_finish */ + } }; /* Routines to lower OpenMP directives into OMP-GIMPLE. */ @@ -3557,14 +5277,15 @@ struct tree_opt_pass pass_expand_omp = static void lower_omp_sections (tree *stmt_p, omp_context *ctx) { - tree new_stmt, stmt, body, bind, block, ilist, olist, new_body; + tree new_stmt, stmt, body, bind, block, ilist, olist, new_body, control; tree t, dlist; tree_stmt_iterator tsi; unsigned i, len; + struct gimplify_ctx gctx; stmt = *stmt_p; - push_gimplify_context (); + push_gimplify_context (&gctx); dlist = NULL; ilist = NULL; @@ -3610,18 +5331,27 @@ lower_omp_sections (tree *stmt_p, omp_context *ctx) olist = NULL_TREE; lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx); - pop_gimplify_context (NULL_TREE); - record_vars_into (ctx->block_vars, ctx->cb.dst_fn); - - new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); + block = make_node (BLOCK); + new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); TREE_SIDE_EFFECTS (new_stmt) = 1; + pop_gimplify_context (new_stmt); + + BIND_EXPR_VARS (new_stmt) + = chainon (BIND_EXPR_VARS (new_stmt), ctx->block_vars); + BLOCK_VARS (block) = BIND_EXPR_VARS (new_stmt); + if (BLOCK_VARS (block)) + TREE_USED (block) = 1; + new_body = alloc_stmt_list (); append_to_statement_list (ilist, &new_body); append_to_statement_list (stmt, &new_body); + append_to_statement_list (make_node (OMP_SECTIONS_SWITCH), &new_body); append_to_statement_list (bind, &new_body); - t = make_node (OMP_CONTINUE); + control = create_tmp_var (unsigned_type_node, ".section"); + t = build2 (OMP_CONTINUE, void_type_node, control, control); + OMP_SECTIONS_CONTROL (stmt) = control; append_to_statement_list (t, &new_body); append_to_statement_list (olist, &new_body); @@ -3658,6 +5388,9 @@ lower_omp_single_simple (tree single_stmt, tree *pre_p) tree t; t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_START], 0); + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); t = build3 (COND_EXPR, void_type_node, t, OMP_SINGLE_BODY (single_stmt), NULL); gimplify_and_add (t, pre_p); @@ -3750,8 +5483,9 @@ static void lower_omp_single (tree *stmt_p, omp_context *ctx) { tree t, bind, block, single_stmt = *stmt_p, dlist; + struct gimplify_ctx gctx; - push_gimplify_context (); + push_gimplify_context (&gctx); block = make_node (BLOCK); *stmt_p = bind = build3 
(BIND_EXPR, void_type_node, NULL, NULL, block); @@ -3783,6 +5517,8 @@ lower_omp_single (tree *stmt_p, omp_context *ctx) BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + if (BLOCK_VARS (block)) + TREE_USED (block) = 1; } @@ -3792,8 +5528,9 @@ static void lower_omp_master (tree *stmt_p, omp_context *ctx) { tree bind, block, stmt = *stmt_p, lab = NULL, x; + struct gimplify_ctx gctx; - push_gimplify_context (); + push_gimplify_context (&gctx); block = make_node (BLOCK); *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); @@ -3831,8 +5568,9 @@ static void lower_omp_ordered (tree *stmt_p, omp_context *ctx) { tree bind, block, stmt = *stmt_p, x; + struct gimplify_ctx gctx; - push_gimplify_context (); + push_gimplify_context (&gctx); block = make_node (BLOCK); *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); @@ -3875,6 +5613,7 @@ lower_omp_critical (tree *stmt_p, omp_context *ctx) { tree bind, block, stmt = *stmt_p; tree t, lock, unlock, name; + struct gimplify_ctx gctx; name = OMP_CRITICAL_NAME (stmt); if (name) @@ -3924,7 +5663,7 @@ lower_omp_critical (tree *stmt_p, omp_context *ctx) unlock = build_call_expr (unlock, 0); } - push_gimplify_context (); + push_gimplify_context (&gctx); block = make_node (BLOCK); *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); @@ -3964,37 +5703,38 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p, tree clauses, cond, stmts, vinit, t; enum tree_code cond_code; - cond_code = fd->cond_code; + cond_code = fd->loop.cond_code; cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR; /* When possible, use a strict equality expression. This can let VRP type optimizations deduce the value and remove a copy. */ - if (host_integerp (fd->step, 0)) + if (host_integerp (fd->loop.step, 0)) { - HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step); + HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->loop.step); if (step == 1 || step == -1) cond_code = EQ_EXPR; } - cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2); + cond = build2 (cond_code, boolean_type_node, fd->loop.v, fd->loop.n2); clauses = OMP_FOR_CLAUSES (fd->for_stmt); stmts = NULL; lower_lastprivate_clauses (clauses, cond, &stmts, ctx); if (stmts != NULL) { - append_to_statement_list (stmts, dlist); + append_to_statement_list (*dlist, &stmts); + *dlist = stmts; /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */ - vinit = fd->n1; + vinit = fd->loop.n1; if (cond_code == EQ_EXPR - && host_integerp (fd->n2, 0) - && ! integer_zerop (fd->n2)) - vinit = build_int_cst (TREE_TYPE (fd->v), 0); + && host_integerp (fd->loop.n2, 0) + && ! integer_zerop (fd->loop.n2)) + vinit = build_int_cst (TREE_TYPE (fd->loop.v), 0); /* Initialize the iterator variable, so that threads that don't execute any iterations don't execute the lastprivate clauses by accident. 
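
     (Sketch: for  #pragma omp for lastprivate (i)  the copy-out sequence
     built above is guarded by a comparison of the iteration variable
     against the end value, so a thread that is handed no iterations must
     not test an uninitialized i.  It is therefore started at the lowered
     N1, or at 0 when the guard is a strict equality against a non-zero
     constant, which 0 can never satisfy.)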
*/ - t = build_gimple_modify_stmt (fd->v, vinit); + t = build_gimple_modify_stmt (fd->loop.v, vinit); gimplify_and_add (t, body_p); } } @@ -4005,30 +5745,35 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p, static void lower_omp_for (tree *stmt_p, omp_context *ctx) { - tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p; + tree t, stmt, ilist, dlist, new_stmt, block, *body_p, *rhs_p; struct omp_for_data fd; + int i; + struct gimplify_ctx gctx; stmt = *stmt_p; - push_gimplify_context (); + push_gimplify_context (&gctx); lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx); lower_omp (&OMP_FOR_BODY (stmt), ctx); + block = make_node (BLOCK); + new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); + TREE_SIDE_EFFECTS (new_stmt) = 1; + body_p = &BIND_EXPR_BODY (new_stmt); + /* Move declaration of temporaries in the loop body before we make it go away. */ if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR) - record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn); - - new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); - TREE_SIDE_EFFECTS (new_stmt) = 1; - body_p = &BIND_EXPR_BODY (new_stmt); + BIND_EXPR_VARS (new_stmt) + = chainon (BIND_EXPR_VARS (new_stmt), + BIND_EXPR_VARS (OMP_FOR_BODY (stmt))); /* The pre-body and input clauses go before the lowered OMP_FOR. */ ilist = NULL; dlist = NULL; - append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p); lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx); + append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p); /* Lower the header expressions. At this point, we can assume that the header is of the form: @@ -4037,20 +5782,24 @@ lower_omp_for (tree *stmt_p, omp_context *ctx) We just need to make sure that VAL1, VAL2 and VAL3 are lowered using the .omp_data_s mapping, if needed. */ - rhs_p = &GIMPLE_STMT_OPERAND (OMP_FOR_INIT (stmt), 1); - if (!is_gimple_min_invariant (*rhs_p)) - *rhs_p = get_formal_tmp_var (*rhs_p, body_p); - - rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1); - if (!is_gimple_min_invariant (*rhs_p)) - *rhs_p = get_formal_tmp_var (*rhs_p, body_p); - - rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND (OMP_FOR_INCR (stmt), 1), 1); - if (!is_gimple_min_invariant (*rhs_p)) - *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (stmt)); i++) + { + rhs_p = &GIMPLE_STMT_OPERAND (TREE_VEC_ELT (OMP_FOR_INIT (stmt), i), 1); + if (!is_gimple_min_invariant (*rhs_p)) + *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + + rhs_p = &TREE_OPERAND (TREE_VEC_ELT (OMP_FOR_COND (stmt), i), 1); + if (!is_gimple_min_invariant (*rhs_p)) + *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + + rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND + (TREE_VEC_ELT (OMP_FOR_INCR (stmt), i), 1), 1); + if (!is_gimple_min_invariant (*rhs_p)) + *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + } /* Once lowered, extract the bounds and clauses. */ - extract_omp_for_data (stmt, &fd); + extract_omp_for_data (stmt, &fd, NULL); lower_omp_for_lastprivate (&fd, body_p, &dlist, ctx); @@ -4058,7 +5807,7 @@ lower_omp_for (tree *stmt_p, omp_context *ctx) append_to_statement_list (OMP_FOR_BODY (stmt), body_p); - t = make_node (OMP_CONTINUE); + t = build2 (OMP_CONTINUE, void_type_node, fd.loop.v, fd.loop.v); append_to_statement_list (t, body_p); /* After the loop, add exit clauses. 
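
     To sketch the overall shape assembled in BODY_P (clause expansions
     abbreviated):

	<input clauses>  <pre-body>  <temporaries for the lowered bounds>
	OMP_FOR (the directive itself, with its body detached afterwards)
	  <loop body>
	OMP_CONTINUE (v, v)
	<lastprivate / reduction exit code>
	OMP_RETURN	with OMP_RETURN_NOWAIT set from the nowait clause

     where the OMP_CONTINUE now carries the iteration variable, much as
     the OMP_SECTIONS case carries its section control variable.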
*/ @@ -4072,39 +5821,362 @@ lower_omp_for (tree *stmt_p, omp_context *ctx) OMP_RETURN_NOWAIT (t) = fd.have_nowait; append_to_statement_list (t, body_p); - pop_gimplify_context (NULL_TREE); - record_vars_into (ctx->block_vars, ctx->cb.dst_fn); + pop_gimplify_context (new_stmt); + BIND_EXPR_VARS (new_stmt) + = chainon (BIND_EXPR_VARS (new_stmt), ctx->block_vars); + BLOCK_VARS (block) = BIND_EXPR_VARS (new_stmt); + if (BLOCK_VARS (block)) + TREE_USED (block) = 1; OMP_FOR_BODY (stmt) = NULL_TREE; OMP_FOR_PRE_BODY (stmt) = NULL_TREE; *stmt_p = new_stmt; } +/* Callback for walk_stmts. Check if *TP only contains OMP_FOR + or OMP_PARALLEL. */ + +static tree +check_combined_parallel (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + int *info = (int *) wi->info; + + *walk_subtrees = 0; + switch (TREE_CODE (*tp)) + { + case OMP_FOR: + case OMP_SECTIONS: + *info = *info == 0 ? 1 : -1; + break; + default: + *info = -1; + break; + } + return NULL; +} + +struct omp_taskcopy_context +{ + /* This field must be at the beginning, as we do "inheritance": Some + callback functions for tree-inline.c (e.g., omp_copy_decl) + receive a copy_body_data pointer that is up-casted to an + omp_context pointer. */ + copy_body_data cb; + omp_context *ctx; +}; + +static tree +task_copyfn_copy_decl (tree var, copy_body_data *cb) +{ + struct omp_taskcopy_context *tcctx = (struct omp_taskcopy_context *) cb; + + if (splay_tree_lookup (tcctx->ctx->sfield_map, (splay_tree_key) var)) + return create_tmp_var (TREE_TYPE (var), NULL); + + return var; +} + +static tree +task_copyfn_remap_type (struct omp_taskcopy_context *tcctx, tree orig_type) +{ + tree name, new_fields = NULL, type, f; + + type = lang_hooks.types.make_type (RECORD_TYPE); + name = DECL_NAME (TYPE_NAME (orig_type)); + name = build_decl (TYPE_DECL, name, type); + TYPE_NAME (type) = name; + + for (f = TYPE_FIELDS (orig_type); f ; f = TREE_CHAIN (f)) + { + tree new_f = copy_node (f); + DECL_CONTEXT (new_f) = type; + TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &tcctx->cb); + TREE_CHAIN (new_f) = new_fields; + walk_tree (&DECL_SIZE (new_f), copy_body_r, &tcctx->cb, NULL); + walk_tree (&DECL_SIZE_UNIT (new_f), copy_body_r, &tcctx->cb, NULL); + walk_tree (&DECL_FIELD_OFFSET (new_f), copy_body_r, &tcctx->cb, NULL); + new_fields = new_f; + *pointer_map_insert (tcctx->cb.decl_map, f) = new_f; + } + TYPE_FIELDS (type) = nreverse (new_fields); + layout_type (type); + return type; +} + +/* Create task copyfn. */ + +static void +create_task_copyfn (tree task_stmt, omp_context *ctx) +{ + struct function *child_cfun; + tree child_fn, t, c, src, dst, f, sf, arg, sarg, decl; + tree record_type, srecord_type, bind, list; + bool record_needs_remap = false, srecord_needs_remap = false; + splay_tree_node n; + struct omp_taskcopy_context tcctx; + struct gimplify_ctx gctx; + + child_fn = OMP_TASK_COPYFN (task_stmt); + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + gcc_assert (child_cfun->cfg == NULL); + child_cfun->dont_save_pending_sizes_p = 1; + DECL_SAVED_TREE (child_fn) = alloc_stmt_list (); + + /* Reset DECL_CONTEXT on function arguments. */ + for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + /* Populate the function. 
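+     The copyfn takes two pointer arguments: the first (ARG below) points
+     at the task's own data record, the second (SARG) at the record filled
+     in by the thread that encountered the construct.  In outline (field
+     names illustrative) the body built here is
+
+	arg->shared_ptr = sarg->shared_ptr;	/* shared: forward pointer */
+	arg->fp = copy-construct from sarg->fp;	/* firstprivate		   */
+	...
+	return;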
*/ + push_gimplify_context (&gctx); + current_function_decl = child_fn; + + bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); + TREE_SIDE_EFFECTS (bind) = 1; + list = NULL; + DECL_SAVED_TREE (child_fn) = bind; + DECL_SOURCE_LOCATION (child_fn) = EXPR_LOCATION (task_stmt); + + /* Remap src and dst argument types if needed. */ + record_type = ctx->record_type; + srecord_type = ctx->srecord_type; + for (f = TYPE_FIELDS (record_type); f ; f = TREE_CHAIN (f)) + if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn)) + { + record_needs_remap = true; + break; + } + for (f = TYPE_FIELDS (srecord_type); f ; f = TREE_CHAIN (f)) + if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn)) + { + srecord_needs_remap = true; + break; + } + + if (record_needs_remap || srecord_needs_remap) + { + memset (&tcctx, '\0', sizeof (tcctx)); + tcctx.cb.src_fn = ctx->cb.src_fn; + tcctx.cb.dst_fn = child_fn; + tcctx.cb.src_node = cgraph_node (tcctx.cb.src_fn); + tcctx.cb.dst_node = tcctx.cb.src_node; + tcctx.cb.src_cfun = ctx->cb.src_cfun; + tcctx.cb.copy_decl = task_copyfn_copy_decl; + tcctx.cb.eh_region = -1; + tcctx.cb.transform_call_graph_edges = CB_CGE_MOVE; + tcctx.cb.decl_map = pointer_map_create (); + tcctx.ctx = ctx; + + if (record_needs_remap) + record_type = task_copyfn_remap_type (&tcctx, record_type); + if (srecord_needs_remap) + srecord_type = task_copyfn_remap_type (&tcctx, srecord_type); + } + else + tcctx.cb.decl_map = NULL; + + push_cfun (child_cfun); + + arg = DECL_ARGUMENTS (child_fn); + TREE_TYPE (arg) = build_pointer_type (record_type); + sarg = TREE_CHAIN (arg); + TREE_TYPE (sarg) = build_pointer_type (srecord_type); + + /* First pass: initialize temporaries used in record_type and srecord_type + sizes and field offsets. */ + if (tcctx.cb.decl_map) + for (c = OMP_TASK_CLAUSES (task_stmt); c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) + { + tree *p; + + decl = OMP_CLAUSE_DECL (c); + p = (tree *) pointer_map_contains (tcctx.cb.decl_map, decl); + if (p == NULL) + continue; + n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl); + sf = (tree) n->value; + sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf); + src = build_fold_indirect_ref (sarg); + src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL); + t = build_gimple_modify_stmt (*p, src); + append_to_statement_list (t, &list); + } + + /* Second pass: copy shared var pointers and copy construct non-VLA + firstprivate vars. 
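+     Shared variables only need their address forwarded, so a plain copy
+     of the pointer field is enough; firstprivate variables go through the
+     omp_clause_copy_ctor langhook so that non-trivial copy construction
+     (e.g. a C++ copy constructor) is performed instead of a bitwise copy.
+     Illustratively,
+
+	#pragma omp task shared (a) firstprivate (s)
+
+     stores the address of a into the task record but copy-constructs s
+     into it.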
*/ + for (c = OMP_TASK_CLAUSES (task_stmt); c; c = OMP_CLAUSE_CHAIN (c)) + switch (OMP_CLAUSE_CODE (c)) + { + case OMP_CLAUSE_SHARED: + decl = OMP_CLAUSE_DECL (c); + n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl); + if (n == NULL) + break; + f = (tree) n->value; + if (tcctx.cb.decl_map) + f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f); + n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl); + sf = (tree) n->value; + if (tcctx.cb.decl_map) + sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf); + src = build_fold_indirect_ref (sarg); + src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL); + dst = build_fold_indirect_ref (arg); + dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL); + t = build_gimple_modify_stmt (dst, src); + append_to_statement_list (t, &list); + break; + case OMP_CLAUSE_FIRSTPRIVATE: + decl = OMP_CLAUSE_DECL (c); + if (is_variable_sized (decl)) + break; + n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl); + if (n == NULL) + break; + f = (tree) n->value; + if (tcctx.cb.decl_map) + f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f); + n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl); + if (n != NULL) + { + sf = (tree) n->value; + if (tcctx.cb.decl_map) + sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf); + src = build_fold_indirect_ref (sarg); + src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL); + if (use_pointer_for_field (decl, NULL) || is_reference (decl)) + src = build_fold_indirect_ref (src); + } + else + src = decl; + dst = build_fold_indirect_ref (arg); + dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL); + t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src); + append_to_statement_list (t, &list); + break; + case OMP_CLAUSE_PRIVATE: + if (! OMP_CLAUSE_PRIVATE_OUTER_REF (c)) + break; + decl = OMP_CLAUSE_DECL (c); + n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl); + f = (tree) n->value; + if (tcctx.cb.decl_map) + f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f); + n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl); + if (n != NULL) + { + sf = (tree) n->value; + if (tcctx.cb.decl_map) + sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf); + src = build_fold_indirect_ref (sarg); + src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL); + if (use_pointer_for_field (decl, NULL)) + src = build_fold_indirect_ref (src); + } + else + src = decl; + dst = build_fold_indirect_ref (arg); + dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL); + t = build_gimple_modify_stmt (dst, src); + append_to_statement_list (t, &list); + break; + default: + break; + } + + /* Last pass: handle VLA firstprivates. 
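+     A variable-length firstprivate, e.g. (sketch)
+
+	void foo (int n)
+	{
+	  int vla[n];
+	  #pragma omp task firstprivate (vla)
+	  ...
+	}
+
+     has already been rewritten so that every use of VLA goes through its
+     DECL_VALUE_EXPR, an INDIRECT_REF of a pointer variable.  The sender
+     record therefore carries a pointer to the original data; the code
+     below copy-constructs that data into the space reserved for it in the
+     task record and then stores the address of the fresh copy into the
+     task's own pointer field.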
*/ + if (tcctx.cb.decl_map) + for (c = OMP_TASK_CLAUSES (task_stmt); c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) + { + tree ind, ptr, df; + + decl = OMP_CLAUSE_DECL (c); + if (!is_variable_sized (decl)) + continue; + n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl); + if (n == NULL) + continue; + f = (tree) n->value; + f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f); + gcc_assert (DECL_HAS_VALUE_EXPR_P (decl)); + ind = DECL_VALUE_EXPR (decl); + gcc_assert (TREE_CODE (ind) == INDIRECT_REF); + gcc_assert (DECL_P (TREE_OPERAND (ind, 0))); + n = splay_tree_lookup (ctx->sfield_map, + (splay_tree_key) TREE_OPERAND (ind, 0)); + sf = (tree) n->value; + sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf); + src = build_fold_indirect_ref (sarg); + src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL); + src = build_fold_indirect_ref (src); + dst = build_fold_indirect_ref (arg); + dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL); + t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src); + append_to_statement_list (t, &list); + n = splay_tree_lookup (ctx->field_map, + (splay_tree_key) TREE_OPERAND (ind, 0)); + df = (tree) n->value; + df = *(tree *) pointer_map_contains (tcctx.cb.decl_map, df); + ptr = build_fold_indirect_ref (arg); + ptr = build3 (COMPONENT_REF, TREE_TYPE (df), ptr, df, NULL); + t = build_gimple_modify_stmt (ptr, build_fold_addr_expr (dst)); + append_to_statement_list (t, &list); + } + + t = build1 (RETURN_EXPR, void_type_node, NULL); + append_to_statement_list (t, &list); + + if (tcctx.cb.decl_map) + pointer_map_destroy (tcctx.cb.decl_map); + pop_gimplify_context (NULL); + BIND_EXPR_BODY (bind) = list; + pop_cfun (); + current_function_decl = ctx->cb.src_fn; +} -/* Lower the OpenMP parallel directive in *STMT_P. CTX holds context +/* Lower the OpenMP parallel or task directive in *STMT_P. CTX holds context information for the directive. */ static void -lower_omp_parallel (tree *stmt_p, omp_context *ctx) +lower_omp_taskreg (tree *stmt_p, omp_context *ctx) { tree clauses, par_bind, par_body, new_body, bind; tree olist, ilist, par_olist, par_ilist; tree stmt, child_fn, t; + struct gimplify_ctx gctx; stmt = *stmt_p; - clauses = OMP_PARALLEL_CLAUSES (stmt); - par_bind = OMP_PARALLEL_BODY (stmt); + clauses = OMP_TASKREG_CLAUSES (stmt); + par_bind = OMP_TASKREG_BODY (stmt); par_body = BIND_EXPR_BODY (par_bind); child_fn = ctx->cb.dst_fn; + if (TREE_CODE (stmt) == OMP_PARALLEL && !OMP_PARALLEL_COMBINED (stmt)) + { + struct walk_stmt_info wi; + int ws_num = 0; + + memset (&wi, 0, sizeof (wi)); + wi.callback = check_combined_parallel; + wi.info = &ws_num; + wi.val_only = true; + walk_stmts (&wi, &par_bind); + if (ws_num == 1) + OMP_PARALLEL_COMBINED (stmt) = 1; + } + if (ctx->srecord_type) + create_task_copyfn (stmt, ctx); - push_gimplify_context (); + push_gimplify_context (&gctx); par_olist = NULL_TREE; par_ilist = NULL_TREE; lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx); lower_omp (&par_body, ctx); - lower_reduction_clauses (clauses, &par_olist, ctx); + if (TREE_CODE (stmt) == OMP_PARALLEL) + lower_reduction_clauses (clauses, &par_olist, ctx); /* Declare all the variables created by mapping and the variables declared in the scope of the parallel body. 
*/ @@ -4113,8 +6185,10 @@ lower_omp_parallel (tree *stmt_p, omp_context *ctx) if (ctx->record_type) { - ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o"); - OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl; + ctx->sender_decl + = create_tmp_var (ctx->srecord_type ? ctx->srecord_type + : ctx->record_type, ".omp_data_o"); + OMP_TASKREG_DATA_ARG (stmt) = ctx->sender_decl; } olist = NULL_TREE; @@ -4123,9 +6197,10 @@ lower_omp_parallel (tree *stmt_p, omp_context *ctx) lower_send_shared_vars (&ilist, &olist, ctx); /* Once all the expansions are done, sequence all the different - fragments inside OMP_PARALLEL_BODY. */ - bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); - append_to_statement_list (ilist, &BIND_EXPR_BODY (bind)); + fragments inside OMP_TASKREG_BODY. */ + bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, + BIND_EXPR_BLOCK (par_bind)); + TREE_SIDE_EFFECTS (bind) = 1; new_body = alloc_stmt_list (); @@ -4144,194 +6219,204 @@ lower_omp_parallel (tree *stmt_p, omp_context *ctx) maybe_catch_exception (&new_body); t = make_node (OMP_RETURN); append_to_statement_list (t, &new_body); - OMP_PARALLEL_BODY (stmt) = new_body; + OMP_TASKREG_BODY (stmt) = new_body; append_to_statement_list (stmt, &BIND_EXPR_BODY (bind)); - append_to_statement_list (olist, &BIND_EXPR_BODY (bind)); + if (ilist || olist) + { + append_to_statement_list (bind, &ilist); + append_to_statement_list (olist, &ilist); + bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); + TREE_SIDE_EFFECTS (bind) = 1; + append_to_statement_list (ilist, &BIND_EXPR_BODY (bind)); + } *stmt_p = bind; pop_gimplify_context (NULL_TREE); } - -/* Pass *TP back through the gimplifier within the context determined by WI. - This handles replacement of DECL_VALUE_EXPR, as well as adjusting the - flags on ADDR_EXPR. */ - -static void -lower_regimplify (tree *tp, struct walk_stmt_info *wi) -{ - enum gimplify_status gs; - tree pre = NULL; - - if (wi->is_lhs) - gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue); - else if (wi->val_only) - gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue); - else - gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue); - gcc_assert (gs == GS_ALL_DONE); - - if (pre) - tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT); -} - -/* Copy EXP into a temporary. Insert the initialization statement before TSI. */ +/* Callback for lower_omp_1. Return non-NULL if *tp needs to be + regimplified. */ static tree -init_tmp_var (tree exp, tree_stmt_iterator *tsi) +lower_omp_2 (tree *tp, int *walk_subtrees, void *data) { - tree t, stmt; - - t = create_tmp_var (TREE_TYPE (exp), NULL); - DECL_GIMPLE_REG_P (t) = 1; - stmt = build_gimple_modify_stmt (t, exp); - SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi))); - tsi_link_before (tsi, stmt, TSI_SAME_STMT); - - return t; -} + tree t = *tp; + omp_context *ctx = (omp_context *) data; -/* Similarly, but copy from the temporary and insert the statement - after the iterator. */ + /* Any variable with DECL_VALUE_EXPR needs to be regimplified. 
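+     Such a variable no longer stands for itself but, e.g., for a
+     COMPONENT_REF into the receiver record or, for a VLA, for an
+     INDIRECT_REF of a pointer, so a statement that used it as a plain
+     register operand is not valid GIMPLE any more.  Illustratively,
+
+	x = y + 1;    must effectively become    <receiver>->x = y + 1;
+
+     (names made up).  The same reasoning applies to the variables
+     recorded in task_shared_vars, checked just below.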
*/ + if (TREE_CODE (t) == VAR_DECL && ctx && DECL_HAS_VALUE_EXPR_P (t)) + return t; -static tree -save_tmp_var (tree exp, tree_stmt_iterator *tsi) -{ - tree t, stmt; + if (task_shared_vars + && DECL_P (t) + && bitmap_bit_p (task_shared_vars, DECL_UID (t))) + return t; - t = create_tmp_var (TREE_TYPE (exp), NULL); - DECL_GIMPLE_REG_P (t) = 1; - stmt = build_gimple_modify_stmt (exp, t); - SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi))); - tsi_link_after (tsi, stmt, TSI_SAME_STMT); + /* If a global variable has been privatized, TREE_CONSTANT on + ADDR_EXPR might be wrong. */ + if (ctx && TREE_CODE (t) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (t); - return t; + *walk_subtrees = !TYPE_P (t) && !DECL_P (t); + return NULL_TREE; } -/* Callback for walk_stmts. Lower the OpenMP directive pointed by TP. */ - -static tree -lower_omp_1 (tree *tp, int *walk_subtrees, void *data) +static void +lower_omp_1 (tree *tp, omp_context *ctx, tree_stmt_iterator *tsi) { - struct walk_stmt_info *wi = data; - omp_context *ctx = wi->info; tree t = *tp; + if (!t) + return; + + if (EXPR_HAS_LOCATION (t)) + input_location = EXPR_LOCATION (t); + /* If we have issued syntax errors, avoid doing any heavy lifting. Just replace the OpenMP directives with a NOP to avoid confusing RTL expansion. */ - if (errorcount && OMP_DIRECTIVE_P (*tp)) + if (errorcount && OMP_DIRECTIVE_P (t)) { *tp = build_empty_stmt (); - return NULL_TREE; + return; } - *walk_subtrees = 0; - switch (TREE_CODE (*tp)) + switch (TREE_CODE (t)) { + case STATEMENT_LIST: + { + tree_stmt_iterator i; + for (i = tsi_start (t); !tsi_end_p (i); tsi_next (&i)) + lower_omp_1 (tsi_stmt_ptr (i), ctx, &i); + } + break; + + case COND_EXPR: + lower_omp_1 (&COND_EXPR_THEN (t), ctx, NULL); + lower_omp_1 (&COND_EXPR_ELSE (t), ctx, NULL); + if ((ctx || task_shared_vars) + && walk_tree (&COND_EXPR_COND (t), lower_omp_2, ctx, NULL)) + { + tree pre = NULL; + gimplify_expr (&COND_EXPR_COND (t), &pre, NULL, + is_gimple_condexpr, fb_rvalue); + if (pre) + { + if (tsi) + tsi_link_before (tsi, pre, TSI_SAME_STMT); + else + { + append_to_statement_list (t, &pre); + *tp = pre; + } + } + } + break; + case CATCH_EXPR: + lower_omp_1 (&CATCH_BODY (t), ctx, NULL); + break; + case EH_FILTER_EXPR: + lower_omp_1 (&EH_FILTER_FAILURE (t), ctx, NULL); + break; + case TRY_CATCH_EXPR: + case TRY_FINALLY_EXPR: + lower_omp_1 (&TREE_OPERAND (t, 0), ctx, NULL); + lower_omp_1 (&TREE_OPERAND (t, 1), ctx, NULL); + break; + case BIND_EXPR: + lower_omp_1 (&BIND_EXPR_BODY (t), ctx, NULL); + break; + case RETURN_EXPR: + lower_omp_1 (&TREE_OPERAND (t, 0), ctx, NULL); + break; + case OMP_PARALLEL: + case OMP_TASK: ctx = maybe_lookup_ctx (t); - lower_omp_parallel (tp, ctx); + lower_omp_taskreg (tp, ctx); break; - case OMP_FOR: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); lower_omp_for (tp, ctx); break; - case OMP_SECTIONS: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); lower_omp_sections (tp, ctx); break; - case OMP_SINGLE: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); lower_omp_single (tp, ctx); break; - case OMP_MASTER: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); lower_omp_master (tp, ctx); break; - case OMP_ORDERED: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); lower_omp_ordered (tp, ctx); break; - case OMP_CRITICAL: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); lower_omp_critical (tp, ctx); break; - case VAR_DECL: - if (ctx && DECL_HAS_VALUE_EXPR_P (t)) + default: + if ((ctx || task_shared_vars) + && walk_tree (tp, lower_omp_2, ctx, NULL)) { - lower_regimplify (&t, wi); - if (wi->val_only) 
+ /* The gimplifier doesn't gimplify CALL_EXPR_STATIC_CHAIN. + Handle that here. */ + tree call = get_call_expr_in (t); + if (call + && CALL_EXPR_STATIC_CHAIN (call) + && walk_tree (&CALL_EXPR_STATIC_CHAIN (call), lower_omp_2, + ctx, NULL)) { - if (wi->is_lhs) - t = save_tmp_var (t, &wi->tsi); - else - t = init_tmp_var (t, &wi->tsi); + tree pre = NULL; + gimplify_expr (&CALL_EXPR_STATIC_CHAIN (call), &pre, NULL, + is_gimple_val, fb_rvalue); + if (pre) + { + if (tsi) + tsi_link_before (tsi, pre, TSI_SAME_STMT); + else + { + append_to_statement_list (t, &pre); + lower_omp_1 (&pre, ctx, NULL); + *tp = pre; + return; + } + } } - *tp = t; - } - break; - case ADDR_EXPR: - if (ctx) - lower_regimplify (tp, wi); - break; - - case ARRAY_REF: - case ARRAY_RANGE_REF: - case REALPART_EXPR: - case IMAGPART_EXPR: - case COMPONENT_REF: - case VIEW_CONVERT_EXPR: - if (ctx) - lower_regimplify (tp, wi); - break; - - case INDIRECT_REF: - if (ctx) - { - wi->is_lhs = false; - wi->val_only = true; - lower_regimplify (&TREE_OPERAND (t, 0), wi); + if (tsi == NULL) + gimplify_stmt (tp); + else + { + tree pre = NULL; + gimplify_expr (tp, &pre, NULL, is_gimple_stmt, fb_none); + if (pre) + tsi_link_before (tsi, pre, TSI_SAME_STMT); + } } break; - - default: - if (!TYPE_P (t) && !DECL_P (t)) - *walk_subtrees = 1; - break; } - - return NULL_TREE; } static void lower_omp (tree *stmt_p, omp_context *ctx) { - struct walk_stmt_info wi; - - memset (&wi, 0, sizeof (wi)); - wi.callback = lower_omp_1; - wi.info = ctx; - wi.val_only = true; - wi.want_locations = true; - - walk_stmts (&wi, stmt_p); + location_t saved_location = input_location; + lower_omp_1 (stmt_p, ctx, NULL); + input_location = saved_location; } /* Main entry point. */ @@ -4343,16 +6428,25 @@ execute_lower_omp (void) delete_omp_context); scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL); - gcc_assert (parallel_nesting_level == 0); + gcc_assert (taskreg_nesting_level == 0); if (all_contexts->root) - lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL); + { + struct gimplify_ctx gctx; + + if (task_shared_vars) + push_gimplify_context (&gctx); + lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL); + if (task_shared_vars) + pop_gimplify_context (NULL); + } if (all_contexts) { splay_tree_delete (all_contexts); all_contexts = NULL; } + BITMAP_FREE (task_shared_vars); return 0; } @@ -4362,8 +6456,10 @@ gate_lower_omp (void) return flag_openmp != 0; } -struct tree_opt_pass pass_lower_omp = +struct gimple_opt_pass pass_lower_omp = { + { + GIMPLE_PASS, "omplower", /* name */ gate_lower_omp, /* gate */ execute_lower_omp, /* execute */ @@ -4375,8 +6471,8 @@ struct tree_opt_pass pass_lower_omp = PROP_gimple_lomp, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_dump_func, /* todo_flags_finish */ - 0 /* letter */ + TODO_dump_func /* todo_flags_finish */ + } }; /* The following is a utility to diagnose OpenMP structured block violations. @@ -4397,7 +6493,7 @@ diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx) return false; /* Try to avoid confusing the user by producing and error message - with correct "exit" or "enter" verbage. We prefer "exit" + with correct "exit" or "enter" verbiage. We prefer "exit" unless we can show that LABEL_CTX is nested within BRANCH_CTX. 
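
     For example (illustrative code), given

	#pragma omp parallel
	  {
	    ...
	    goto out;
	  }
	out:;

     the branch leaves the structured block and is diagnosed as an invalid
     exit; a jump in the opposite direction is diagnosed as an invalid
     entry instead.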
*/ if (branch_ctx == NULL) exit_p = false; @@ -4429,15 +6525,17 @@ diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx) static tree diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data) { - struct walk_stmt_info *wi = data; + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; tree context = (tree) wi->info; tree inner_context; tree t = *tp; + int i; *walk_subtrees = 0; switch (TREE_CODE (t)) { case OMP_PARALLEL: + case OMP_TASK: case OMP_SECTIONS: case OMP_SINGLE: walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL); @@ -4457,9 +6555,15 @@ diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data) walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL); inner_context = tree_cons (NULL, t, context); wi->info = inner_context; - walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL); - walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL); - walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL); + for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (t)); i++) + { + walk_tree (&TREE_VEC_ELT (OMP_FOR_INIT (t), i), diagnose_sb_1, + wi, NULL); + walk_tree (&TREE_VEC_ELT (OMP_FOR_COND (t), i), diagnose_sb_1, + wi, NULL); + walk_tree (&TREE_VEC_ELT (OMP_FOR_INCR (t), i), diagnose_sb_1, + wi, NULL); + } walk_stmts (wi, &OMP_FOR_PRE_BODY (t)); walk_stmts (wi, &OMP_FOR_BODY (t)); wi->info = context; @@ -4483,15 +6587,17 @@ diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data) static tree diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data) { - struct walk_stmt_info *wi = data; + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; tree context = (tree) wi->info; splay_tree_node n; tree t = *tp; + int i; *walk_subtrees = 0; switch (TREE_CODE (t)) { case OMP_PARALLEL: + case OMP_TASK: case OMP_SECTIONS: case OMP_SINGLE: walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL); @@ -4508,9 +6614,15 @@ diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data) case OMP_FOR: walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL); wi->info = t; - walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL); - walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL); - walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL); + for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (t)); i++) + { + walk_tree (&TREE_VEC_ELT (OMP_FOR_INIT (t), i), diagnose_sb_2, + wi, NULL); + walk_tree (&TREE_VEC_ELT (OMP_FOR_COND (t), i), diagnose_sb_2, + wi, NULL); + walk_tree (&TREE_VEC_ELT (OMP_FOR_INCR (t), i), diagnose_sb_2, + wi, NULL); + } walk_stmts (wi, &OMP_FOR_PRE_BODY (t)); walk_stmts (wi, &OMP_FOR_BODY (t)); wi->info = context;