marshalling to implement data sharing and copying clauses.
Contributed by Diego Novillo <dnovillo@redhat.com>
- Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of GCC.
#include "ggc.h"
#include "except.h"
#include "splay-tree.h"
-
+#include "optabs.h"
+#include "cfgloop.h"
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
phases. The first phase scans the function looking for OMP statements
tree sender_decl;
tree receiver_decl;
+ /* These are used only by task contexts, if the task firstprivate fn is
+ needed. srecord_type is used to communicate from the thread
+ that encountered the task construct to the task firstprivate fn;
+ record_type is allocated by GOMP_task, initialized by the task
+ firstprivate fn and passed to the task body fn. */
+ splay_tree sfield_map;
+ tree srecord_type;
+
/* A chain of variables to add to the top-level block surrounding the
construct. In the case of a parallel, this is in the child function. */
tree block_vars;
} omp_context;
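+/* A structure holding the elements of
+ for (V = N1; V cond N2; V += STEP) [...] */
+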
+struct omp_for_data_loop
+{
+ tree v, n1, n2, step;
+ enum tree_code cond_code;
+};
+
/* A structure describing the main elements of a parallel loop. */
struct omp_for_data
{
- tree v, n1, n2, step, chunk_size, for_stmt;
- enum tree_code cond_code;
- tree pre;
+ struct omp_for_data_loop loop;
+ tree chunk_size, for_stmt;
+ tree pre, iter_type;
+ int collapse;
bool have_nowait, have_ordered;
enum omp_clause_schedule_kind sched_kind;
+ struct omp_for_data_loop *loops;
};
static splay_tree all_contexts;
-static int parallel_nesting_level;
+static int taskreg_nesting_level;
struct omp_region *root_omp_region;
+static bitmap task_shared_vars;
static void scan_omp (tree *, omp_context *);
static void lower_omp (tree *, omp_context *);
/* Find an OpenMP clause of type KIND within CLAUSES. */
-static tree
+tree
find_omp_clause (tree clauses, enum tree_code kind)
{
for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
}
+/* Return true if CTX is for an omp task. */
+
+static inline bool
+is_task_ctx (omp_context *ctx)
+{
+ return TREE_CODE (ctx->stmt) == OMP_TASK;
+}
+
+
+/* Return true if CTX is for an omp parallel or omp task. */
+
+static inline bool
+is_taskreg_ctx (omp_context *ctx)
+{
+ return TREE_CODE (ctx->stmt) == OMP_PARALLEL
+ || TREE_CODE (ctx->stmt) == OMP_TASK;
+}
+
+
/* Return true if REGION is a combined parallel+workshare region. */
static inline bool
them into *FD. */
static void
-extract_omp_for_data (tree for_stmt, struct omp_for_data *fd)
+extract_omp_for_data (tree for_stmt, struct omp_for_data *fd,
+ struct omp_for_data_loop *loops)
{
- tree t;
+ tree t, var, *collapse_iter, *collapse_count;
+ tree count = NULL_TREE, iter_type = long_integer_type_node;
+ struct omp_for_data_loop *loop;
+ int i;
+ struct omp_for_data_loop dummy_loop;
fd->for_stmt = for_stmt;
fd->pre = NULL;
-
- t = OMP_FOR_INIT (for_stmt);
- gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
- fd->v = GIMPLE_STMT_OPERAND (t, 0);
- gcc_assert (DECL_P (fd->v));
- gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE);
- fd->n1 = GIMPLE_STMT_OPERAND (t, 1);
-
- t = OMP_FOR_COND (for_stmt);
- fd->cond_code = TREE_CODE (t);
- gcc_assert (TREE_OPERAND (t, 0) == fd->v);
- fd->n2 = TREE_OPERAND (t, 1);
- switch (fd->cond_code)
- {
- case LT_EXPR:
- case GT_EXPR:
- break;
- case LE_EXPR:
- fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
- build_int_cst (TREE_TYPE (fd->n2), 1));
- fd->cond_code = LT_EXPR;
- break;
- case GE_EXPR:
- fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
- build_int_cst (TREE_TYPE (fd->n2), 1));
- fd->cond_code = GT_EXPR;
- break;
- default:
- gcc_unreachable ();
- }
-
- t = OMP_FOR_INCR (fd->for_stmt);
- gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
- gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == fd->v);
- t = GIMPLE_STMT_OPERAND (t, 1);
- gcc_assert (TREE_OPERAND (t, 0) == fd->v);
- switch (TREE_CODE (t))
- {
- case PLUS_EXPR:
- fd->step = TREE_OPERAND (t, 1);
- break;
- case MINUS_EXPR:
- fd->step = TREE_OPERAND (t, 1);
- fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step);
- break;
- default:
- gcc_unreachable ();
- }
+ fd->collapse = TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt));
+ if (fd->collapse > 1)
+ fd->loops = loops;
+ else
+ fd->loops = &fd->loop;
fd->have_nowait = fd->have_ordered = false;
fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
fd->chunk_size = NULL_TREE;
+ collapse_iter = NULL;
+ collapse_count = NULL;
for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
switch (OMP_CLAUSE_CODE (t))
fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
break;
+ case OMP_CLAUSE_COLLAPSE:
+ if (fd->collapse > 1)
+ {
+ collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
+ collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
+ }
default:
break;
}
+ /* FIXME: for now map schedule(auto) to schedule(static).
+ There should be analysis to determine whether all iterations
+ take approximately the same amount of work (then schedule(static)
+ is best) or if it varies (then schedule(dynamic,N) is better). */
+ if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
+ {
+ fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
+ gcc_assert (fd->chunk_size == NULL);
+ }
+ gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
gcc_assert (fd->chunk_size == NULL);
else if (fd->chunk_size == NULL)
{
/* We only need to compute a default chunk size for ordered
static loops and dynamic loops. */
- if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered)
+ if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
+ || fd->have_ordered
+ || fd->collapse > 1)
fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
? integer_zero_node : integer_one_node;
}
+
+ for (i = 0; i < fd->collapse; i++)
+ {
+ if (fd->collapse == 1)
+ loop = &fd->loop;
+ else if (loops != NULL)
+ loop = loops + i;
+ else
+ loop = &dummy_loop;
+
+ t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
+ gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
+ loop->v = GIMPLE_STMT_OPERAND (t, 0);
+ gcc_assert (SSA_VAR_P (loop->v));
+ gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
+ || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
+ var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
+ loop->n1 = GIMPLE_STMT_OPERAND (t, 1);
+
+ t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i);
+ loop->cond_code = TREE_CODE (t);
+ gcc_assert (TREE_OPERAND (t, 0) == var);
+ loop->n2 = TREE_OPERAND (t, 1);
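+ /* Canonicalize LE_EXPR and GE_EXPR into LT_EXPR and GT_EXPR by
+ adjusting the bound by one, so that only strict comparisons
+ have to be handled from here on. */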
+ switch (loop->cond_code)
+ {
+ case LT_EXPR:
+ case GT_EXPR:
+ break;
+ case LE_EXPR:
+ if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
+ loop->n2 = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (loop->n2),
+ loop->n2, size_one_node);
+ else
+ loop->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
+ build_int_cst (TREE_TYPE (loop->n2), 1));
+ loop->cond_code = LT_EXPR;
+ break;
+ case GE_EXPR:
+ if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
+ loop->n2 = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (loop->n2),
+ loop->n2, size_int (-1));
+ else
+ loop->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
+ build_int_cst (TREE_TYPE (loop->n2), 1));
+ loop->cond_code = GT_EXPR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
+ gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
+ gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == var);
+ t = GIMPLE_STMT_OPERAND (t, 1);
+ gcc_assert (TREE_OPERAND (t, 0) == var);
+ switch (TREE_CODE (t))
+ {
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ loop->step = TREE_OPERAND (t, 1);
+ break;
+ case MINUS_EXPR:
+ loop->step = TREE_OPERAND (t, 1);
+ loop->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (loop->step),
+ loop->step);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
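+ /* Check whether the iterator type chosen so far can represent
+ this loop's iteration space; a pointer iterator, or bounds that
+ do not provably fit in a long, force long long unsigned. */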
+ if (iter_type != long_long_unsigned_type_node)
+ {
+ if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
+ iter_type = long_long_unsigned_type_node;
+ else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
+ && TYPE_PRECISION (TREE_TYPE (loop->v))
+ >= TYPE_PRECISION (iter_type))
+ {
+ tree n;
+
+ if (loop->cond_code == LT_EXPR)
+ n = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->v),
+ loop->n2, loop->step);
+ else
+ n = loop->n1;
+ if (TREE_CODE (n) != INTEGER_CST
+ || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
+ iter_type = long_long_unsigned_type_node;
+ }
+ else if (TYPE_PRECISION (TREE_TYPE (loop->v))
+ > TYPE_PRECISION (iter_type))
+ {
+ tree n1, n2;
+
+ if (loop->cond_code == LT_EXPR)
+ {
+ n1 = loop->n1;
+ n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->v),
+ loop->n2, loop->step);
+ }
+ else
+ {
+ n1 = fold_build2 (MINUS_EXPR, TREE_TYPE (loop->v),
+ loop->n2, loop->step);
+ n2 = loop->n1;
+ }
+ if (TREE_CODE (n1) != INTEGER_CST
+ || TREE_CODE (n2) != INTEGER_CST
+ || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
+ || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
+ iter_type = long_long_unsigned_type_node;
+ }
+ }
+
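+ /* When each collapsed loop has constant bounds and step, precompute
+ the total iteration count as the product of the per-loop counts;
+ it is used below to choose the iterator type. */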
+ if (collapse_count && *collapse_count == NULL)
+ {
+ if ((i == 0 || count != NULL_TREE)
+ && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
+ && TREE_CONSTANT (loop->n1)
+ && TREE_CONSTANT (loop->n2)
+ && TREE_CODE (loop->step) == INTEGER_CST)
+ {
+ tree itype = TREE_TYPE (loop->v);
+
+ if (POINTER_TYPE_P (itype))
+ itype
+ = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0);
+ t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
+ t = fold_build2 (PLUS_EXPR, itype,
+ fold_convert (itype, loop->step), t);
+ t = fold_build2 (PLUS_EXPR, itype, t,
+ fold_convert (itype, loop->n2));
+ t = fold_build2 (MINUS_EXPR, itype, t,
+ fold_convert (itype, loop->n1));
+ if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
+ t = fold_build2 (TRUNC_DIV_EXPR, itype,
+ fold_build1 (NEGATE_EXPR, itype, t),
+ fold_build1 (NEGATE_EXPR, itype,
+ fold_convert (itype,
+ loop->step)));
+ else
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+ fold_convert (itype, loop->step));
+ t = fold_convert (long_long_unsigned_type_node, t);
+ if (count != NULL_TREE)
+ count = fold_build2 (MULT_EXPR, long_long_unsigned_type_node,
+ count, t);
+ else
+ count = t;
+ if (TREE_CODE (count) != INTEGER_CST)
+ count = NULL_TREE;
+ }
+ else
+ count = NULL_TREE;
+ }
+ }
+
+ if (count)
+ {
+ if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
+ iter_type = long_long_unsigned_type_node;
+ else
+ iter_type = long_integer_type_node;
+ }
+ else if (collapse_iter && *collapse_iter != NULL)
+ iter_type = TREE_TYPE (*collapse_iter);
+ fd->iter_type = iter_type;
+ if (collapse_iter && *collapse_iter == NULL)
+ *collapse_iter = create_tmp_var (iter_type, ".iter");
+ if (collapse_count && *collapse_count == NULL)
+ {
+ if (count)
+ *collapse_count = fold_convert (iter_type, count);
+ else
+ *collapse_count = create_tmp_var (iter_type, ".count");
+ }
+
+ if (fd->collapse > 1)
+ {
+ fd->loop.v = *collapse_iter;
+ fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
+ fd->loop.n2 = *collapse_count;
+ fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
+ fd->loop.cond_code = LT_EXPR;
+ }
}
gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR);
- extract_omp_for_data (ws_stmt, &fd);
+ extract_omp_for_data (ws_stmt, &fd, NULL);
+
+ if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
+ return false;
+ if (fd.iter_type != long_integer_type_node)
+ return false;
/* FIXME. We give up too easily here. If any of these arguments
are not constants, they will likely involve variables that have
been mapped into fields of .omp_data_s for sharing with the child
function. With appropriate data flow, it would be possible to
see through this. */
- if (!is_gimple_min_invariant (fd.n1)
- || !is_gimple_min_invariant (fd.n2)
- || !is_gimple_min_invariant (fd.step)
+ if (!is_gimple_min_invariant (fd.loop.n1)
+ || !is_gimple_min_invariant (fd.loop.n2)
+ || !is_gimple_min_invariant (fd.loop.step)
|| (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
return false;
struct omp_for_data fd;
tree ws_args;
- extract_omp_for_data (ws_stmt, &fd);
+ extract_omp_for_data (ws_stmt, &fd, NULL);
ws_args = NULL_TREE;
if (fd.chunk_size)
ws_args = tree_cons (NULL, t, ws_args);
}
- t = fold_convert (long_integer_type_node, fd.step);
+ t = fold_convert (long_integer_type_node, fd.loop.step);
ws_args = tree_cons (NULL, t, ws_args);
- t = fold_convert (long_integer_type_node, fd.n2);
+ t = fold_convert (long_integer_type_node, fd.loop.n2);
ws_args = tree_cons (NULL, t, ws_args);
- t = fold_convert (long_integer_type_node, fd.n1);
+ t = fold_convert (long_integer_type_node, fd.loop.n1);
ws_args = tree_cons (NULL, t, ws_args);
return ws_args;
/* Return true if EXPR is variable sized. */
static inline bool
-is_variable_sized (tree expr)
+is_variable_sized (const_tree expr)
{
return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
}
}
static inline tree
-maybe_lookup_decl (tree var, omp_context *ctx)
+maybe_lookup_decl (const_tree var, omp_context *ctx)
{
tree *n;
n = (tree *) pointer_map_contains (ctx->cb.decl_map, var);
}
static inline tree
+lookup_sfield (tree var, omp_context *ctx)
+{
+ splay_tree_node n;
+ n = splay_tree_lookup (ctx->sfield_map
+ ? ctx->sfield_map : ctx->field_map,
+ (splay_tree_key) var);
+ return (tree) n->value;
+}
+
+static inline tree
maybe_lookup_field (tree var, omp_context *ctx)
{
splay_tree_node n;
return n ? (tree) n->value : NULL_TREE;
}
-/* Return true if DECL should be copied by pointer. SHARED_P is true
- if DECL is to be shared. */
+/* Return true if DECL should be copied by pointer. SHARED_CTX is
+ the parallel context if DECL is to be shared. */
static bool
-use_pointer_for_field (tree decl, bool shared_p)
+use_pointer_for_field (tree decl, omp_context *shared_ctx)
{
if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
return true;
/* We can only use copy-in/copy-out semantics for shared variables
when we know the value is not accessible from an outer scope. */
- if (shared_p)
+ if (shared_ctx)
{
/* ??? Trivially accessible from anywhere. But why would we even
be passing an address in this case? Should we simply assert
address taken. */
if (TREE_ADDRESSABLE (decl))
return true;
+
+ /* Disallow copy-in/out in a nested parallel if
+ decl is shared in an outer parallel; otherwise
+ each thread could store the shared variable
+ in its own copy-in location, making the
+ variable no longer really shared. */
+ if (!TREE_READONLY (decl) && shared_ctx->is_nested)
+ {
+ omp_context *up;
+
+ for (up = shared_ctx->outer; up; up = up->outer)
+ if (maybe_lookup_decl (decl, up))
+ break;
+
+ if (up && is_taskreg_ctx (up))
+ {
+ tree c;
+
+ for (c = OMP_TASKREG_CLAUSES (up->stmt);
+ c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
+ && OMP_CLAUSE_DECL (c) == decl)
+ break;
+
+ if (c)
+ return true;
+ }
+ }
+
+ /* For tasks avoid using copy-in/out unless the variables are readonly
+ (in which case just copy-in is used). As tasks can be
+ deferred or executed in a different thread, when GOMP_task
+ returns the task hasn't necessarily terminated. */
+ if (!TREE_READONLY (decl) && is_task_ctx (shared_ctx))
+ {
+ tree outer = maybe_lookup_decl_in_outer_ctx (decl, shared_ctx);
+ if (is_gimple_reg (outer))
+ {
+ /* Taking address of OUTER in lower_send_shared_vars
+ might need regimplification of everything that uses the
+ variable. */
+ if (!task_shared_vars)
+ task_shared_vars = BITMAP_ALLOC (NULL);
+ bitmap_set_bit (task_shared_vars, DECL_UID (outer));
+ TREE_ADDRESSABLE (outer) = 1;
+ }
+ return true;
+ }
}
return false;
}
-/* Construct a new automatic decl similar to VAR. */
+/* Create a new VAR_DECL and copy information from VAR to it. */
-static tree
-omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
+tree
+copy_var_decl (tree var, tree name, tree type)
{
tree copy = build_decl (VAR_DECL, name, type);
TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
+ TREE_THIS_VOLATILE (copy) = TREE_THIS_VOLATILE (var);
DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (var);
DECL_NO_TBAA_P (copy) = DECL_NO_TBAA_P (var);
DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
+ DECL_CONTEXT (copy) = DECL_CONTEXT (var);
+ DECL_SOURCE_LOCATION (copy) = DECL_SOURCE_LOCATION (var);
TREE_USED (copy) = 1;
- DECL_CONTEXT (copy) = current_function_decl;
DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;
+ return copy;
+}
+
+/* Construct a new automatic decl similar to VAR. */
+
+static tree
+omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
+{
+ tree copy = copy_var_decl (var, name, type);
+
+ DECL_CONTEXT (copy) = current_function_decl;
TREE_CHAIN (copy) = ctx->block_vars;
ctx->block_vars = copy;
x = build_outer_var_ref (x, ctx);
x = build_fold_indirect_ref (x);
}
- else if (is_parallel_ctx (ctx))
+ else if (is_taskreg_ctx (ctx))
{
- bool by_ref = use_pointer_for_field (var, false);
+ bool by_ref = use_pointer_for_field (var, NULL);
x = build_receiver_ref (var, by_ref, ctx);
}
else if (ctx->outer)
static tree
build_sender_ref (tree var, omp_context *ctx)
{
- tree field = lookup_field (var, ctx);
+ tree field = lookup_sfield (var, ctx);
return build3 (COMPONENT_REF, TREE_TYPE (field),
ctx->sender_decl, field, NULL);
}
/* Add a new field for VAR inside the structure CTX->SENDER_DECL. */
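+/* MASK selects where the new field is installed: bit 0 the receiver
+ record (ctx->record_type / ctx->field_map), bit 1 the sender record
+ (ctx->srecord_type / ctx->sfield_map) used by task constructs. */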
static void
-install_var_field (tree var, bool by_ref, omp_context *ctx)
+install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
{
- tree field, type;
+ tree field, type, sfield = NULL_TREE;
- gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
+ gcc_assert ((mask & 1) == 0
+ || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
+ gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
+ || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var));
type = TREE_TYPE (var);
if (by_ref)
type = build_pointer_type (type);
+ else if ((mask & 3) == 1 && is_reference (var))
+ type = TREE_TYPE (type);
field = build_decl (FIELD_DECL, DECL_NAME (var), type);
side effect of making dwarf2out ignore this member, so for helpful
debugging we clear it later in delete_omp_context. */
DECL_ABSTRACT_ORIGIN (field) = var;
+ if (type == TREE_TYPE (var))
+ {
+ DECL_ALIGN (field) = DECL_ALIGN (var);
+ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
+ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
+ }
+ else
+ DECL_ALIGN (field) = TYPE_ALIGN (type);
+
+ if ((mask & 3) == 3)
+ {
+ insert_field_into_struct (ctx->record_type, field);
+ if (ctx->srecord_type)
+ {
+ sfield = build_decl (FIELD_DECL, DECL_NAME (var), type);
+ DECL_ABSTRACT_ORIGIN (sfield) = var;
+ DECL_ALIGN (sfield) = DECL_ALIGN (field);
+ DECL_USER_ALIGN (sfield) = DECL_USER_ALIGN (field);
+ TREE_THIS_VOLATILE (sfield) = TREE_THIS_VOLATILE (field);
+ insert_field_into_struct (ctx->srecord_type, sfield);
+ }
+ }
+ else
+ {
+ if (ctx->srecord_type == NULL_TREE)
+ {
+ tree t;
- insert_field_into_struct (ctx->record_type, field);
+ ctx->srecord_type = lang_hooks.types.make_type (RECORD_TYPE);
+ ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
+ for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
+ {
+ sfield = build_decl (FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
+ DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
+ insert_field_into_struct (ctx->srecord_type, sfield);
+ splay_tree_insert (ctx->sfield_map,
+ (splay_tree_key) DECL_ABSTRACT_ORIGIN (t),
+ (splay_tree_value) sfield);
+ }
+ }
+ sfield = field;
+ insert_field_into_struct ((mask & 1) ? ctx->record_type
+ : ctx->srecord_type, field);
+ }
- splay_tree_insert (ctx->field_map, (splay_tree_key) var,
- (splay_tree_value) field);
+ if (mask & 1)
+ splay_tree_insert (ctx->field_map, (splay_tree_key) var,
+ (splay_tree_value) field);
+ if ((mask & 2) && ctx->sfield_map)
+ splay_tree_insert (ctx->sfield_map, (splay_tree_key) var,
+ (splay_tree_value) sfield);
}
static tree
return new_var;
}
- while (!is_parallel_ctx (ctx))
+ while (!is_taskreg_ctx (ctx))
{
ctx = ctx->outer;
if (ctx == NULL)
if (ctx->field_map)
splay_tree_delete (ctx->field_map);
+ if (ctx->sfield_map)
+ splay_tree_delete (ctx->sfield_map);
/* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before
it produces corrupt debug information. */
for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
DECL_ABSTRACT_ORIGIN (t) = NULL;
}
+ if (ctx->srecord_type)
+ {
+ tree t;
+ for (t = TYPE_FIELDS (ctx->srecord_type); t ; t = TREE_CHAIN (t))
+ DECL_ABSTRACT_ORIGIN (t) = NULL;
+ }
XDELETE (ctx);
}
DECL_CONTEXT (new_f) = type;
TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
TREE_CHAIN (new_f) = new_fields;
+ walk_tree (&DECL_SIZE (new_f), copy_body_r, &ctx->cb, NULL);
+ walk_tree (&DECL_SIZE_UNIT (new_f), copy_body_r, &ctx->cb, NULL);
+ walk_tree (&DECL_FIELD_OFFSET (new_f), copy_body_r, &ctx->cb, NULL);
new_fields = new_f;
/* Arrange to be able to look up the receiver field
{
case OMP_CLAUSE_PRIVATE:
decl = OMP_CLAUSE_DECL (c);
- if (!is_variable_sized (decl))
+ if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
+ goto do_private;
+ else if (!is_variable_sized (decl))
install_var_local (decl, ctx);
break;
case OMP_CLAUSE_SHARED:
- gcc_assert (is_parallel_ctx (ctx));
+ gcc_assert (is_taskreg_ctx (ctx));
decl = OMP_CLAUSE_DECL (c);
- gcc_assert (!is_variable_sized (decl));
- by_ref = use_pointer_for_field (decl, true);
+ gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl))
+ || !is_variable_sized (decl));
/* Global variables don't need to be copied,
the receiver side will use them directly. */
if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
break;
+ by_ref = use_pointer_for_field (decl, ctx);
if (! TREE_READONLY (decl)
|| TREE_ADDRESSABLE (decl)
|| by_ref
|| is_reference (decl))
{
- install_var_field (decl, by_ref, ctx);
+ install_var_field (decl, by_ref, 3, ctx);
install_var_local (decl, ctx);
break;
}
decl = OMP_CLAUSE_DECL (c);
do_private:
if (is_variable_sized (decl))
- break;
- else if (is_parallel_ctx (ctx)
- && ! is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
- ctx)))
{
- by_ref = use_pointer_for_field (decl, false);
- install_var_field (decl, by_ref, ctx);
+ if (is_task_ctx (ctx))
+ install_var_field (decl, false, 1, ctx);
+ break;
+ }
+ else if (is_taskreg_ctx (ctx))
+ {
+ bool global
+ = is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx));
+ by_ref = use_pointer_for_field (decl, NULL);
+
+ if (is_task_ctx (ctx)
+ && (global || by_ref || is_reference (decl)))
+ {
+ install_var_field (decl, false, 1, ctx);
+ if (!global)
+ install_var_field (decl, by_ref, 2, ctx);
+ }
+ else if (!global)
+ install_var_field (decl, by_ref, 3, ctx);
}
install_var_local (decl, ctx);
break;
case OMP_CLAUSE_COPYIN:
decl = OMP_CLAUSE_DECL (c);
- by_ref = use_pointer_for_field (decl, false);
- install_var_field (decl, by_ref, ctx);
+ by_ref = use_pointer_for_field (decl, NULL);
+ install_var_field (decl, by_ref, 3, ctx);
break;
case OMP_CLAUSE_DEFAULT:
case OMP_CLAUSE_NOWAIT:
case OMP_CLAUSE_ORDERED:
+ case OMP_CLAUSE_COLLAPSE:
+ case OMP_CLAUSE_UNTIED:
break;
default:
case OMP_CLAUSE_LASTPRIVATE:
/* Let the corresponding firstprivate clause create
the variable. */
+ if (OMP_CLAUSE_LASTPRIVATE_STMT (c))
+ scan_array_reductions = true;
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
break;
/* FALLTHRU */
case OMP_CLAUSE_SCHEDULE:
case OMP_CLAUSE_NOWAIT:
case OMP_CLAUSE_ORDERED:
+ case OMP_CLAUSE_COLLAPSE:
+ case OMP_CLAUSE_UNTIED:
break;
default:
scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx);
scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx);
}
+ else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+ && OMP_CLAUSE_LASTPRIVATE_STMT (c))
+ scan_omp (&OMP_CLAUSE_LASTPRIVATE_STMT (c), ctx);
}
/* Create a new name for omp child function. Returns an identifier. */
static GTY(()) unsigned int tmp_ompfn_id_num;
static tree
-create_omp_child_function_name (void)
+create_omp_child_function_name (bool task_copy)
{
tree name = DECL_ASSEMBLER_NAME (current_function_decl);
size_t len = IDENTIFIER_LENGTH (name);
char *tmp_name, *prefix;
+ const char *suffix;
- prefix = alloca (len + sizeof ("_omp_fn"));
+ suffix = task_copy ? "_omp_cpyfn" : "_omp_fn";
+ prefix = alloca (len + strlen (suffix) + 1);
memcpy (prefix, IDENTIFIER_POINTER (name), len);
- strcpy (prefix + len, "_omp_fn");
+ strcpy (prefix + len, suffix);
#ifndef NO_DOT_IN_LABEL
prefix[len] = '.';
#elif !defined NO_DOLLAR_IN_LABEL
yet, just the bare decl. */
static void
-create_omp_child_function (omp_context *ctx)
+create_omp_child_function (omp_context *ctx, bool task_copy)
{
tree decl, type, name, t;
- name = create_omp_child_function_name ();
- type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+ name = create_omp_child_function_name (task_copy);
+ if (task_copy)
+ type = build_function_type_list (void_type_node, ptr_type_node,
+ ptr_type_node, NULL_TREE);
+ else
+ type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
decl = build_decl (FUNCTION_DECL, name, type);
decl = lang_hooks.decls.pushdecl (decl);
- ctx->cb.dst_fn = decl;
+ if (!task_copy)
+ ctx->cb.dst_fn = decl;
+ else
+ OMP_TASK_COPYFN (ctx->stmt) = decl;
TREE_STATIC (decl) = 1;
TREE_USED (decl) = 1;
DECL_CONTEXT (t) = current_function_decl;
TREE_USED (t) = 1;
DECL_ARGUMENTS (decl) = t;
- ctx->receiver_decl = t;
+ if (!task_copy)
+ ctx->receiver_decl = t;
+ else
+ {
+ t = build_decl (PARM_DECL, get_identifier (".omp_data_o"),
+ ptr_type_node);
+ DECL_ARTIFICIAL (t) = 1;
+ DECL_ARG_TYPE (t) = ptr_type_node;
+ DECL_CONTEXT (t) = current_function_decl;
+ TREE_USED (t) = 1;
+ TREE_CHAIN (t) = DECL_ARGUMENTS (decl);
+ DECL_ARGUMENTS (decl) = t;
+ }
/* Allocate memory for the function structure. The call to
push_struct_function clobbers CFUN, so we restore it afterward
via pop_cfun. */
- allocate_struct_function (decl);
+ push_struct_function (decl);
DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt);
cfun->function_end_locus = EXPR_LOCATION (ctx->stmt);
- cfun = ctx->cb.src_cfun;
+ pop_cfun ();
}
}
ctx = new_omp_context (*stmt_p, outer_ctx);
- if (parallel_nesting_level > 1)
+ if (taskreg_nesting_level > 1)
ctx->is_nested = true;
ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
name = create_tmp_var_name (".omp_data_s");
name = build_decl (TYPE_DECL, name, ctx->record_type);
TYPE_NAME (ctx->record_type) = name;
- create_omp_child_function (ctx);
+ create_omp_child_function (ctx, false);
OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn;
scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx);
}
}
+/* Scan an OpenMP task directive. */
+
+static void
+scan_omp_task (tree *stmt_p, omp_context *outer_ctx)
+{
+ omp_context *ctx;
+ tree name;
+
+ /* Ignore task directives with empty bodies. */
+ if (optimize > 0
+ && empty_body_p (OMP_TASK_BODY (*stmt_p)))
+ {
+ *stmt_p = build_empty_stmt ();
+ return;
+ }
+
+ ctx = new_omp_context (*stmt_p, outer_ctx);
+ if (taskreg_nesting_level > 1)
+ ctx->is_nested = true;
+ ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
+ ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
+ ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
+ name = create_tmp_var_name (".omp_data_s");
+ name = build_decl (TYPE_DECL, name, ctx->record_type);
+ TYPE_NAME (ctx->record_type) = name;
+ create_omp_child_function (ctx, false);
+ OMP_TASK_FN (*stmt_p) = ctx->cb.dst_fn;
+
+ scan_sharing_clauses (OMP_TASK_CLAUSES (*stmt_p), ctx);
+
+ if (ctx->srecord_type)
+ {
+ name = create_tmp_var_name (".omp_data_a");
+ name = build_decl (TYPE_DECL, name, ctx->srecord_type);
+ TYPE_NAME (ctx->srecord_type) = name;
+ create_omp_child_function (ctx, true);
+ }
+
+ scan_omp (&OMP_TASK_BODY (*stmt_p), ctx);
+
+ if (TYPE_FIELDS (ctx->record_type) == NULL)
+ {
+ ctx->record_type = ctx->receiver_decl = NULL;
+ OMP_TASK_ARG_SIZE (*stmt_p)
+ = build_int_cst (long_integer_type_node, 0);
+ OMP_TASK_ARG_ALIGN (*stmt_p)
+ = build_int_cst (long_integer_type_node, 1);
+ }
+ else
+ {
+ tree *p, vla_fields = NULL_TREE, *q = &vla_fields;
+ /* Move VLA fields to the end. */
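+ /* A variable-sized field would make the offsets of all following
+ fields variable as well, so keep the fixed-size fields first. */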
+ p = &TYPE_FIELDS (ctx->record_type);
+ while (*p)
+ if (!TYPE_SIZE_UNIT (TREE_TYPE (*p))
+ || ! TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (*p))))
+ {
+ *q = *p;
+ *p = TREE_CHAIN (*p);
+ TREE_CHAIN (*q) = NULL_TREE;
+ q = &TREE_CHAIN (*q);
+ }
+ else
+ p = &TREE_CHAIN (*p);
+ *p = vla_fields;
+ layout_type (ctx->record_type);
+ fixup_child_record_type (ctx);
+ if (ctx->srecord_type)
+ layout_type (ctx->srecord_type);
+ OMP_TASK_ARG_SIZE (*stmt_p)
+ = fold_convert (long_integer_type_node,
+ TYPE_SIZE_UNIT (ctx->record_type));
+ OMP_TASK_ARG_ALIGN (*stmt_p)
+ = build_int_cst (long_integer_type_node,
+ TYPE_ALIGN_UNIT (ctx->record_type));
+ }
+}
+
/* Scan an OpenMP loop directive. */
{
omp_context *ctx;
tree stmt;
+ int i;
stmt = *stmt_p;
ctx = new_omp_context (stmt, outer_ctx);
scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx);
scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
- scan_omp (&OMP_FOR_INIT (stmt), ctx);
- scan_omp (&OMP_FOR_COND (stmt), ctx);
- scan_omp (&OMP_FOR_INCR (stmt), ctx);
+ for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (stmt)); i++)
+ {
+ scan_omp (&TREE_VEC_ELT (OMP_FOR_INIT (stmt), i), ctx);
+ scan_omp (&TREE_VEC_ELT (OMP_FOR_COND (stmt), i), ctx);
+ scan_omp (&TREE_VEC_ELT (OMP_FOR_INCR (stmt), i), ctx);
+ }
scan_omp (&OMP_FOR_BODY (stmt), ctx);
}
case OMP_FOR:
case OMP_SECTIONS:
case OMP_SINGLE:
+ case CALL_EXPR:
for (; ctx != NULL; ctx = ctx->outer)
switch (TREE_CODE (ctx->stmt))
{
case OMP_SINGLE:
case OMP_ORDERED:
case OMP_MASTER:
+ case OMP_TASK:
+ if (TREE_CODE (t) == CALL_EXPR)
+ {
+ warning (0, "barrier region may not be closely nested inside "
+ "of work-sharing, critical, ordered, master or "
+ "explicit task region");
+ return;
+ }
warning (0, "work-sharing region may not be closely nested inside "
- "of work-sharing, critical, ordered or master region");
+ "of work-sharing, critical, ordered, master or explicit "
+ "task region");
return;
case OMP_PARALLEL:
return;
case OMP_FOR:
case OMP_SECTIONS:
case OMP_SINGLE:
+ case OMP_TASK:
warning (0, "master region may not be closely nested inside "
- "of work-sharing region");
+ "of work-sharing or explicit task region");
return;
case OMP_PARALLEL:
return;
switch (TREE_CODE (ctx->stmt))
{
case OMP_CRITICAL:
+ case OMP_TASK:
warning (0, "ordered region may not be closely nested inside "
- "of critical region");
+ "of critical or explicit task region");
return;
case OMP_FOR:
if (find_omp_clause (OMP_CLAUSES (ctx->stmt),
input_location = EXPR_LOCATION (t);
/* Check the OpenMP nesting restrictions. */
- if (OMP_DIRECTIVE_P (t) && ctx != NULL)
- check_omp_nesting_restrictions (t, ctx);
+ if (ctx != NULL)
+ {
+ if (OMP_DIRECTIVE_P (t))
+ check_omp_nesting_restrictions (t, ctx);
+ else if (TREE_CODE (t) == CALL_EXPR)
+ {
+ tree fndecl = get_callee_fndecl (t);
+ if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER)
+ check_omp_nesting_restrictions (t, ctx);
+ }
+ }
*walk_subtrees = 0;
switch (TREE_CODE (t))
{
case OMP_PARALLEL:
- parallel_nesting_level++;
+ taskreg_nesting_level++;
scan_omp_parallel (tp, ctx);
- parallel_nesting_level--;
+ taskreg_nesting_level--;
+ break;
+
+ case OMP_TASK:
+ taskreg_nesting_level++;
+ scan_omp_task (tp, ctx);
+ taskreg_nesting_level--;
break;
case OMP_FOR:
/* Build a call to GOMP_barrier. */
-static void
-build_omp_barrier (tree *stmt_list)
+static tree
+build_omp_barrier (void)
{
- tree t = build_call_expr (built_in_decls[BUILT_IN_GOMP_BARRIER], 0);
- gimplify_and_add (t, stmt_list);
+ return build_call_expr (built_in_decls[BUILT_IN_GOMP_BARRIER], 0);
}
/* If a context was created for STMT when it was scanned, return it. */
tree t;
omp_context *up;
- gcc_assert (ctx->is_nested);
-
for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
t = maybe_lookup_decl (decl, up);
- gcc_assert (t || is_global_var (decl));
+ gcc_assert (!ctx->is_nested || t || is_global_var (decl));
return t ? t : decl;
}
tree t = NULL;
omp_context *up;
- if (ctx->is_nested)
- for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
- t = maybe_lookup_decl (decl, up);
+ for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
+ t = maybe_lookup_decl (decl, up);
return t ? t : decl;
}
if (pass == 0)
continue;
- ptr = DECL_VALUE_EXPR (new_var);
- gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
- ptr = TREE_OPERAND (ptr, 0);
- gcc_assert (DECL_P (ptr));
-
- x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
- x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x);
- x = fold_convert (TREE_TYPE (ptr), x);
- x = build_gimple_modify_stmt (ptr, x);
- gimplify_and_add (x, ilist);
+ if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx))
+ {
+ ptr = DECL_VALUE_EXPR (new_var);
+ gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
+ ptr = TREE_OPERAND (ptr, 0);
+ gcc_assert (DECL_P (ptr));
+ x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
+ x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x);
+ x = fold_convert (TREE_TYPE (ptr), x);
+ x = build_gimple_modify_stmt (ptr, x);
+ gimplify_and_add (x, ilist);
+ }
}
else if (is_reference (var))
{
continue;
x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
- if (TREE_CONSTANT (x))
+ if (c_kind == OMP_CLAUSE_FIRSTPRIVATE && is_task_ctx (ctx))
+ {
+ x = build_receiver_ref (var, false, ctx);
+ x = build_fold_addr_expr (x);
+ }
+ else if (TREE_CONSTANT (x))
{
const char *name = NULL;
if (DECL_NAME (var))
/* Set up the DECL_VALUE_EXPR for shared variables now. This
needs to be delayed until after fixup_child_record_type so
that we get the correct type during the dereference. */
- by_ref = use_pointer_for_field (var, true);
+ by_ref = use_pointer_for_field (var, ctx);
x = build_receiver_ref (var, by_ref, ctx);
SET_DECL_VALUE_EXPR (new_var, x);
DECL_HAS_VALUE_EXPR_P (new_var) = 1;
/* FALLTHRU */
case OMP_CLAUSE_PRIVATE:
- x = lang_hooks.decls.omp_clause_default_ctor (c, new_var);
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE)
+ x = build_outer_var_ref (var, ctx);
+ else if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
+ {
+ if (is_task_ctx (ctx))
+ x = build_receiver_ref (var, false, ctx);
+ else
+ x = build_outer_var_ref (var, ctx);
+ }
+ else
+ x = NULL;
+ x = lang_hooks.decls.omp_clause_default_ctor (c, new_var, x);
if (x)
gimplify_and_add (x, ilist);
/* FALLTHRU */
break;
case OMP_CLAUSE_FIRSTPRIVATE:
+ if (is_task_ctx (ctx))
+ {
+ if (is_reference (var) || is_variable_sized (var))
+ goto do_dtor;
+ else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var,
+ ctx))
+ || use_pointer_for_field (var, NULL))
+ {
+ x = build_receiver_ref (var, false, ctx);
+ SET_DECL_VALUE_EXPR (new_var, x);
+ DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+ goto do_dtor;
+ }
+ }
x = build_outer_var_ref (var, ctx);
x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
gimplify_and_add (x, ilist);
break;
case OMP_CLAUSE_COPYIN:
- by_ref = use_pointer_for_field (var, false);
+ by_ref = use_pointer_for_field (var, NULL);
x = build_receiver_ref (var, by_ref, ctx);
x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
append_to_statement_list (x, &copyin_seq);
case OMP_CLAUSE_REDUCTION:
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
+ tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
+ x = build_outer_var_ref (var, ctx);
+
+ if (is_reference (var))
+ x = build_fold_addr_expr (x);
+ SET_DECL_VALUE_EXPR (placeholder, x);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist);
OMP_CLAUSE_REDUCTION_INIT (c) = NULL;
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
}
else
{
lastprivate clauses we need to ensure the lastprivate copying
happens after firstprivate copying in all threads. */
if (copyin_by_ref || lastprivate_firstprivate)
- build_omp_barrier (ilist);
+ gimplify_and_add (build_omp_barrier (), ilist);
}
static void
lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list,
- omp_context *ctx)
+ omp_context *ctx)
{
tree sub_list, x, c;
+ bool par_clauses = false;
/* Early exit if there are no lastprivate clauses. */
clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
OMP_CLAUSE_LASTPRIVATE);
if (clauses == NULL)
return;
+ par_clauses = true;
}
sub_list = alloc_stmt_list ();
- for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+ for (c = clauses; c ;)
{
tree var, new_var;
- if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE)
- continue;
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
+ {
+ var = OMP_CLAUSE_DECL (c);
+ new_var = lookup_decl (var, ctx);
- var = OMP_CLAUSE_DECL (c);
- new_var = lookup_decl (var, ctx);
+ if (OMP_CLAUSE_LASTPRIVATE_STMT (c))
+ gimplify_and_add (OMP_CLAUSE_LASTPRIVATE_STMT (c), &sub_list);
+ OMP_CLAUSE_LASTPRIVATE_STMT (c) = NULL;
- x = build_outer_var_ref (var, ctx);
- if (is_reference (var))
- new_var = build_fold_indirect_ref (new_var);
- x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
- append_to_statement_list (x, &sub_list);
+ x = build_outer_var_ref (var, ctx);
+ if (is_reference (var))
+ new_var = build_fold_indirect_ref (new_var);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
+ append_to_statement_list (x, &sub_list);
+ }
+ c = OMP_CLAUSE_CHAIN (c);
+ if (c == NULL && !par_clauses)
+ {
+ /* If this was a workshare clause, see if it had been combined
+ with its parallel. In that case, continue looking for
+ lastprivate clauses on the parallel statement itself. */
+ if (is_parallel_ctx (ctx))
+ break;
+
+ ctx = ctx->outer;
+ if (ctx == NULL || !is_parallel_ctx (ctx))
+ break;
+
+ c = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt),
+ OMP_CLAUSE_LASTPRIVATE);
+ par_clauses = true;
+ }
}
if (predicate)
continue;
var = OMP_CLAUSE_DECL (c);
- by_ref = use_pointer_for_field (var, false);
+ by_ref = use_pointer_for_field (var, NULL);
ref = build_sender_ref (var, ctx);
- x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var;
+ x = lookup_decl_in_outer_ctx (var, ctx);
x = by_ref ? build_fold_addr_expr (x) : x;
x = build_gimple_modify_stmt (ref, x);
gimplify_and_add (x, slist);
switch (OMP_CLAUSE_CODE (c))
{
- case OMP_CLAUSE_FIRSTPRIVATE:
+ case OMP_CLAUSE_PRIVATE:
+ if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
+ break;
+ continue;
+ case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_COPYIN:
case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_REDUCTION:
continue;
}
- var = val = OMP_CLAUSE_DECL (c);
- if (ctx->is_nested)
- var = lookup_decl_in_outer_ctx (val, ctx);
+ val = OMP_CLAUSE_DECL (c);
+ var = lookup_decl_in_outer_ctx (val, ctx);
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
&& is_global_var (var))
continue;
if (is_variable_sized (val))
continue;
- by_ref = use_pointer_for_field (val, false);
+ by_ref = use_pointer_for_field (val, NULL);
switch (OMP_CLAUSE_CODE (c))
{
+ case OMP_CLAUSE_PRIVATE:
case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_COPYIN:
do_in = true;
do_in = true;
}
else
- do_out = true;
+ {
+ do_out = true;
+ if (lang_hooks.decls.omp_private_outer_ref (val))
+ do_in = true;
+ }
break;
case OMP_CLAUSE_REDUCTION:
x = by_ref ? build_fold_addr_expr (var) : var;
x = build_gimple_modify_stmt (ref, x);
gimplify_and_add (x, ilist);
+ if (is_task_ctx (ctx))
+ DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL;
}
if (do_out)
static void
lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx)
{
- tree var, ovar, nvar, f, x;
+ tree var, ovar, nvar, f, x, record_type;
if (ctx->record_type == NULL)
return;
- for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
+ record_type = ctx->srecord_type ? ctx->srecord_type : ctx->record_type;
+ for (f = TYPE_FIELDS (record_type); f ; f = TREE_CHAIN (f))
{
ovar = DECL_ABSTRACT_ORIGIN (f);
nvar = maybe_lookup_decl (ovar, ctx);
if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
continue;
- var = ovar;
-
/* If CTX is a nested parallel directive, find the immediately
enclosing parallel or workshare construct that contains a
mapping for OVAR. */
- if (ctx->is_nested)
- var = lookup_decl_in_outer_ctx (ovar, ctx);
+ var = lookup_decl_in_outer_ctx (ovar, ctx);
- if (use_pointer_for_field (ovar, true))
+ if (use_pointer_for_field (ovar, ctx))
{
x = build_sender_ref (ovar, ctx);
var = build_fold_addr_expr (var);
x = build_gimple_modify_stmt (x, var);
gimplify_and_add (x, ilist);
- x = build_sender_ref (ovar, ctx);
- x = build_gimple_modify_stmt (var, x);
- gimplify_and_add (x, olist);
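+ /* A read-only variable cannot have been modified in the child,
+ so the copy-back can be skipped. */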
+ if (!TREE_READONLY (var))
+ {
+ x = build_sender_ref (ovar, ctx);
+ x = build_gimple_modify_stmt (var, x);
+ gimplify_and_add (x, olist);
+ }
}
}
}
expand_parallel_call (struct omp_region *region, basic_block bb,
tree entry_stmt, tree ws_args)
{
- tree t, t1, t2, val, cond, c, list, clauses;
+ tree t, t1, t2, val, cond, c, clauses;
block_stmt_iterator si;
int start_ix;
clauses = OMP_PARALLEL_CLAUSES (entry_stmt);
- push_gimplify_context ();
/* Determine what flavor of GOMP_parallel_start we will be
emitting. */
switch (region->inner->type)
{
case OMP_FOR:
+ gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
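+ /* schedule(auto) was mapped to schedule(static) earlier, and with
+ OMP_CLAUSE_SCHEDULE_AUTO now preceding OMP_CLAUSE_SCHEDULE_RUNTIME
+ in the enumeration, RUNTIME no longer lines up with the builtin
+ list and needs the explicit offset of 3. */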
start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
- + region->inner->sched_kind;
+ + (region->inner->sched_kind
+ == OMP_CLAUSE_SCHEDULE_RUNTIME
+ ? 3 : region->inner->sched_kind);
break;
case OMP_SECTIONS:
start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
cond = gimple_boolify (cond);
if (integer_zerop (val))
- val = build2 (EQ_EXPR, unsigned_type_node, cond,
- build_int_cst (TREE_TYPE (cond), 0));
+ val = fold_build2 (EQ_EXPR, unsigned_type_node, cond,
+ build_int_cst (TREE_TYPE (cond), 0));
else
{
basic_block cond_bb, then_bb, else_bb;
- edge e;
- tree t, tmp;
+ edge e, e_then, e_else;
+ tree t, tmp_then, tmp_else, tmp_join, tmp_var;
+
+ tmp_var = create_tmp_var (TREE_TYPE (val), NULL);
+ if (gimple_in_ssa_p (cfun))
+ {
+ tmp_then = make_ssa_name (tmp_var, NULL_TREE);
+ tmp_else = make_ssa_name (tmp_var, NULL_TREE);
+ tmp_join = make_ssa_name (tmp_var, NULL_TREE);
+ }
+ else
+ {
+ tmp_then = tmp_var;
+ tmp_else = tmp_var;
+ tmp_join = tmp_var;
+ }
- tmp = create_tmp_var (TREE_TYPE (val), NULL);
e = split_block (bb, NULL);
cond_bb = e->src;
bb = e->dest;
then_bb = create_empty_bb (cond_bb);
else_bb = create_empty_bb (then_bb);
+ set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+ set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
t = build3 (COND_EXPR, void_type_node,
cond, NULL_TREE, NULL_TREE);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
si = bsi_start (then_bb);
- t = build_gimple_modify_stmt (tmp, val);
+ t = build_gimple_modify_stmt (tmp_then, val);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (tmp_then) = t;
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
si = bsi_start (else_bb);
- t = build_gimple_modify_stmt (tmp,
+ t = build_gimple_modify_stmt (tmp_else,
build_int_cst (unsigned_type_node, 1));
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (tmp_else) = t;
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
- make_edge (then_bb, bb, EDGE_FALLTHRU);
- make_edge (else_bb, bb, EDGE_FALLTHRU);
+ e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
+ e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
- val = tmp;
+ if (gimple_in_ssa_p (cfun))
+ {
+ tree phi = create_phi_node (tmp_join, bb);
+ SSA_NAME_DEF_STMT (tmp_join) = phi;
+ add_phi_arg (phi, tmp_then, e_then);
+ add_phi_arg (phi, tmp_else, e_else);
+ }
+
+ val = tmp_join;
}
- list = NULL_TREE;
- val = get_formal_tmp_var (val, &list);
si = bsi_start (bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+ val = force_gimple_operand_bsi (&si, val, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
}
- list = NULL_TREE;
+ si = bsi_last (bb);
t = OMP_PARALLEL_DATA_ARG (entry_stmt);
if (t == NULL)
t1 = null_pointer_node;
else
t = build_call_expr (built_in_decls[start_ix], 3, t2, t1, val);
- gimplify_and_add (t, &list);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
t = OMP_PARALLEL_DATA_ARG (entry_stmt);
if (t == NULL)
else
t = build_fold_addr_expr (t);
t = build_call_expr (OMP_PARALLEL_FN (entry_stmt), 1, t);
- gimplify_and_add (t, &list);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
t = build_call_expr (built_in_decls[BUILT_IN_GOMP_PARALLEL_END], 0);
- gimplify_and_add (t, &list);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+}
+
+
+static void maybe_catch_exception (tree *stmt_p);
+
+
+/* Finalize task copyfn. */
+
+static void
+expand_task_copyfn (tree task_stmt)
+{
+ struct function *child_cfun;
+ tree child_fn, old_fn;
+
+ child_fn = OMP_TASK_COPYFN (task_stmt);
+ child_cfun = DECL_STRUCT_FUNCTION (child_fn);
+
+ /* Inform the callgraph about the new function. */
+ DECL_STRUCT_FUNCTION (child_fn)->curr_properties
+ = cfun->curr_properties;
+
+ old_fn = current_function_decl;
+ push_cfun (child_cfun);
+ current_function_decl = child_fn;
+ gimplify_body (&DECL_SAVED_TREE (child_fn), child_fn, false);
+ maybe_catch_exception (&BIND_EXPR_BODY (DECL_SAVED_TREE (child_fn)));
+ pop_cfun ();
+ current_function_decl = old_fn;
+
+ cgraph_add_new_function (child_fn, false);
+}
+
+/* Build the function call to GOMP_task to actually
+ generate the task operation. BB is the block where to insert the code. */
+
+static void
+expand_task_call (basic_block bb, tree entry_stmt)
+{
+ tree t, t1, t2, t3, flags, cond, c, clauses;
+ block_stmt_iterator si;
+
+ clauses = OMP_TASK_CLAUSES (entry_stmt);
+
+ if (OMP_TASK_COPYFN (entry_stmt))
+ expand_task_copyfn (entry_stmt);
+
+ c = find_omp_clause (clauses, OMP_CLAUSE_IF);
+ if (c)
+ cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (c));
+ else
+ cond = boolean_true_node;
+
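+ /* Bit 0 of the flags argument to GOMP_task marks the task untied. */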
+ c = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
+ flags = build_int_cst (unsigned_type_node, (c ? 1 : 0));
si = bsi_last (bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+ t = OMP_TASK_DATA_ARG (entry_stmt);
+ if (t == NULL)
+ t2 = null_pointer_node;
+ else
+ t2 = build_fold_addr_expr (t);
+ t1 = build_fold_addr_expr (OMP_TASK_FN (entry_stmt));
+ t = OMP_TASK_COPYFN (entry_stmt);
+ if (t == NULL)
+ t3 = null_pointer_node;
+ else
+ t3 = build_fold_addr_expr (t);
- pop_gimplify_context (NULL_TREE);
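+ /* GOMP_task (fn, data, copyfn, arg_size, arg_align, if_clause, flags). */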
+ t = build_call_expr (built_in_decls[BUILT_IN_GOMP_TASK], 7, t1, t2, t3,
+ OMP_TASK_ARG_SIZE (entry_stmt),
+ OMP_TASK_ARG_ALIGN (entry_stmt), cond, flags);
+
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
}
}
}
-/* Expand the OpenMP parallel directive starting at REGION. */
+/* Optimize omp_get_thread_num () and omp_get_num_threads ()
+ calls. These can't be declared as const functions, but
+ within one parallel body they are constant, so they can be
+ transformed there into __builtin_omp_get_{thread_num,num_threads} ()
+ which are declared const. Similarly for the task body, except
+ that in an untied task omp_get_thread_num () can change at any task
+ scheduling point. */
+
+static void
+optimize_omp_library_calls (tree entry_stmt)
+{
+ basic_block bb;
+ block_stmt_iterator bsi;
+ tree thr_num_id
+ = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM]);
+ tree num_thr_id
+ = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS]);
+ bool untied_task = (TREE_CODE (entry_stmt) == OMP_TASK
+ && find_omp_clause (OMP_TASK_CLAUSES (entry_stmt),
+ OMP_CLAUSE_UNTIED) != NULL);
+
+ FOR_EACH_BB (bb)
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+ tree call = get_call_expr_in (stmt);
+ tree decl;
+
+ if (call
+ && (decl = get_callee_fndecl (call))
+ && DECL_EXTERNAL (decl)
+ && TREE_PUBLIC (decl)
+ && DECL_INITIAL (decl) == NULL)
+ {
+ tree built_in;
+
+ if (DECL_NAME (decl) == thr_num_id)
+ {
+ /* In #pragma omp task untied omp_get_thread_num () can change
+ during the execution of the task region. */
+ if (untied_task)
+ continue;
+ built_in = built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM];
+ }
+ else if (DECL_NAME (decl) == num_thr_id)
+ built_in = built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS];
+ else
+ continue;
+
+ if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
+ || call_expr_nargs (call) != 0)
+ continue;
+
+ if (flag_exceptions && !TREE_NOTHROW (decl))
+ continue;
+
+ if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
+ || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (decl)))
+ != TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (built_in))))
+ continue;
+
+ CALL_EXPR_FN (call) = build_fold_addr_expr (built_in);
+ }
+ }
+}
+
+/* Expand the OpenMP parallel or task directive starting at REGION. */
static void
-expand_omp_parallel (struct omp_region *region)
+expand_omp_taskreg (struct omp_region *region)
{
basic_block entry_bb, exit_bb, new_bb;
- struct function *child_cfun, *saved_cfun;
+ struct function *child_cfun;
tree child_fn, block, t, ws_args;
block_stmt_iterator si;
tree entry_stmt;
edge e;
- bool do_cleanup_cfg = false;
entry_stmt = last_stmt (region->entry);
- child_fn = OMP_PARALLEL_FN (entry_stmt);
+ child_fn = OMP_TASKREG_FN (entry_stmt);
child_cfun = DECL_STRUCT_FUNCTION (child_fn);
- saved_cfun = cfun;
+ /* If this function has already been instrumented, make sure
+ the child function isn't instrumented again. */
+ child_cfun->after_tree_profile = cfun->after_tree_profile;
entry_bb = region->entry;
exit_bb = region->exit;
entry_succ_e = single_succ_edge (entry_bb);
si = bsi_last (entry_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
+ gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL
+ || TREE_CODE (bsi_stmt (si)) == OMP_TASK);
bsi_remove (&si, true);
new_bb = entry_bb;
- remove_edge (entry_succ_e);
if (exit_bb)
{
exit_succ_e = single_succ_edge (exit_bb);
make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
}
- do_cleanup_cfg = true;
+ remove_edge_and_dominated_blocks (entry_succ_e);
}
else
{
a function call that has been inlined, the original PARM_DECL
.OMP_DATA_I may have been converted into a different local
variable. In which case, we need to keep the assignment. */
- if (OMP_PARALLEL_DATA_ARG (entry_stmt))
+ if (OMP_TASKREG_DATA_ARG (entry_stmt))
{
basic_block entry_succ_bb = single_succ (entry_bb);
block_stmt_iterator si;
+ tree parcopy_stmt = NULL_TREE, arg, narg;
for (si = bsi_start (entry_succ_bb); ; bsi_next (&si))
{
STRIP_NOPS (arg);
if (TREE_CODE (arg) == ADDR_EXPR
&& TREE_OPERAND (arg, 0)
- == OMP_PARALLEL_DATA_ARG (entry_stmt))
+ == OMP_TASKREG_DATA_ARG (entry_stmt))
{
- if (GIMPLE_STMT_OPERAND (stmt, 0)
- == DECL_ARGUMENTS (child_fn))
- bsi_remove (&si, true);
- else
- GIMPLE_STMT_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn);
+ parcopy_stmt = stmt;
break;
}
}
+
+ gcc_assert (parcopy_stmt != NULL_TREE);
+ arg = DECL_ARGUMENTS (child_fn);
+
+ if (!gimple_in_ssa_p (cfun))
+ {
+ if (GIMPLE_STMT_OPERAND (parcopy_stmt, 0) == arg)
+ bsi_remove (&si, true);
+ else
+ GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = arg;
+ }
+ else
+ {
+ /* If we are in SSA form, we must load the value from the default
+ definition of the argument. That definition should not exist yet,
+ since the argument is not used uninitialized. */
+ gcc_assert (gimple_default_def (cfun, arg) == NULL);
+ narg = make_ssa_name (arg, build_empty_stmt ());
+ set_default_def (arg, narg);
+ GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = narg;
+ update_stmt (parcopy_stmt);
+ }
}
/* Declare local variables needed in CHILD_CFUN. */
block = DECL_INITIAL (child_fn);
- BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list);
+ BLOCK_VARS (block) = list2chain (child_cfun->local_decls);
DECL_SAVED_TREE (child_fn) = bb_stmt_list (single_succ (entry_bb));
- /* Reset DECL_CONTEXT on locals and function arguments. */
- for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t))
- DECL_CONTEXT (t) = child_fn;
-
+ /* Reset DECL_CONTEXT on function arguments. */
for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
DECL_CONTEXT (t) = child_fn;
- /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the
- child function. */
+ /* Split ENTRY_BB at OMP_PARALLEL or OMP_TASK, so that it can be
+ moved to the child function. */
si = bsi_last (entry_bb);
t = bsi_stmt (si);
- gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
+ gcc_assert (t && (TREE_CODE (t) == OMP_PARALLEL
+ || TREE_CODE (t) == OMP_TASK));
bsi_remove (&si, true);
e = split_block (entry_bb, t);
entry_bb = e->dest;
single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
- /* Move the parallel region into CHILD_CFUN. We need to reset
- dominance information because the expansion of the inner
- regions has invalidated it. */
- free_dominance_info (CDI_DOMINATORS);
- new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
- if (exit_bb)
- single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
- DECL_STRUCT_FUNCTION (child_fn)->curr_properties
- = cfun->curr_properties;
- cgraph_add_new_function (child_fn, true);
-
/* Convert OMP_RETURN into a RETURN_EXPR. */
if (exit_bb)
{
bsi_insert_after (&si, t, BSI_SAME_STMT);
bsi_remove (&si, true);
}
- }
- /* Emit a library call to launch the children threads. */
- expand_parallel_call (region, new_bb, entry_stmt, ws_args);
+ /* Move the parallel region into CHILD_CFUN. */
+
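+ /* If the parent is in SSA form, set up the child's SSA data
+ structures first so the region can be moved over in SSA form. */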
+ if (gimple_in_ssa_p (cfun))
+ {
+ push_cfun (child_cfun);
+ init_tree_ssa (child_cfun);
+ init_ssa_operands ();
+ cfun->gimple_df->in_ssa_p = true;
+ pop_cfun ();
+ }
+ new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
+ if (exit_bb)
+ single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
- if (do_cleanup_cfg)
- {
- /* Clean up the unreachable sub-graph we created above. */
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
- cleanup_tree_cfg ();
+ /* Inform the callgraph about the new function. */
+ DECL_STRUCT_FUNCTION (child_fn)->curr_properties
+ = cfun->curr_properties;
+ cgraph_add_new_function (child_fn, true);
+
+ /* Fix the callgraph edges for child_cfun. Those for cfun will be
+ fixed in a following pass. */
+ push_cfun (child_cfun);
+ if (optimize)
+ optimize_omp_library_calls (entry_stmt);
+ rebuild_cgraph_edges ();
+
+ /* Some EH regions might become dead, see PR34608. If
+ pass_cleanup_cfg isn't the first pass to happen with the
+ new child, these dead EH edges might cause problems.
+ Clean them up now. */
+ if (flag_exceptions)
+ {
+ basic_block bb;
+ tree save_current = current_function_decl;
+ bool changed = false;
+
+ current_function_decl = child_fn;
+ FOR_EACH_BB (bb)
+ changed |= tree_purge_dead_eh_edges (bb);
+ if (changed)
+ cleanup_tree_cfg ();
+ current_function_decl = save_current;
+ }
+ pop_cfun ();
}
+
+ /* Emit a library call to launch the children threads. */
+ if (TREE_CODE (entry_stmt) == OMP_PARALLEL)
+ expand_parallel_call (region, new_bb, entry_stmt, ws_args);
+ else
+ expand_task_call (new_bb, entry_stmt);
+ update_ssa (TODO_update_ssa_only_virtuals);
}
L3:
If this is a combined omp parallel loop, instead of the call to
- GOMP_loop_foo_start, we emit 'goto L2'. */
+ GOMP_loop_foo_start, we call GOMP_loop_foo_next.
+
+ For collapsed loops, given parameters:
+ collapse(3)
+ for (V1 = N11; V1 cond1 N12; V1 += STEP1)
+ for (V2 = N21; V2 cond2 N22; V2 += STEP2)
+ for (V3 = N31; V3 cond3 N32; V3 += STEP3)
+ BODY;
+
+ we generate pseudocode
+
+ if (cond3 is <)
+ adj = STEP3 - 1;
+ else
+ adj = STEP3 + 1;
+ count3 = (adj + N32 - N31) / STEP3;
+ if (cond2 is <)
+ adj = STEP2 - 1;
+ else
+ adj = STEP2 + 1;
+ count2 = (adj + N22 - N21) / STEP2;
+ if (cond1 is <)
+ adj = STEP1 - 1;
+ else
+ adj = STEP1 + 1;
+ count1 = (adj + N12 - N11) / STEP1;
+ count = count1 * count2 * count3;
+ more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
+ if (more) goto L0; else goto L3;
+ L0:
+ V = istart0;
+ T = V;
+ V3 = N31 + (T % count3) * STEP3;
+ T = T / count3;
+ V2 = N21 + (T % count2) * STEP2;
+ T = T / count2;
+ V1 = N11 + T * STEP1;
+ iend = iend0;
+ L1:
+ BODY;
+ V += 1;
+ if (V < iend) goto L10; else goto L2;
+ L10:
+ V3 += STEP3;
+ if (V3 cond3 N32) goto L1; else goto L11;
+ L11:
+ V3 = N31;
+ V2 += STEP2;
+ if (V2 cond2 N22) goto L1; else goto L12;
+ L12:
+ V2 = N21;
+ V1 += STEP1;
+ goto L1;
+ L2:
+ if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
+ L3:
+
+ */
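+
+/* For instance, for (V1 = 0; V1 < 10; V1 += 3) gives
+ adj = STEP1 - 1 = 2 and count1 = (2 + 10 - 0) / 3 = 4,
+ matching the four iterations V1 = 0, 3, 6, 9. */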
static void
expand_omp_for_generic (struct omp_region *region,
enum built_in_function start_fn,
enum built_in_function next_fn)
{
- tree type, istart0, iend0, iend;
- tree t, list;
- basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb;
+ tree type, istart0, iend0, iend, phi;
+ tree t, vmain, vback, bias = NULL_TREE;
+ basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
basic_block l2_bb = NULL, l3_bb = NULL;
block_stmt_iterator si;
bool in_combined_parallel = is_combined_parallel (region);
bool broken_loop = region->cont == NULL;
+ edge e, ne;
+ tree *counts = NULL;
+ int i;
gcc_assert (!broken_loop || !in_combined_parallel);
+ gcc_assert (fd->iter_type == long_integer_type_node
+ || !in_combined_parallel);
- type = TREE_TYPE (fd->v);
-
- istart0 = create_tmp_var (long_integer_type_node, ".istart0");
- iend0 = create_tmp_var (long_integer_type_node, ".iend0");
- iend = create_tmp_var (type, NULL);
+ type = TREE_TYPE (fd->loop.v);
+ istart0 = create_tmp_var (fd->iter_type, ".istart0");
+ iend0 = create_tmp_var (fd->iter_type, ".iend0");
TREE_ADDRESSABLE (istart0) = 1;
TREE_ADDRESSABLE (iend0) = 1;
+ if (gimple_in_ssa_p (cfun))
+ {
+ add_referenced_var (istart0);
+ add_referenced_var (iend0);
+ }
+
+ /* See if we need to bias by LLONG_MIN. */
+ if (fd->iter_type == long_long_unsigned_type_node
+ && TREE_CODE (type) == INTEGER_TYPE
+ && !TYPE_UNSIGNED (type))
+ {
+ tree n1, n2;
+
+ if (fd->loop.cond_code == LT_EXPR)
+ {
+ n1 = fd->loop.n1;
+ n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
+ }
+ else
+ {
+ n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
+ n2 = fd->loop.n1;
+ }
+ if (TREE_CODE (n1) != INTEGER_CST
+ || TREE_CODE (n2) != INTEGER_CST
+ || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
+ bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
+ }
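The bias exists because the GOMP_loop_ull_* interface iterates over unsigned long long values, which cannot directly represent a signed range that straddles zero; adding TYPE_MIN_VALUE (LLONG_MIN for long long) shifts the whole signed range into unsigned space while preserving order. A hedged sketch of the invariant:

    #include <limits.h>
    #include <stdio.h>

    /* Sketch: biasing signed bounds by LLONG_MIN so they can be
       iterated as unsigned long long without changing their order.  */
    int main (void)
    {
      long long n1 = -10, n2 = 10;   /* signed bounds straddling zero */
      unsigned long long bias = (unsigned long long) LLONG_MIN;
      unsigned long long u1 = (unsigned long long) n1 + bias;
      unsigned long long u2 = (unsigned long long) n2 + bias;
      printf ("%d\n", u1 < u2);      /* prints 1: order is preserved */
      return 0;
    }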
entry_bb = region->entry;
cont_bb = region->cont;
+ collapse_bb = NULL;
gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
gcc_assert (broken_loop
|| BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
exit_bb = region->exit;
si = bsi_last (entry_bb);
+
gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
- if (!in_combined_parallel)
+ if (fd->collapse > 1)
+ {
+ /* Collapsed loops need work for expansion in SSA form. */
+ gcc_assert (!gimple_in_ssa_p (cfun));
+ counts = (tree *) alloca (fd->collapse * sizeof (tree));
+ for (i = 0; i < fd->collapse; i++)
+ {
+ tree itype = TREE_TYPE (fd->loops[i].v);
+
+ if (POINTER_TYPE_P (itype))
+ itype = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0);
+ t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
+ ? -1 : 1));
+ t = fold_build2 (PLUS_EXPR, itype,
+ fold_convert (itype, fd->loops[i].step), t);
+ t = fold_build2 (PLUS_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].n2));
+ t = fold_build2 (MINUS_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].n1));
+ if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
+ t = fold_build2 (TRUNC_DIV_EXPR, itype,
+ fold_build1 (NEGATE_EXPR, itype, t),
+ fold_build1 (NEGATE_EXPR, itype,
+ fold_convert (itype,
+ fd->loops[i].step)));
+ else
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].step));
+ t = fold_convert (type, t);
+ if (TREE_CODE (t) == INTEGER_CST)
+ counts[i] = t;
+ else
+ {
+ counts[i] = create_tmp_var (type, ".count");
+ t = build_gimple_modify_stmt (counts[i], t);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ }
+ if (SSA_VAR_P (fd->loop.n2))
+ {
+ if (i == 0)
+ t = build_gimple_modify_stmt (fd->loop.n2, counts[0]);
+ else
+ {
+ t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
+ t = build_gimple_modify_stmt (fd->loop.n2, t);
+ }
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ }
+ }
+ }
+ if (in_combined_parallel)
+ {
+ /* In a combined parallel loop, emit a call to
+ GOMP_loop_foo_next. */
+ t = build_call_expr (built_in_decls[next_fn], 2,
+ build_fold_addr_expr (istart0),
+ build_fold_addr_expr (iend0));
+ }
+ else
{
tree t0, t1, t2, t3, t4;
/* If this is not a combined parallel loop, emit a call to
GOMP_loop_foo_start in ENTRY_BB. */
- list = alloc_stmt_list ();
t4 = build_fold_addr_expr (iend0);
t3 = build_fold_addr_expr (istart0);
- t2 = fold_convert (long_integer_type_node, fd->step);
- t1 = fold_convert (long_integer_type_node, fd->n2);
- t0 = fold_convert (long_integer_type_node, fd->n1);
- if (fd->chunk_size)
+ t2 = fold_convert (fd->iter_type, fd->loop.step);
+ t1 = fold_convert (fd->iter_type, fd->loop.n2);
+ t0 = fold_convert (fd->iter_type, fd->loop.n1);
+ if (bias)
{
- t = fold_convert (long_integer_type_node, fd->chunk_size);
- t = build_call_expr (built_in_decls[start_fn], 6,
- t0, t1, t2, t, t3, t4);
+ t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
+ t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
+ }
+ if (fd->iter_type == long_integer_type_node)
+ {
+ if (fd->chunk_size)
+ {
+ t = fold_convert (fd->iter_type, fd->chunk_size);
+ t = build_call_expr (built_in_decls[start_fn], 6,
+ t0, t1, t2, t, t3, t4);
+ }
+ else
+ t = build_call_expr (built_in_decls[start_fn], 5,
+ t0, t1, t2, t3, t4);
}
else
- t = build_call_expr (built_in_decls[start_fn], 5,
- t0, t1, t2, t3, t4);
- t = get_formal_tmp_var (t, &list);
- t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
+ {
+ tree t5;
+ tree c_bool_type;
+
+ /* The GOMP_loop_ull_*start functions have an additional boolean
+ argument: true for < loops and false for > loops.
+ In Fortran, the C bool type can be different from
+ boolean_type_node. */
+ c_bool_type = TREE_TYPE (TREE_TYPE (built_in_decls[start_fn]));
+ t5 = build_int_cst (c_bool_type,
+ fd->loop.cond_code == LT_EXPR ? 1 : 0);
+ if (fd->chunk_size)
+ {
+ t = fold_convert (fd->iter_type, fd->chunk_size);
+ t = build_call_expr (built_in_decls[start_fn], 7,
+ t5, t0, t1, t2, t, t3, t4);
+ }
+ else
+ t = build_call_expr (built_in_decls[start_fn], 6,
+ t5, t0, t1, t2, t3, t4);
+ }
}
+ if (TREE_TYPE (t) != boolean_type_node)
+ t = fold_build2 (NE_EXPR, boolean_type_node,
+ t, build_int_cst (TREE_TYPE (t), 0));
+ t = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
+ bsi_insert_after (&si, t, BSI_SAME_STMT);
+
+ /* Remove the OMP_FOR statement. */
bsi_remove (&si, true);
/* Iteration setup for sequential loop goes in L0_BB. */
- list = alloc_stmt_list ();
- t = fold_convert (type, istart0);
- t = build_gimple_modify_stmt (fd->v, t);
- gimplify_and_add (t, &list);
+ si = bsi_start (l0_bb);
+ if (bias)
+ t = fold_convert (type, fold_build2 (MINUS_EXPR, fd->iter_type,
+ istart0, bias));
+ else
+ t = fold_convert (type, istart0);
+ t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ t = build_gimple_modify_stmt (fd->loop.v, t);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (fd->loop.v) = t;
- t = fold_convert (type, iend0);
- t = build_gimple_modify_stmt (iend, t);
- gimplify_and_add (t, &list);
+ if (bias)
+ t = fold_convert (type, fold_build2 (MINUS_EXPR, fd->iter_type,
+ iend0, bias));
+ else
+ t = fold_convert (type, iend0);
+ iend = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ if (fd->collapse > 1)
+ {
+ tree tem = create_tmp_var (type, ".tem");
- si = bsi_start (l0_bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+ t = build_gimple_modify_stmt (tem, fd->loop.v);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+ for (i = fd->collapse - 1; i >= 0; i--)
+ {
+ tree vtype = TREE_TYPE (fd->loops[i].v), itype;
+ itype = vtype;
+ if (POINTER_TYPE_P (vtype))
+ itype = lang_hooks.types.type_for_size (TYPE_PRECISION (vtype), 0);
+ t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
+ t = fold_convert (itype, t);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].step);
+ if (POINTER_TYPE_P (vtype))
+ t = fold_build2 (POINTER_PLUS_EXPR, vtype,
+ fd->loops[i].n1, fold_convert (sizetype, t));
+ else
+ t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+ t = build_gimple_modify_stmt (fd->loops[i].v, t);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ if (i != 0)
+ {
+ t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
+ t = build_gimple_modify_stmt (tem, t);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ }
+ }
+ }
if (!broken_loop)
{
/* Code to control the increment and predicate for the sequential
loop goes in the CONT_BB. */
- list = alloc_stmt_list ();
+ si = bsi_last (cont_bb);
+ t = bsi_stmt (si);
+ gcc_assert (TREE_CODE (t) == OMP_CONTINUE);
+ vmain = TREE_OPERAND (t, 1);
+ vback = TREE_OPERAND (t, 0);
- t = build2 (PLUS_EXPR, type, fd->v, fd->step);
- t = build_gimple_modify_stmt (fd->v, t);
- gimplify_and_add (t, &list);
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, vmain,
+ fold_convert (sizetype, fd->loop.step));
+ else
+ t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
+ t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
+ true, BSI_SAME_STMT);
+ t = build_gimple_modify_stmt (vback, t);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (vback) = t;
- t = build2 (fd->cond_code, boolean_type_node, fd->v, iend);
- t = get_formal_tmp_var (t, &list);
+ t = build2 (fd->loop.cond_code, boolean_type_node, vback, iend);
t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
- si = bsi_last (cont_bb);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
+ /* Remove OMP_CONTINUE. */
bsi_remove (&si, true);
+ if (fd->collapse > 1)
+ {
+ basic_block last_bb, bb;
+
+ last_bb = cont_bb;
+ for (i = fd->collapse - 1; i >= 0; i--)
+ {
+ tree vtype = TREE_TYPE (fd->loops[i].v);
+
+ bb = create_empty_bb (last_bb);
+ si = bsi_start (bb);
+
+ if (i < fd->collapse - 1)
+ {
+ e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
+ e->probability = REG_BR_PROB_BASE / 8;
+
+ t = build_gimple_modify_stmt (fd->loops[i + 1].v,
+ fd->loops[i + 1].n1);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ }
+ else
+ collapse_bb = bb;
+
+ set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
+
+ if (POINTER_TYPE_P (vtype))
+ t = fold_build2 (POINTER_PLUS_EXPR, vtype,
+ fd->loops[i].v,
+ fold_convert (sizetype, fd->loops[i].step));
+ else
+ t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v,
+ fd->loops[i].step);
+ t = build_gimple_modify_stmt (fd->loops[i].v, t);
+ force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+
+ if (i > 0)
+ {
+ t = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
+ fd->loops[i].v, fd->loops[i].n2);
+ t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ t = build3 (COND_EXPR, void_type_node, t,
+ NULL_TREE, NULL_TREE);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+ e = make_edge (bb, l1_bb, EDGE_TRUE_VALUE);
+ e->probability = REG_BR_PROB_BASE * 7 / 8;
+ }
+ else
+ make_edge (bb, l1_bb, EDGE_FALLTHRU);
+ last_bb = bb;
+ }
+ }
+
/* Emit code to get the next parallel iteration in L2_BB. */
- list = alloc_stmt_list ();
+ si = bsi_start (l2_bb);
t = build_call_expr (built_in_decls[next_fn], 2,
build_fold_addr_expr (istart0),
build_fold_addr_expr (iend0));
- t = get_formal_tmp_var (t, &list);
+ if (TREE_TYPE (t) != boolean_type_node)
+ t = fold_build2 (NE_EXPR, boolean_type_node,
+ t, build_int_cst (TREE_TYPE (t), 0));
+ t = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
-
- si = bsi_start (l2_bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
}
/* Add the loop cleanup function. */
bsi_remove (&si, true);
/* Connect the new blocks. */
- if (in_combined_parallel)
- {
- remove_edge (BRANCH_EDGE (entry_bb));
- redirect_edge_and_branch (single_succ_edge (entry_bb), l2_bb);
- }
- else
- {
- find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
- find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
- }
+ find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
+ find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
if (!broken_loop)
{
- find_edge (cont_bb, l1_bb)->flags = EDGE_TRUE_VALUE;
- remove_edge (find_edge (cont_bb, l3_bb));
- make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
+ e = find_edge (cont_bb, l3_bb);
+ ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
+
+ for (phi = phi_nodes (l3_bb); phi; phi = PHI_CHAIN (phi))
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
+ PHI_ARG_DEF_FROM_EDGE (phi, e));
+ remove_edge (e);
+ make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
+ if (fd->collapse > 1)
+ {
+ e = find_edge (cont_bb, l1_bb);
+ remove_edge (e);
+ e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
+ }
+ else
+ {
+ e = find_edge (cont_bb, l1_bb);
+ e->flags = EDGE_TRUE_VALUE;
+ }
+ e->probability = REG_BR_PROB_BASE * 7 / 8;
+ find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
- make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
+
+ set_immediate_dominator (CDI_DOMINATORS, l2_bb,
+ recompute_dominator (CDI_DOMINATORS, l2_bb));
+ set_immediate_dominator (CDI_DOMINATORS, l3_bb,
+ recompute_dominator (CDI_DOMINATORS, l3_bb));
+ set_immediate_dominator (CDI_DOMINATORS, l0_bb,
+ recompute_dominator (CDI_DOMINATORS, l0_bb));
+ set_immediate_dominator (CDI_DOMINATORS, l1_bb,
+ recompute_dominator (CDI_DOMINATORS, l1_bb));
}
}
adj = STEP - 1;
else
adj = STEP + 1;
- n = (adj + N2 - N1) / STEP;
+ if ((__typeof (V)) -1 > 0 && cond is >)
+ n = -(adj + N2 - N1) / -STEP;
+ else
+ n = (adj + N2 - N1) / STEP;
q = n / nthreads;
q += (q * nthreads != n);
s0 = q * threadid;
e0 = min(s0 + q, n);
+ V = s0 * STEP + N1;
if (s0 >= e0) goto L2; else goto L0;
L0:
- V = s0 * STEP + N1;
e = e0 * STEP + N1;
L1:
BODY;
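The unsigned branch of the trip-count formula above negates both numerator and denominator so the division is well defined when STEP is negative modulo 2^N; a worked example with assumed values:

    #include <stdio.h>

    /* Sketch: trip count of `for (unsigned v = 10; v > 0; v -= 2)'.
       As unsigned, STEP (-2) is huge, so negate both sides first.  */
    int main (void)
    {
      unsigned n1 = 10, n2 = 0, step = -2u;
      unsigned adj = step + 1;                /* cond is >, so STEP + 1 */
      unsigned n = -(adj + n2 - n1) / -step;  /* (10 - 0 + 1) / 2 = 5 */
      printf ("%u\n", n);                     /* five iterations: 10,8,6,4,2 */
      return 0;
    }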
struct omp_for_data *fd)
{
tree n, q, s0, e0, e, t, nthreads, threadid;
- tree type, list;
+ tree type, itype, vmain, vback;
basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
basic_block fin_bb;
block_stmt_iterator si;
- type = TREE_TYPE (fd->v);
+ itype = type = TREE_TYPE (fd->loop.v);
+ if (POINTER_TYPE_P (type))
+ itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);
entry_bb = region->entry;
cont_bb = region->cont;
exit_bb = region->exit;
/* Iteration space partitioning goes in ENTRY_BB. */
- list = alloc_stmt_list ();
+ si = bsi_last (entry_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
- t = fold_convert (type, t);
- nthreads = get_formal_tmp_var (t, &list);
+ t = fold_convert (itype, t);
+ nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
- t = fold_convert (type, t);
- threadid = get_formal_tmp_var (t, &list);
-
- fd->n1 = fold_convert (type, fd->n1);
- if (!is_gimple_val (fd->n1))
- fd->n1 = get_formal_tmp_var (fd->n1, &list);
-
- fd->n2 = fold_convert (type, fd->n2);
- if (!is_gimple_val (fd->n2))
- fd->n2 = get_formal_tmp_var (fd->n2, &list);
-
- fd->step = fold_convert (type, fd->step);
- if (!is_gimple_val (fd->step))
- fd->step = get_formal_tmp_var (fd->step, &list);
-
- t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
- t = fold_build2 (PLUS_EXPR, type, fd->step, t);
- t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
- t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
- t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
- t = fold_convert (type, t);
- if (is_gimple_val (t))
- n = t;
+ t = fold_convert (itype, t);
+ threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+
+ fd->loop.n1
+ = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n1),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+ fd->loop.n2
+ = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.n2),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+ fd->loop.step
+ = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.step),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+
+ t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
+ t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
+ t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
+ t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
+ if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
+ t = fold_build2 (TRUNC_DIV_EXPR, itype,
+ fold_build1 (NEGATE_EXPR, itype, t),
+ fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
else
- n = get_formal_tmp_var (t, &list);
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
+ t = fold_convert (itype, t);
+ n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
- t = build2 (TRUNC_DIV_EXPR, type, n, nthreads);
- q = get_formal_tmp_var (t, &list);
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
+ q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
- t = build2 (MULT_EXPR, type, q, nthreads);
- t = build2 (NE_EXPR, type, t, n);
- t = build2 (PLUS_EXPR, type, q, t);
- q = get_formal_tmp_var (t, &list);
+ t = fold_build2 (MULT_EXPR, itype, q, nthreads);
+ t = fold_build2 (NE_EXPR, itype, t, n);
+ t = fold_build2 (PLUS_EXPR, itype, q, t);
+ q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
- t = build2 (MULT_EXPR, type, q, threadid);
- s0 = get_formal_tmp_var (t, &list);
+ t = build2 (MULT_EXPR, itype, q, threadid);
+ s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
- t = build2 (PLUS_EXPR, type, s0, q);
- t = build2 (MIN_EXPR, type, t, n);
- e0 = get_formal_tmp_var (t, &list);
+ t = fold_build2 (PLUS_EXPR, itype, s0, q);
+ t = fold_build2 (MIN_EXPR, itype, t, n);
+ e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
t = build2 (GE_EXPR, boolean_type_node, s0, e0);
t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
- si = bsi_last (entry_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
+ /* Remove the OMP_FOR statement. */
bsi_remove (&si, true);
/* Setup code for sequential iteration goes in SEQ_START_BB. */
- list = alloc_stmt_list ();
-
- t = fold_convert (type, s0);
- t = build2 (MULT_EXPR, type, t, fd->step);
- t = build2 (PLUS_EXPR, type, t, fd->n1);
- t = build_gimple_modify_stmt (fd->v, t);
- gimplify_and_add (t, &list);
-
- t = fold_convert (type, e0);
- t = build2 (MULT_EXPR, type, t, fd->step);
- t = build2 (PLUS_EXPR, type, t, fd->n1);
- e = get_formal_tmp_var (t, &list);
-
si = bsi_start (seq_start_bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
- /* The code controlling the sequential loop replaces the OMP_CONTINUE. */
- list = alloc_stmt_list ();
-
- t = build2 (PLUS_EXPR, type, fd->v, fd->step);
- t = build_gimple_modify_stmt (fd->v, t);
- gimplify_and_add (t, &list);
+ t = fold_convert (itype, s0);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+ fold_convert (sizetype, t));
+ else
+ t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+ t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ t = build_gimple_modify_stmt (fd->loop.v, t);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (fd->loop.v) = t;
+
+ t = fold_convert (itype, e0);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+ fold_convert (sizetype, t));
+ else
+ t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+ e = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
- t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
- t = get_formal_tmp_var (t, &list);
+ /* The code controlling the sequential loop replaces the OMP_CONTINUE. */
+ si = bsi_last (cont_bb);
+ t = bsi_stmt (si);
+ gcc_assert (TREE_CODE (t) == OMP_CONTINUE);
+ vmain = TREE_OPERAND (t, 1);
+ vback = TREE_OPERAND (t, 0);
+
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, vmain,
+ fold_convert (sizetype, fd->loop.step));
+ else
+ t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
+ t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
+ true, BSI_SAME_STMT);
+ t = build_gimple_modify_stmt (vback, t);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (vback) = t;
+
+ t = build2 (fd->loop.cond_code, boolean_type_node, vback, e);
t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
- si = bsi_last (cont_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
+ /* Remove the OMP_CONTINUE statement. */
bsi_remove (&si, true);
/* Replace the OMP_RETURN with a barrier, or nothing. */
si = bsi_last (exit_bb);
if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
- {
- list = alloc_stmt_list ();
- build_omp_barrier (&list);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
- }
+ force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE,
+ false, BSI_SAME_STMT);
bsi_remove (&si, true);
/* Connect all the blocks. */
find_edge (entry_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
find_edge (entry_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
-
+
find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
+
+ set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, entry_bb);
+ set_immediate_dominator (CDI_DOMINATORS, body_bb,
+ recompute_dominator (CDI_DOMINATORS, body_bb));
+ set_immediate_dominator (CDI_DOMINATORS, fin_bb,
+ recompute_dominator (CDI_DOMINATORS, fin_bb));
}
adj = STEP - 1;
else
adj = STEP + 1;
- n = (adj + N2 - N1) / STEP;
+ if ((__typeof (V)) -1 > 0 && cond is >)
+ n = -(adj + N2 - N1) / -STEP;
+ else
+ n = (adj + N2 - N1) / STEP;
trip = 0;
+ V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
+ here so that V is defined if the loop is not entered
L0:
s0 = (trip * nthreads + threadid) * CHUNK;
e0 = min(s0 + CHUNK, n);
*/
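A small sketch (assumed values, not part of the patch) of how the trip/chunk arithmetic above partitions n = 10 iterations between nthreads = 2 threads with CHUNK = 3:

    #include <stdio.h>

    /* Sketch: each thread visits chunks s0 = (trip * nthreads + threadid)
       * CHUNK, e0 = min (s0 + CHUNK, n), bumping trip after each chunk.  */
    int main (void)
    {
      long n = 10, nthreads = 2, chunk = 3;
      for (long threadid = 0; threadid < nthreads; threadid++)
        for (long trip = 0; ; trip++)
          {
            long s0 = (trip * nthreads + threadid) * chunk;
            long e0 = s0 + chunk < n ? s0 + chunk : n;
            if (s0 >= n)
              break;
            printf ("thread %ld trip %ld: [%ld, %ld)\n",
                    threadid, trip, s0, e0);
          }
      return 0;
    }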
static void
-expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
+expand_omp_for_static_chunk (struct omp_region *region,
+ struct omp_for_data *fd)
{
- tree n, s0, e0, e, t;
- tree trip, nthreads, threadid;
- tree type;
+ tree n, s0, e0, e, t, phi, nphi, args;
+ tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
+ tree type, itype, cont, v_main, v_back, v_extra;
basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
basic_block trip_update_bb, cont_bb, fin_bb;
- tree list;
block_stmt_iterator si;
- edge se;
+ edge se, re, ene;
- type = TREE_TYPE (fd->v);
+ itype = type = TREE_TYPE (fd->loop.v);
+ if (POINTER_TYPE_P (type))
+ itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);
entry_bb = region->entry;
se = split_block (entry_bb, last_stmt (entry_bb));
exit_bb = region->exit;
/* Trip and adjustment setup goes in ENTRY_BB. */
- list = alloc_stmt_list ();
+ si = bsi_last (entry_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
- t = fold_convert (type, t);
- nthreads = get_formal_tmp_var (t, &list);
+ t = fold_convert (itype, t);
+ nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
- t = fold_convert (type, t);
- threadid = get_formal_tmp_var (t, &list);
-
- fd->n1 = fold_convert (type, fd->n1);
- if (!is_gimple_val (fd->n1))
- fd->n1 = get_formal_tmp_var (fd->n1, &list);
-
- fd->n2 = fold_convert (type, fd->n2);
- if (!is_gimple_val (fd->n2))
- fd->n2 = get_formal_tmp_var (fd->n2, &list);
-
- fd->step = fold_convert (type, fd->step);
- if (!is_gimple_val (fd->step))
- fd->step = get_formal_tmp_var (fd->step, &list);
-
- fd->chunk_size = fold_convert (type, fd->chunk_size);
- if (!is_gimple_val (fd->chunk_size))
- fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list);
-
- t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
- t = fold_build2 (PLUS_EXPR, type, fd->step, t);
- t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
- t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
- t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
- t = fold_convert (type, t);
- if (is_gimple_val (t))
- n = t;
+ t = fold_convert (itype, t);
+ threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+
+ fd->loop.n1
+ = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n1),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+ fd->loop.n2
+ = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.n2),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+ fd->loop.step
+ = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.step),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+ fd->chunk_size
+ = force_gimple_operand_bsi (&si, fold_convert (itype, fd->chunk_size),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+
+ t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
+ t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
+ t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
+ t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
+ if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
+ t = fold_build2 (TRUNC_DIV_EXPR, itype,
+ fold_build1 (NEGATE_EXPR, itype, t),
+ fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
else
- n = get_formal_tmp_var (t, &list);
+ t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
+ t = fold_convert (itype, t);
+ n = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
- t = build_int_cst (type, 0);
- trip = get_initialized_tmp_var (t, &list, NULL);
+ trip_var = create_tmp_var (itype, ".trip");
+ if (gimple_in_ssa_p (cfun))
+ {
+ add_referenced_var (trip_var);
+ trip_init = make_ssa_name (trip_var, NULL_TREE);
+ trip_main = make_ssa_name (trip_var, NULL_TREE);
+ trip_back = make_ssa_name (trip_var, NULL_TREE);
+ }
+ else
+ {
+ trip_init = trip_var;
+ trip_main = trip_var;
+ trip_back = trip_var;
+ }
- si = bsi_last (entry_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
+ t = build_gimple_modify_stmt (trip_init, build_int_cst (itype, 0));
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (trip_init) = t;
+
+ t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+ fold_convert (sizetype, t));
+ else
+ t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+ v_extra = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+
+ /* Remove the OMP_FOR. */
bsi_remove (&si, true);
/* Iteration space partitioning goes in ITER_PART_BB. */
- list = alloc_stmt_list ();
+ si = bsi_last (iter_part_bb);
- t = build2 (MULT_EXPR, type, trip, nthreads);
- t = build2 (PLUS_EXPR, type, t, threadid);
- t = build2 (MULT_EXPR, type, t, fd->chunk_size);
- s0 = get_formal_tmp_var (t, &list);
+ t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
+ t = fold_build2 (PLUS_EXPR, itype, t, threadid);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->chunk_size);
+ s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
- t = build2 (PLUS_EXPR, type, s0, fd->chunk_size);
- t = build2 (MIN_EXPR, type, t, n);
- e0 = get_formal_tmp_var (t, &list);
+ t = fold_build2 (PLUS_EXPR, itype, s0, fd->chunk_size);
+ t = fold_build2 (MIN_EXPR, itype, t, n);
+ e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
t = build2 (LT_EXPR, boolean_type_node, s0, n);
t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
-
- si = bsi_start (iter_part_bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
/* Setup code for sequential iteration goes in SEQ_START_BB. */
- list = alloc_stmt_list ();
-
- t = fold_convert (type, s0);
- t = build2 (MULT_EXPR, type, t, fd->step);
- t = build2 (PLUS_EXPR, type, t, fd->n1);
- t = build_gimple_modify_stmt (fd->v, t);
- gimplify_and_add (t, &list);
-
- t = fold_convert (type, e0);
- t = build2 (MULT_EXPR, type, t, fd->step);
- t = build2 (PLUS_EXPR, type, t, fd->n1);
- e = get_formal_tmp_var (t, &list);
-
si = bsi_start (seq_start_bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+
+ t = fold_convert (itype, s0);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+ fold_convert (sizetype, t));
+ else
+ t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+ t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
+ t = build_gimple_modify_stmt (fd->loop.v, t);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (fd->loop.v) = t;
+
+ t = fold_convert (itype, e0);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+ fold_convert (sizetype, t));
+ else
+ t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+ e = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
+ false, BSI_CONTINUE_LINKING);
/* The code controlling the sequential loop goes in CONT_BB,
replacing the OMP_CONTINUE. */
- list = alloc_stmt_list ();
-
- t = build2 (PLUS_EXPR, type, fd->v, fd->step);
- t = build_gimple_modify_stmt (fd->v, t);
- gimplify_and_add (t, &list);
+ si = bsi_last (cont_bb);
+ cont = bsi_stmt (si);
+ gcc_assert (TREE_CODE (cont) == OMP_CONTINUE);
+ v_main = TREE_OPERAND (cont, 1);
+ v_back = TREE_OPERAND (cont, 0);
+
+ if (POINTER_TYPE_P (type))
+ t = fold_build2 (POINTER_PLUS_EXPR, type, v_main,
+ fold_convert (sizetype, fd->loop.step));
+ else
+ t = build2 (PLUS_EXPR, type, v_main, fd->loop.step);
+ t = build_gimple_modify_stmt (v_back, t);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (v_back) = t;
- t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
- t = get_formal_tmp_var (t, &list);
+ t = build2 (fd->loop.cond_code, boolean_type_node, v_back, e);
t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
- append_to_statement_list (t, &list);
+ bsi_insert_before (&si, t, BSI_SAME_STMT);
- si = bsi_last (cont_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
+ /* Remove OMP_CONTINUE. */
bsi_remove (&si, true);
/* Trip update code goes into TRIP_UPDATE_BB. */
- list = alloc_stmt_list ();
-
- t = build_int_cst (type, 1);
- t = build2 (PLUS_EXPR, type, trip, t);
- t = build_gimple_modify_stmt (trip, t);
- gimplify_and_add (t, &list);
-
si = bsi_start (trip_update_bb);
- bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
+
+ t = build_int_cst (itype, 1);
+ t = build2 (PLUS_EXPR, itype, trip_main, t);
+ t = build_gimple_modify_stmt (trip_back, t);
+ bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (trip_back) = t;
/* Replace the OMP_RETURN with a barrier, or nothing. */
si = bsi_last (exit_bb);
if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
- {
- list = alloc_stmt_list ();
- build_omp_barrier (&list);
- bsi_insert_after (&si, list, BSI_SAME_STMT);
- }
+ force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE,
+ false, BSI_SAME_STMT);
bsi_remove (&si, true);
/* Connect the new blocks. */
find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
-
+
find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
find_edge (cont_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE;
-
+
redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
+
+ if (gimple_in_ssa_p (cfun))
+ {
+ /* When we redirect the edge from trip_update_bb to iter_part_bb, we
+ remove arguments of the phi nodes in fin_bb. We need to create
+ appropriate phi nodes in iter_part_bb instead. */
+ se = single_pred_edge (fin_bb);
+ re = single_succ_edge (trip_update_bb);
+ ene = single_succ_edge (entry_bb);
+
+ args = PENDING_STMT (re);
+ PENDING_STMT (re) = NULL_TREE;
+ for (phi = phi_nodes (fin_bb);
+ phi && args;
+ phi = PHI_CHAIN (phi), args = TREE_CHAIN (args))
+ {
+ t = PHI_RESULT (phi);
+ gcc_assert (t == TREE_PURPOSE (args));
+ nphi = create_phi_node (t, iter_part_bb);
+ SSA_NAME_DEF_STMT (t) = nphi;
+
+ t = PHI_ARG_DEF_FROM_EDGE (phi, se);
+ /* A special case -- fd->loop.v is not yet computed in
+ iter_part_bb, so we need to use v_extra instead. */
+ if (t == fd->loop.v)
+ t = v_extra;
+ add_phi_arg (nphi, t, ene);
+ add_phi_arg (nphi, TREE_VALUE (args), re);
+ }
+ gcc_assert (!phi && !args);
+ while ((phi = phi_nodes (fin_bb)) != NULL_TREE)
+ remove_phi_node (phi, NULL_TREE, false);
+
+ /* Make phi node for trip. */
+ phi = create_phi_node (trip_main, iter_part_bb);
+ SSA_NAME_DEF_STMT (trip_main) = phi;
+ add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb));
+ add_phi_arg (phi, trip_init, single_succ_edge (entry_bb));
+ }
+
+ set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
+ set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
+ recompute_dominator (CDI_DOMINATORS, iter_part_bb));
+ set_immediate_dominator (CDI_DOMINATORS, fin_bb,
+ recompute_dominator (CDI_DOMINATORS, fin_bb));
+ set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
+ recompute_dominator (CDI_DOMINATORS, seq_start_bb));
+ set_immediate_dominator (CDI_DOMINATORS, body_bb,
+ recompute_dominator (CDI_DOMINATORS, body_bb));
}
expand_omp_for (struct omp_region *region)
{
struct omp_for_data fd;
+ struct omp_for_data_loop *loops;
- push_gimplify_context ();
+ loops
+ = (struct omp_for_data_loop *)
+ alloca (TREE_VEC_LENGTH (OMP_FOR_INIT (last_stmt (region->entry)))
+ * sizeof (struct omp_for_data_loop));
- extract_omp_for_data (last_stmt (region->entry), &fd);
+ extract_omp_for_data (last_stmt (region->entry), &fd, loops);
region->sched_kind = fd.sched_kind;
+ gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
+ BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
+ FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
+ if (region->cont)
+ {
+ gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
+ BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
+ FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
+ }
+
if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
&& !fd.have_ordered
+ && fd.collapse == 1
&& region->cont != NULL)
{
if (fd.chunk_size == NULL)
}
else
{
- int fn_index = fd.sched_kind + fd.have_ordered * 4;
- int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
- int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
+ int fn_index, start_ix, next_ix;
+
+ gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
+ fn_index = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
+ ? 3 : fd.sched_kind;
+ fn_index += fd.have_ordered * 4;
+ start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
+ next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
+ if (fd.iter_type == long_long_unsigned_type_node)
+ {
+ start_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_START
+ - BUILT_IN_GOMP_LOOP_STATIC_START;
+ next_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
+ - BUILT_IN_GOMP_LOOP_STATIC_NEXT;
+ }
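The index arithmetic above relies on the GOMP builtins being declared contiguously in schedule order (static, dynamic, guided, runtime), with the ordered variants offset by 4 and the ULL variants at a fixed distance. A sketch of the idea with an assumed, purely illustrative enum layout:

    /* Sketch: selecting a start function by schedule kind, mirroring
       the fn_index arithmetic above (enum layout assumed contiguous).  */
    enum loop_start_fn
    {
      LOOP_STATIC_START, LOOP_DYNAMIC_START, LOOP_GUIDED_START,
      LOOP_RUNTIME_START,
      LOOP_ORDERED_STATIC_START, LOOP_ORDERED_DYNAMIC_START,
      LOOP_ORDERED_GUIDED_START, LOOP_ORDERED_RUNTIME_START
    };

    static enum loop_start_fn
    pick_start_fn (int sched_kind, /* 0 static, 1 dynamic, 2 guided, 3 runtime */
                   int have_ordered)
    {
      return (enum loop_start_fn) (LOOP_STATIC_START
                                   + sched_kind + have_ordered * 4);
    }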
expand_omp_for_generic (region, &fd, start_ix, next_ix);
}
- pop_gimplify_context (NULL);
+ update_ssa (TODO_update_ssa_only_virtuals);
}
reduction;
If this is a combined parallel sections, replace the call to
- GOMP_sections_start with 'goto L1'. */
+ GOMP_sections_start with call to GOMP_sections_next. */
static void
expand_omp_sections (struct omp_region *region)
{
- tree label_vec, l1, l2, t, u, v, sections_stmt;
+ tree label_vec, l1, l2, t, u, sections_stmt, vin, vmain, vnext, cont;
unsigned i, casei, len;
basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
block_stmt_iterator si;
+ edge_iterator ei;
+ edge e;
struct omp_region *inner;
bool exit_reachable = region->cont != NULL;
l2_bb = region->exit;
if (exit_reachable)
{
- gcc_assert (single_pred (l2_bb) == l0_bb);
+ if (single_pred (l2_bb) == l0_bb)
+ l2 = tree_block_label (l2_bb);
+ else
+ {
+ /* This can happen if there are reductions. */
+ len = EDGE_COUNT (l0_bb->succs);
+ gcc_assert (len > 0);
+ e = EDGE_SUCC (l0_bb, len - 1);
+ si = bsi_last (e->dest);
+ l2 = NULL_TREE;
+ if (bsi_end_p (si) || TREE_CODE (bsi_stmt (si)) != OMP_SECTION)
+ l2 = tree_block_label (e->dest);
+ else
+ FOR_EACH_EDGE (e, ei, l0_bb->succs)
+ {
+ si = bsi_last (e->dest);
+ if (bsi_end_p (si) || TREE_CODE (bsi_stmt (si)) != OMP_SECTION)
+ {
+ l2 = tree_block_label (e->dest);
+ break;
+ }
+ }
+ }
default_bb = create_empty_bb (l1_bb->prev_bb);
l1 = tree_block_label (l1_bb);
- l2 = tree_block_label (l2_bb);
}
else
{
si = bsi_last (entry_bb);
sections_stmt = bsi_stmt (si);
gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS);
- v = OMP_SECTIONS_CONTROL (sections_stmt);
+ vin = OMP_SECTIONS_CONTROL (sections_stmt);
if (!is_combined_parallel (region))
{
/* If we are not inside a combined parallel+sections region,
exit_reachable ? len - 1 : len);
u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
t = build_call_expr (u, 1, t);
- t = build_gimple_modify_stmt (v, t);
- bsi_insert_after (&si, t, BSI_SAME_STMT);
}
+ else
+ {
+ /* Otherwise, call GOMP_sections_next. */
+ u = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
+ t = build_call_expr (u, 0);
+ }
+ t = build_gimple_modify_stmt (vin, t);
+ bsi_insert_after (&si, t, BSI_SAME_STMT);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (vin) = t;
bsi_remove (&si, true);
/* The switch() statement replacing OMP_SECTIONS_SWITCH goes in L0_BB. */
si = bsi_last (l0_bb);
gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTIONS_SWITCH);
+ if (exit_reachable)
+ {
+ cont = last_stmt (l1_bb);
+ gcc_assert (TREE_CODE (cont) == OMP_CONTINUE);
+ vmain = TREE_OPERAND (cont, 1);
+ vnext = TREE_OPERAND (cont, 0);
+ }
+ else
+ {
+ vmain = vin;
+ vnext = NULL_TREE;
+ }
- t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec);
+ t = build3 (SWITCH_EXPR, void_type_node, vmain, NULL, label_vec);
bsi_insert_after (&si, t, BSI_SAME_STMT);
bsi_remove (&si, true);
{
basic_block s_entry_bb, s_exit_bb;
+ /* Skip optional reduction region. */
+ if (inner->type == OMP_ATOMIC_LOAD)
+ {
+ --i;
+ --casei;
+ continue;
+ }
+
s_entry_bb = inner->entry;
s_exit_bb = inner->exit;
gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT], 0);
- t = build_gimple_modify_stmt (v, t);
+ t = build_gimple_modify_stmt (vnext, t);
bsi_insert_after (&si, t, BSI_SAME_STMT);
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (vnext) = t;
bsi_remove (&si, true);
single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
bsi_remove (&si, true);
}
- /* Connect the new blocks. */
- if (is_combined_parallel (region))
- {
- /* If this was a combined parallel+sections region, we did not
- emit a GOMP_sections_start in the entry block, so we just
- need to jump to L1_BB to get the next section. */
- gcc_assert (exit_reachable);
- redirect_edge_and_branch (single_succ_edge (entry_bb), l1_bb);
- }
+ set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}
si = bsi_last (exit_bb);
if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier)
- {
- tree t = alloc_stmt_list ();
- build_omp_barrier (&t);
- bsi_insert_after (&si, t, BSI_SAME_STMT);
- }
+ force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE,
+ false, BSI_SAME_STMT);
bsi_remove (&si, true);
single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}
-
-/* Generic expansion for OpenMP synchronization directives: master,
- ordered and critical. All we need to do here is remove the entry
- and exit markers for REGION. */
+
+/* Generic expansion for OpenMP synchronization directives: master,
+ ordered and critical. All we need to do here is remove the entry
+ and exit markers for REGION. */
+
+static void
+expand_omp_synch (struct omp_region *region)
+{
+ basic_block entry_bb, exit_bb;
+ block_stmt_iterator si;
+
+ entry_bb = region->entry;
+ exit_bb = region->exit;
+
+ si = bsi_last (entry_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
+ || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
+ || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
+ || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
+ bsi_remove (&si, true);
+ single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+ if (exit_bb)
+ {
+ si = bsi_last (exit_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
+ bsi_remove (&si, true);
+ single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
+ }
+}
+
+/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
+ operation as a __sync_fetch_and_op builtin. INDEX is log2 of the
+ size of the data type, and thus usable to find the index of the builtin
+ decl. Returns false if the expression is not of the proper form. */
+
+static bool
+expand_omp_atomic_fetch_op (basic_block load_bb,
+ tree addr, tree loaded_val,
+ tree stored_val, int index)
+{
+ enum built_in_function base;
+ tree decl, itype, call;
+ enum insn_code *optab;
+ tree rhs;
+ basic_block store_bb = single_succ (load_bb);
+ block_stmt_iterator bsi;
+ tree stmt;
+
+ /* We expect to find the following sequences:
+
+ load_bb:
+ OMP_ATOMIC_LOAD (tmp, mem)
+
+ store_bb:
+ val = tmp OP something; (or: something OP tmp)
+ OMP_STORE (val)
+
+ ??? FIXME: Allow a more flexible sequence.
+ Perhaps use data flow to pick the statements.
+
+ */
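For instance, an `#pragma omp atomic' increment of a 4-byte int matches this load/store shape and collapses to a single __sync builtin (a real GCC primitive); a sketch of the before/after:

    /* Sketch: what expand_omp_atomic_fetch_op recognizes and, roughly,
       what it emits for the supported case.  */
    int counter;

    void bump (int amount)
    {
    #pragma omp atomic
      counter += amount;              /* OMP_ATOMIC_LOAD/STORE pair */
    }

    void bump_expanded (int amount)
    {
      __sync_fetch_and_add (&counter, amount);   /* fetch-op form */
    }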
+
+ bsi = bsi_after_labels (store_bb);
+ stmt = bsi_stmt (bsi);
+ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
+ return false;
+ bsi_next (&bsi);
+ if (TREE_CODE (bsi_stmt (bsi)) != OMP_ATOMIC_STORE)
+ return false;
+
+ if (!operand_equal_p (GIMPLE_STMT_OPERAND (stmt, 0), stored_val, 0))
+ return false;
+
+ rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+
+ /* Check for one of the supported fetch-op operations. */
+ switch (TREE_CODE (rhs))
+ {
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ base = BUILT_IN_FETCH_AND_ADD_N;
+ optab = sync_add_optab;
+ break;
+ case MINUS_EXPR:
+ base = BUILT_IN_FETCH_AND_SUB_N;
+ optab = sync_add_optab;
+ break;
+ case BIT_AND_EXPR:
+ base = BUILT_IN_FETCH_AND_AND_N;
+ optab = sync_and_optab;
+ break;
+ case BIT_IOR_EXPR:
+ base = BUILT_IN_FETCH_AND_OR_N;
+ optab = sync_ior_optab;
+ break;
+ case BIT_XOR_EXPR:
+ base = BUILT_IN_FETCH_AND_XOR_N;
+ optab = sync_xor_optab;
+ break;
+ default:
+ return false;
+ }
+ /* Make sure the expression is of the proper form. */
+ if (operand_equal_p (TREE_OPERAND (rhs, 0), loaded_val, 0))
+ rhs = TREE_OPERAND (rhs, 1);
+ else if (commutative_tree_code (TREE_CODE (rhs))
+ && operand_equal_p (TREE_OPERAND (rhs, 1), loaded_val, 0))
+ rhs = TREE_OPERAND (rhs, 0);
+ else
+ return false;
+
+ decl = built_in_decls[base + index + 1];
+ itype = TREE_TYPE (TREE_TYPE (decl));
+
+ if (optab[TYPE_MODE (itype)] == CODE_FOR_nothing)
+ return false;
+
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+ call = build_call_expr (decl, 2, addr, fold_convert (itype, rhs));
+ force_gimple_operand_bsi (&bsi, call, true, NULL_TREE, true, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+ bsi_remove (&bsi, true);
+ bsi = bsi_last (store_bb);
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+
+ return true;
+}
+
+/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
+
+ oldval = *addr;
+ repeat:
+ newval = rhs; // with oldval replacing *addr in rhs
+ oldval = __sync_val_compare_and_swap (addr, oldval, newval);
+ if (oldval != newval)
+ goto repeat;
+
+ INDEX is log2 of the size of the data type, and thus usable to find the
+ index of the builtin decl. */
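In source terms the loop corresponds to the sketch below for a float (assuming a 4-byte unsigned int, and ignoring the strict-aliasing details the real code handles via DECL_NO_TBAA_P); the bits are compared as integers, which is what makes the loop terminate even for NaNs and -0.0:

    #include <string.h>

    /* Sketch: atomic `*addr += val' on a float via a compare-and-swap
       loop over the 4-byte bit pattern.  */
    static void
    atomic_add_float (float *addr, float val)
    {
      unsigned int *iaddr = (unsigned int *) addr;
      unsigned int oldi = *iaddr, newi, prev;
      float oldf, newf;

      for (;;)
        {
          memcpy (&oldf, &oldi, 4);     /* VIEW_CONVERT_EXPR analogue */
          newf = oldf + val;
          memcpy (&newi, &newf, 4);
          prev = __sync_val_compare_and_swap (iaddr, oldi, newi);
          if (prev == oldi)             /* integer comparison */
            break;
          oldi = prev;                  /* retry with observed value */
        }
    }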
+
+static bool
+expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
+ tree addr, tree loaded_val, tree stored_val,
+ int index)
+{
+ tree loadedi, storedi, initial, new_storedi, old_vali;
+ tree type, itype, cmpxchg, iaddr;
+ block_stmt_iterator bsi;
+ basic_block loop_header = single_succ (load_bb);
+ tree phi, x;
+ edge e;
+
+ cmpxchg = built_in_decls[BUILT_IN_VAL_COMPARE_AND_SWAP_N + index + 1];
+ type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
+ itype = TREE_TYPE (TREE_TYPE (cmpxchg));
+
+ if (sync_compare_and_swap[TYPE_MODE (itype)] == CODE_FOR_nothing)
+ return false;
+
+ /* Load the initial value, replacing the OMP_ATOMIC_LOAD. */
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+ /* For floating-point values, we'll need to view-convert them to integers
+ so that we can perform the atomic compare and swap. Simplify the
+ following code by always setting up the "i"ntegral variables. */
+ if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
+ {
+ iaddr = create_tmp_var (build_pointer_type (itype), NULL);
+ x = build_gimple_modify_stmt (iaddr,
+ fold_convert (TREE_TYPE (iaddr), addr));
+ force_gimple_operand_bsi (&bsi, x, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ DECL_NO_TBAA_P (iaddr) = 1;
+ DECL_POINTER_ALIAS_SET (iaddr) = 0;
+ loadedi = create_tmp_var (itype, NULL);
+ if (gimple_in_ssa_p (cfun))
+ {
+ add_referenced_var (iaddr);
+ add_referenced_var (loadedi);
+ loadedi = make_ssa_name (loadedi, NULL);
+ }
+ }
+ else
+ {
+ iaddr = addr;
+ loadedi = loaded_val;
+ }
+ initial = force_gimple_operand_bsi (&bsi, build_fold_indirect_ref (iaddr),
+ true, NULL_TREE, true, BSI_SAME_STMT);
+
+ /* Move the value to the LOADEDI temporary. */
+ if (gimple_in_ssa_p (cfun))
+ {
+ gcc_assert (phi_nodes (loop_header) == NULL_TREE);
+ phi = create_phi_node (loadedi, loop_header);
+ SSA_NAME_DEF_STMT (loadedi) = phi;
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
+ initial);
+ }
+ else
+ bsi_insert_before (&bsi,
+ build_gimple_modify_stmt (loadedi, initial),
+ BSI_SAME_STMT);
+ if (loadedi != loaded_val)
+ {
+ block_stmt_iterator bsi2;
+
+ x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
+ bsi2 = bsi_start (loop_header);
+ if (gimple_in_ssa_p (cfun))
+ {
+ x = force_gimple_operand_bsi (&bsi2, x, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ x = build_gimple_modify_stmt (loaded_val, x);
+ bsi_insert_before (&bsi2, x, BSI_SAME_STMT);
+ SSA_NAME_DEF_STMT (loaded_val) = x;
+ }
+ else
+ {
+ x = build_gimple_modify_stmt (loaded_val, x);
+ force_gimple_operand_bsi (&bsi2, x, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ }
+ }
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+
+ if (iaddr == addr)
+ storedi = stored_val;
+ else
+ storedi =
+ force_gimple_operand_bsi (&bsi,
+ build1 (VIEW_CONVERT_EXPR, itype,
+ stored_val), true, NULL_TREE, true,
+ BSI_SAME_STMT);
+
+ /* Build the compare&swap statement. */
+ new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
+ new_storedi = force_gimple_operand_bsi (&bsi,
+ fold_convert (itype, new_storedi),
+ true, NULL_TREE,
+ true, BSI_SAME_STMT);
+
+ if (gimple_in_ssa_p (cfun))
+ old_vali = loadedi;
+ else
+ {
+ old_vali = create_tmp_var (itype, NULL);
+ if (gimple_in_ssa_p (cfun))
+ add_referenced_var (old_vali);
+ x = build_gimple_modify_stmt (old_vali, loadedi);
+ force_gimple_operand_bsi (&bsi, x, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+
+ x = build_gimple_modify_stmt (loadedi, new_storedi);
+ force_gimple_operand_bsi (&bsi, x, true, NULL_TREE,
+ true, BSI_SAME_STMT);
+ }
+
+ /* Note that we always perform the comparison as an integer, even for
+ floating point. This allows the atomic operation to properly
+ succeed even with NaNs and -0.0. */
+ x = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
+ x = build3 (COND_EXPR, void_type_node, x, NULL_TREE, NULL_TREE);
+ bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+
+ /* Update cfg. */
+ e = single_succ_edge (store_bb);
+ e->flags &= ~EDGE_FALLTHRU;
+ e->flags |= EDGE_FALSE_VALUE;
+
+ e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
+
+ /* Copy the new value to loadedi (we already did that before the condition
+ if we are not in SSA). */
+ if (gimple_in_ssa_p (cfun))
+ {
+ phi = phi_nodes (loop_header);
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
+ }
+
+ /* Remove OMP_ATOMIC_STORE. */
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+
+ return true;
+}
+
+/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
+
+ GOMP_atomic_start ();
+ *addr = rhs;
+ GOMP_atomic_end ();
+
+ The result is not globally atomic, but works so long as all parallel
+ references are within #pragma omp atomic directives. According to
+ responses received from omp@openmp.org, this appears to be within
+ spec; it is also how several other compilers handle this situation.
+ LOADED_VAL and ADDR are the operands of OMP_ATOMIC_LOAD we're expanding.
+ STORED_VAL is the operand of the matching OMP_ATOMIC_STORE.
+
+ We replace
+ OMP_ATOMIC_LOAD (loaded_val, addr) with
+ loaded_val = *addr;
+
+ and replace
+ OMP_ATOMIC_STORE (stored_val) with
+ *addr = stored_val;
+*/
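As a usage note, this is the path taken when no __sync support exists for the access width; conceptually the expansion is just the real libgomp entry points around a plain update:

    /* Sketch: the mutex fallback, spelled out in source form.  */
    extern void GOMP_atomic_start (void);
    extern void GOMP_atomic_end (void);

    static long double shared;   /* e.g. a width with no CAS support */

    void
    add_locked (long double val)
    {
      GOMP_atomic_start ();
      shared += val;             /* *addr = rhs between the calls */
      GOMP_atomic_end ();
    }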
+
+static bool
+expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
+ tree addr, tree loaded_val, tree stored_val)
+{
+ block_stmt_iterator bsi;
+ tree t;
+
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+
+ t = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
+ t = build_function_call_expr (t, 0);
+ force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
+
+ t = build_gimple_modify_stmt (loaded_val, build_fold_indirect_ref (addr));
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (loaded_val) = t;
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+
+ t = build_gimple_modify_stmt (build_fold_indirect_ref (unshare_expr (addr)),
+ stored_val);
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+
+ t = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
+ t = build_function_call_expr (t, 0);
+ force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+ return true;
+}
+
+/* Expand an OMP_ATOMIC statement. We first try to expand it
+ using expand_omp_atomic_fetch_op. If that fails, we try
+ expand_omp_atomic_pipeline, and if that fails too, the
+ ultimate fallback is wrapping the operation in a mutex
+ (expand_omp_atomic_mutex). REGION is the atomic region built
+ by build_omp_regions_1(). */
static void
-expand_omp_synch (struct omp_region *region)
+expand_omp_atomic (struct omp_region *region)
{
- basic_block entry_bb, exit_bb;
- block_stmt_iterator si;
-
- entry_bb = region->entry;
- exit_bb = region->exit;
+ basic_block load_bb = region->entry, store_bb = region->exit;
+ tree load = last_stmt (load_bb), store = last_stmt (store_bb);
+ tree loaded_val = TREE_OPERAND (load, 0);
+ tree addr = TREE_OPERAND (load, 1);
+ tree stored_val = TREE_OPERAND (store, 0);
+ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
+ HOST_WIDE_INT index;
+
+ /* Make sure the type is one of the supported sizes. */
+ index = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ index = exact_log2 (index);
+ if (index >= 0 && index <= 4)
+ {
+ unsigned int align = TYPE_ALIGN_UNIT (type);
- si = bsi_last (entry_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
- || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
- || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
- || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
- bsi_remove (&si, true);
- single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+ /* __sync builtins require strict data alignment. */
+ if (exact_log2 (align) >= index)
+ {
+ /* When possible, use specialized atomic update functions. */
+ if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
+ && store_bb == single_succ (load_bb))
+ {
+ if (expand_omp_atomic_fetch_op (load_bb, addr,
+ loaded_val, stored_val, index))
+ return;
+ }
- if (exit_bb)
- {
- si = bsi_last (exit_bb);
- gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
- bsi_remove (&si, true);
- single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
+ /* If we don't have specialized __sync builtins, try to implement
+ the operation as a compare-and-swap loop. */
+ if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
+ loaded_val, stored_val, index))
+ return;
+ }
}
+
+ /* The ultimate fallback is wrapping the operation in a mutex. */
+ expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
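The gate above is plain log2 arithmetic: a 4-byte, 4-byte-aligned int gives index 2 (selecting the *_4 builtins via base + index + 1), while an under-aligned access fails the exact_log2 (align) >= index test and drops to the mutex. A sketch of the check, with floor-log2 standing in for exact_log2:

    #include <stdio.h>

    /* Sketch: the size/alignment gate of expand_omp_atomic.  */
    static int
    floor_log2 (unsigned x)
    {
      int l = -1;
      while (x)
        {
          x >>= 1;
          l++;
        }
      return l;
    }

    int main (void)
    {
      unsigned size = 4, align = 4;            /* e.g. a plain int */
      int index = floor_log2 (size);           /* 2 -> *_4 builtins */
      printf ("index %d, __sync ok: %d\n", index,
              floor_log2 (align) >= index);
      return 0;
    }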
{
while (region)
{
+ /* First, determine whether this is a combined parallel+workshare
+ region. */
+ if (region->type == OMP_PARALLEL)
+ determine_parallel_type (region);
+
if (region->inner)
expand_omp (region->inner);
switch (region->type)
{
case OMP_PARALLEL:
- expand_omp_parallel (region);
+ expand_omp_taskreg (region);
+ break;
+
+ case OMP_TASK:
+ expand_omp_taskreg (region);
break;
case OMP_FOR:
expand_omp_synch (region);
break;
+ case OMP_ATOMIC_LOAD:
+ expand_omp_atomic (region);
+ break;
+
default:
gcc_unreachable ();
}
/* Helper for build_omp_regions. Scan the dominator tree starting at
- block BB. PARENT is the region that contains BB. */
+ block BB. PARENT is the region that contains BB. If SINGLE_TREE is
+ true, the function ends once a single tree is built (otherwise, a
+ whole forest of OMP constructs may be built). */
static void
-build_omp_regions_1 (basic_block bb, struct omp_region *parent)
+build_omp_regions_1 (basic_block bb, struct omp_region *parent,
+ bool single_tree)
{
block_stmt_iterator si;
tree stmt;
stmt = bsi_stmt (si);
code = TREE_CODE (stmt);
-
if (code == OMP_RETURN)
{
/* STMT is the return point out of region PARENT. Mark it
region = parent;
region->exit = bb;
parent = parent->outer;
-
- /* If REGION is a parallel region, determine whether it is
- a combined parallel+workshare region. */
- if (region->type == OMP_PARALLEL)
- determine_parallel_type (region);
}
+ else if (code == OMP_ATOMIC_STORE)
+ {
+ /* OMP_ATOMIC_STORE is analogous to OMP_RETURN, but matches with
+ OMP_ATOMIC_LOAD. */
+ gcc_assert (parent);
+ gcc_assert (parent->type == OMP_ATOMIC_LOAD);
+ region = parent;
+ region->exit = bb;
+ parent = parent->outer;
+ }
else if (code == OMP_CONTINUE)
{
gcc_assert (parent);
else if (code == OMP_SECTIONS_SWITCH)
{
/* OMP_SECTIONS_SWITCH is part of OMP_SECTIONS, and we do nothing for
- it. */
+ it. */ ;
}
else
{
}
}
+ if (single_tree && !parent)
+ return;
+
for (son = first_dom_son (CDI_DOMINATORS, bb);
son;
son = next_dom_son (CDI_DOMINATORS, son))
- build_omp_regions_1 (son, parent);
+ build_omp_regions_1 (son, parent, single_tree);
+}
+
+/* Builds the tree of OMP regions rooted at ROOT, storing it to
+ root_omp_region. */
+
+static void
+build_omp_regions_root (basic_block root)
+{
+ gcc_assert (root_omp_region == NULL);
+ build_omp_regions_1 (root, NULL, true);
+ gcc_assert (root_omp_region != NULL);
}
+/* Expand the omp construct (and its subconstructs) starting in HEAD. */
+
+void
+omp_expand_local (basic_block head)
+{
+ build_omp_regions_root (head);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nOMP region tree\n\n");
+ dump_omp_region (dump_file, root_omp_region, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ remove_exit_barriers (root_omp_region);
+ expand_omp (root_omp_region);
+
+ free_omp_regions ();
+}
/* Scan the CFG and build a tree of OMP regions. Return the root of
the OMP region tree. */
{
gcc_assert (root_omp_region == NULL);
calculate_dominance_info (CDI_DOMINATORS);
- build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
+ build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
}
expand_omp (root_omp_region);
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
cleanup_tree_cfg ();
free_omp_regions ();
return 0;
}
+/* OMP expansion -- the default pass, run before creation of SSA form. */
+
static bool
gate_expand_omp (void)
{
- return flag_openmp != 0 && errorcount == 0;
+ return (flag_openmp != 0 && errorcount == 0);
}
-struct tree_opt_pass pass_expand_omp =
+struct gimple_opt_pass pass_expand_omp =
{
+ {
+ GIMPLE_PASS,
"ompexp", /* name */
gate_expand_omp, /* gate */
execute_expand_omp, /* execute */
PROP_gimple_lomp, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- TODO_dump_func, /* todo_flags_finish */
- 0 /* letter */
+ TODO_dump_func /* todo_flags_finish */
+ }
};
\f
/* Routines to lower OpenMP directives into OMP-GIMPLE. */
tree t;
t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_START], 0);
+ if (TREE_TYPE (t) != boolean_type_node)
+ t = fold_build2 (NE_EXPR, boolean_type_node,
+ t, build_int_cst (TREE_TYPE (t), 0));
t = build3 (COND_EXPR, void_type_node, t,
OMP_SINGLE_BODY (single_stmt), NULL);
gimplify_and_add (t, pre_p);
tree clauses, cond, stmts, vinit, t;
enum tree_code cond_code;
- cond_code = fd->cond_code;
+ cond_code = fd->loop.cond_code;
cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
/* When possible, use a strict equality expression. This can let VRP
type optimizations deduce the value and remove a copy. */
- if (host_integerp (fd->step, 0))
+ if (host_integerp (fd->loop.step, 0))
{
- HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step);
+ HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->loop.step);
if (step == 1 || step == -1)
cond_code = EQ_EXPR;
}
- cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);
+ cond = build2 (cond_code, boolean_type_node, fd->loop.v, fd->loop.n2);
clauses = OMP_FOR_CLAUSES (fd->for_stmt);
stmts = NULL;
lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
if (stmts != NULL)
{
- append_to_statement_list (stmts, dlist);
+ append_to_statement_list (*dlist, &stmts);
+ *dlist = stmts;
/* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
- vinit = fd->n1;
+ vinit = fd->loop.n1;
if (cond_code == EQ_EXPR
- && host_integerp (fd->n2, 0)
- && ! integer_zerop (fd->n2))
- vinit = build_int_cst (TREE_TYPE (fd->v), 0);
+ && host_integerp (fd->loop.n2, 0)
+ && ! integer_zerop (fd->loop.n2))
+ vinit = build_int_cst (TREE_TYPE (fd->loop.v), 0);
/* Initialize the iterator variable, so that threads that don't execute
any iterations don't execute the lastprivate clauses by accident. */
- t = build_gimple_modify_stmt (fd->v, vinit);
+ t = build_gimple_modify_stmt (fd->loop.v, vinit);
gimplify_and_add (t, body_p);
}
}
{
tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p;
struct omp_for_data fd;
+ int i;
stmt = *stmt_p;
/* The pre-body and input clauses go before the lowered OMP_FOR. */
ilist = NULL;
dlist = NULL;
- append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx);
+ append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
/* Lower the header expressions. At this point, we can assume that
the header is of the form:
We just need to make sure that VAL1, VAL2 and VAL3 are lowered
using the .omp_data_s mapping, if needed. */
- rhs_p = &GIMPLE_STMT_OPERAND (OMP_FOR_INIT (stmt), 1);
- if (!is_gimple_min_invariant (*rhs_p))
- *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
-
- rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1);
- if (!is_gimple_min_invariant (*rhs_p))
- *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
-
- rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
- if (!is_gimple_min_invariant (*rhs_p))
- *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
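+  /* Each of OMP_FOR_INIT, OMP_FOR_COND and OMP_FOR_INCR is now a
+     TREE_VEC with one element per loop in the collapse nest (e.g. of
+     length 2 for collapse(2)), so lower the right-hand side of every
+     element.  */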
+ for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (stmt)); i++)
+ {
+ rhs_p = &GIMPLE_STMT_OPERAND (TREE_VEC_ELT (OMP_FOR_INIT (stmt), i), 1);
+ if (!is_gimple_min_invariant (*rhs_p))
+ *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
+
+ rhs_p = &TREE_OPERAND (TREE_VEC_ELT (OMP_FOR_COND (stmt), i), 1);
+ if (!is_gimple_min_invariant (*rhs_p))
+ *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
+
+ rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND
+ (TREE_VEC_ELT (OMP_FOR_INCR (stmt), i), 1), 1);
+ if (!is_gimple_min_invariant (*rhs_p))
+ *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
+ }
/* Once lowered, extract the bounds and clauses. */
- extract_omp_for_data (stmt, &fd);
+ extract_omp_for_data (stmt, &fd, NULL);
lower_omp_for_lastprivate (&fd, body_p, &dlist, ctx);
append_to_statement_list (OMP_FOR_BODY (stmt), body_p);
- t = build2 (OMP_CONTINUE, void_type_node, fd.v, fd.v);
+ t = build2 (OMP_CONTINUE, void_type_node, fd.loop.v, fd.loop.v);
append_to_statement_list (t, body_p);
/* After the loop, add exit clauses. */
return NULL;
}
-/* Lower the OpenMP parallel directive in *STMT_P. CTX holds context
+struct omp_taskcopy_context
+{
+  /* This field must come first, as we do "inheritance": Some callback
+     functions for tree-inline.c (e.g., omp_copy_decl) receive a
+     copy_body_data pointer that is up-cast to an omp_taskcopy_context
+     pointer.  */
+ copy_body_data cb;
+ omp_context *ctx;
+};
+
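+/* The copy_decl hook used while building the task copy function:
+   a variable with an entry in sfield_map lives in the task's data
+   records and is remapped to a fresh temporary; any other decl is
+   left untouched.  */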
+static tree
+task_copyfn_copy_decl (tree var, copy_body_data *cb)
+{
+ struct omp_taskcopy_context *tcctx = (struct omp_taskcopy_context *) cb;
+
+ if (splay_tree_lookup (tcctx->ctx->sfield_map, (splay_tree_key) var))
+ return create_tmp_var (TREE_TYPE (var), NULL);
+
+ return var;
+}
+
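+/* Make a remapped copy of the record type ORIG_TYPE for use in the
+   task copy function, walking each field's size and offset with
+   copy_body_r so that references to variably sized values point at
+   the copy function's own temporaries.  */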
+static tree
+task_copyfn_remap_type (struct omp_taskcopy_context *tcctx, tree orig_type)
+{
+ tree name, new_fields = NULL, type, f;
+
+ type = lang_hooks.types.make_type (RECORD_TYPE);
+ name = DECL_NAME (TYPE_NAME (orig_type));
+ name = build_decl (TYPE_DECL, name, type);
+ TYPE_NAME (type) = name;
+
+ for (f = TYPE_FIELDS (orig_type); f ; f = TREE_CHAIN (f))
+ {
+ tree new_f = copy_node (f);
+ DECL_CONTEXT (new_f) = type;
+ TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &tcctx->cb);
+ TREE_CHAIN (new_f) = new_fields;
+ walk_tree (&DECL_SIZE (new_f), copy_body_r, &tcctx->cb, NULL);
+ walk_tree (&DECL_SIZE_UNIT (new_f), copy_body_r, &tcctx->cb, NULL);
+ walk_tree (&DECL_FIELD_OFFSET (new_f), copy_body_r, &tcctx->cb, NULL);
+ new_fields = new_f;
+ *pointer_map_insert (tcctx->cb.decl_map, f) = new_f;
+ }
+ TYPE_FIELDS (type) = nreverse (new_fields);
+ layout_type (type);
+ return type;
+}
+
+/* Create the task copy function, which copies or copy-constructs the
+   firstprivate variables of an OMP_TASK from the encountering thread's
+   values into the task's own data block.  */
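+/* A sketch of the generated function (all names here are merely
+   illustrative):
+
+       void .omp_cpyfn (struct .omp_data_t *dst, struct .omp_data_s *src)
+       {
+         dst->shared_ptr = src->shared_ptr;
+         <copy-construct dst->firstprivate_var from src->firstprivate_var>;
+         ...
+       }
+
+   SRC describes the data of the thread that encountered the task;
+   DST is the data block allocated for the task itself.  */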
+
+static void
+create_task_copyfn (tree task_stmt, omp_context *ctx)
+{
+ struct function *child_cfun;
+ tree child_fn, t, c, src, dst, f, sf, arg, sarg, decl;
+ tree record_type, srecord_type, bind, list;
+ bool record_needs_remap = false, srecord_needs_remap = false;
+ splay_tree_node n;
+ struct omp_taskcopy_context tcctx;
+
+ child_fn = OMP_TASK_COPYFN (task_stmt);
+ child_cfun = DECL_STRUCT_FUNCTION (child_fn);
+ gcc_assert (child_cfun->cfg == NULL);
+ child_cfun->dont_save_pending_sizes_p = 1;
+ DECL_SAVED_TREE (child_fn) = alloc_stmt_list ();
+
+ /* Reset DECL_CONTEXT on function arguments. */
+ for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
+ DECL_CONTEXT (t) = child_fn;
+
+ /* Populate the function. */
+ push_gimplify_context ();
+ current_function_decl = child_fn;
+
+ bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
+ TREE_SIDE_EFFECTS (bind) = 1;
+ list = NULL;
+ DECL_SAVED_TREE (child_fn) = bind;
+ DECL_SOURCE_LOCATION (child_fn) = EXPR_LOCATION (task_stmt);
+
+ /* Remap src and dst argument types if needed. */
+ record_type = ctx->record_type;
+ srecord_type = ctx->srecord_type;
+ for (f = TYPE_FIELDS (record_type); f ; f = TREE_CHAIN (f))
+ if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
+ {
+ record_needs_remap = true;
+ break;
+ }
+ for (f = TYPE_FIELDS (srecord_type); f ; f = TREE_CHAIN (f))
+ if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
+ {
+ srecord_needs_remap = true;
+ break;
+ }
+
+ if (record_needs_remap || srecord_needs_remap)
+ {
+ memset (&tcctx, '\0', sizeof (tcctx));
+ tcctx.cb.src_fn = ctx->cb.src_fn;
+ tcctx.cb.dst_fn = child_fn;
+ tcctx.cb.src_node = cgraph_node (tcctx.cb.src_fn);
+ tcctx.cb.dst_node = tcctx.cb.src_node;
+ tcctx.cb.src_cfun = ctx->cb.src_cfun;
+ tcctx.cb.copy_decl = task_copyfn_copy_decl;
+ tcctx.cb.eh_region = -1;
+ tcctx.cb.transform_call_graph_edges = CB_CGE_MOVE;
+ tcctx.cb.decl_map = pointer_map_create ();
+ tcctx.ctx = ctx;
+
+ if (record_needs_remap)
+ record_type = task_copyfn_remap_type (&tcctx, record_type);
+ if (srecord_needs_remap)
+ srecord_type = task_copyfn_remap_type (&tcctx, srecord_type);
+ }
+ else
+ tcctx.cb.decl_map = NULL;
+
+ push_cfun (child_cfun);
+
+ arg = DECL_ARGUMENTS (child_fn);
+ TREE_TYPE (arg) = build_pointer_type (record_type);
+ sarg = TREE_CHAIN (arg);
+ TREE_TYPE (sarg) = build_pointer_type (srecord_type);
+
+  /* First pass: initialize the temporaries used in the sizes and
+     field offsets of record_type and srecord_type.  */
+ if (tcctx.cb.decl_map)
+ for (c = OMP_TASK_CLAUSES (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
+ {
+ tree *p;
+
+ decl = OMP_CLAUSE_DECL (c);
+ p = (tree *) pointer_map_contains (tcctx.cb.decl_map, decl);
+ if (p == NULL)
+ continue;
+ n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
+ sf = (tree) n->value;
+ sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
+ src = build_fold_indirect_ref (sarg);
+ src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
+ t = build_gimple_modify_stmt (*p, src);
+ append_to_statement_list (t, &list);
+ }
+
+  /* Second pass: copy shared variable pointers and copy-construct
+     non-VLA firstprivate variables.  */
+ for (c = OMP_TASK_CLAUSES (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_SHARED:
+ decl = OMP_CLAUSE_DECL (c);
+ n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+ if (n == NULL)
+ break;
+ f = (tree) n->value;
+ if (tcctx.cb.decl_map)
+ f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
+ n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
+ sf = (tree) n->value;
+ if (tcctx.cb.decl_map)
+ sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
+ src = build_fold_indirect_ref (sarg);
+ src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
+ dst = build_fold_indirect_ref (arg);
+ dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
+ t = build_gimple_modify_stmt (dst, src);
+ append_to_statement_list (t, &list);
+ break;
+ case OMP_CLAUSE_FIRSTPRIVATE:
+ decl = OMP_CLAUSE_DECL (c);
+ if (is_variable_sized (decl))
+ break;
+ n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+ if (n == NULL)
+ break;
+ f = (tree) n->value;
+ if (tcctx.cb.decl_map)
+ f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
+ n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
+ if (n != NULL)
+ {
+ sf = (tree) n->value;
+ if (tcctx.cb.decl_map)
+ sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
+ src = build_fold_indirect_ref (sarg);
+ src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
+ if (use_pointer_for_field (decl, NULL) || is_reference (decl))
+ src = build_fold_indirect_ref (src);
+ }
+ else
+ src = decl;
+ dst = build_fold_indirect_ref (arg);
+ dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
+ t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
+ append_to_statement_list (t, &list);
+ break;
+ case OMP_CLAUSE_PRIVATE:
+ if (! OMP_CLAUSE_PRIVATE_OUTER_REF (c))
+ break;
+ decl = OMP_CLAUSE_DECL (c);
+ n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+ f = (tree) n->value;
+ if (tcctx.cb.decl_map)
+ f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
+ n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
+ if (n != NULL)
+ {
+ sf = (tree) n->value;
+ if (tcctx.cb.decl_map)
+ sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
+ src = build_fold_indirect_ref (sarg);
+ src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
+ if (use_pointer_for_field (decl, NULL))
+ src = build_fold_indirect_ref (src);
+ }
+ else
+ src = decl;
+ dst = build_fold_indirect_ref (arg);
+ dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
+ t = build_gimple_modify_stmt (dst, src);
+ append_to_statement_list (t, &list);
+ break;
+ default:
+ break;
+ }
+
+ /* Last pass: handle VLA firstprivates. */
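+  /* A variable-sized firstprivate DECL has a DECL_VALUE_EXPR of the
+     form *ptr; copy-construct the data into the task's record, then
+     make the record's pointer field point at the fresh copy.  */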
+ if (tcctx.cb.decl_map)
+ for (c = OMP_TASK_CLAUSES (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
+ {
+ tree ind, ptr, df;
+
+ decl = OMP_CLAUSE_DECL (c);
+ if (!is_variable_sized (decl))
+ continue;
+ n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+ if (n == NULL)
+ continue;
+ f = (tree) n->value;
+ f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
+ gcc_assert (DECL_HAS_VALUE_EXPR_P (decl));
+ ind = DECL_VALUE_EXPR (decl);
+ gcc_assert (TREE_CODE (ind) == INDIRECT_REF);
+ gcc_assert (DECL_P (TREE_OPERAND (ind, 0)));
+ n = splay_tree_lookup (ctx->sfield_map,
+ (splay_tree_key) TREE_OPERAND (ind, 0));
+ sf = (tree) n->value;
+ sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
+ src = build_fold_indirect_ref (sarg);
+ src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
+ src = build_fold_indirect_ref (src);
+ dst = build_fold_indirect_ref (arg);
+ dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
+ t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
+ append_to_statement_list (t, &list);
+ n = splay_tree_lookup (ctx->field_map,
+ (splay_tree_key) TREE_OPERAND (ind, 0));
+ df = (tree) n->value;
+ df = *(tree *) pointer_map_contains (tcctx.cb.decl_map, df);
+ ptr = build_fold_indirect_ref (arg);
+ ptr = build3 (COMPONENT_REF, TREE_TYPE (df), ptr, df, NULL);
+ t = build_gimple_modify_stmt (ptr, build_fold_addr_expr (dst));
+ append_to_statement_list (t, &list);
+ }
+
+ t = build1 (RETURN_EXPR, void_type_node, NULL);
+ append_to_statement_list (t, &list);
+
+ if (tcctx.cb.decl_map)
+ pointer_map_destroy (tcctx.cb.decl_map);
+ pop_gimplify_context (NULL);
+ BIND_EXPR_BODY (bind) = list;
+ pop_cfun ();
+ current_function_decl = ctx->cb.src_fn;
+}
+
+/* Lower the OpenMP parallel or task directive in *STMT_P. CTX holds context
information for the directive. */
static void
-lower_omp_parallel (tree *stmt_p, omp_context *ctx)
+lower_omp_taskreg (tree *stmt_p, omp_context *ctx)
{
tree clauses, par_bind, par_body, new_body, bind;
tree olist, ilist, par_olist, par_ilist;
stmt = *stmt_p;
- clauses = OMP_PARALLEL_CLAUSES (stmt);
- par_bind = OMP_PARALLEL_BODY (stmt);
+ clauses = OMP_TASKREG_CLAUSES (stmt);
+ par_bind = OMP_TASKREG_BODY (stmt);
par_body = BIND_EXPR_BODY (par_bind);
child_fn = ctx->cb.dst_fn;
- if (!OMP_PARALLEL_COMBINED (stmt))
+ if (TREE_CODE (stmt) == OMP_PARALLEL && !OMP_PARALLEL_COMBINED (stmt))
{
struct walk_stmt_info wi;
int ws_num = 0;
if (ws_num == 1)
OMP_PARALLEL_COMBINED (stmt) = 1;
}
+ if (ctx->srecord_type)
+ create_task_copyfn (stmt, ctx);
push_gimplify_context ();
par_ilist = NULL_TREE;
lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
lower_omp (&par_body, ctx);
- lower_reduction_clauses (clauses, &par_olist, ctx);
+ if (TREE_CODE (stmt) == OMP_PARALLEL)
+ lower_reduction_clauses (clauses, &par_olist, ctx);
/* Declare all the variables created by mapping and the variables
declared in the scope of the parallel body. */
if (ctx->record_type)
{
- ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o");
- OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl;
+ ctx->sender_decl
+ = create_tmp_var (ctx->srecord_type ? ctx->srecord_type
+ : ctx->record_type, ".omp_data_o");
+ OMP_TASKREG_DATA_ARG (stmt) = ctx->sender_decl;
}
olist = NULL_TREE;
lower_send_shared_vars (&ilist, &olist, ctx);
/* Once all the expansions are done, sequence all the different
- fragments inside OMP_PARALLEL_BODY. */
+ fragments inside OMP_TASKREG_BODY. */
bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
append_to_statement_list (ilist, &BIND_EXPR_BODY (bind));
maybe_catch_exception (&new_body);
t = make_node (OMP_RETURN);
append_to_statement_list (t, &new_body);
- OMP_PARALLEL_BODY (stmt) = new_body;
+ OMP_TASKREG_BODY (stmt) = new_body;
append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
append_to_statement_list (olist, &BIND_EXPR_BODY (bind));
pop_gimplify_context (NULL_TREE);
}
-
-/* Pass *TP back through the gimplifier within the context determined by WI.
- This handles replacement of DECL_VALUE_EXPR, as well as adjusting the
- flags on ADDR_EXPR. */
-
-static void
-lower_regimplify (tree *tp, struct walk_stmt_info *wi)
-{
- enum gimplify_status gs;
- tree pre = NULL;
-
- if (wi->is_lhs)
- gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue);
- else if (wi->val_only)
- gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue);
- else
- gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue);
- gcc_assert (gs == GS_ALL_DONE);
-
- if (pre)
- tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT);
-}
-
-/* Copy EXP into a temporary. Insert the initialization statement before TSI. */
+/* Callback for lower_omp_1. Return non-NULL if *tp needs to be
+ regimplified. */
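+/* For instance, a privatized variable-length array is accessed through
+   a DECL_VALUE_EXPR such as *vla.1 (the name is illustrative), so any
+   statement still mentioning the original decl has to go through the
+   gimplifier again.  */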
static tree
-init_tmp_var (tree exp, tree_stmt_iterator *tsi)
+lower_omp_2 (tree *tp, int *walk_subtrees, void *data)
{
- tree t, stmt;
-
- t = create_tmp_var (TREE_TYPE (exp), NULL);
- DECL_GIMPLE_REG_P (t) = 1;
- stmt = build_gimple_modify_stmt (t, exp);
- SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
- tsi_link_before (tsi, stmt, TSI_SAME_STMT);
-
- return t;
-}
-
-/* Similarly, but copy from the temporary and insert the statement
- after the iterator. */
+ tree t = *tp;
+ omp_context *ctx = data;
-static tree
-save_tmp_var (tree exp, tree_stmt_iterator *tsi)
-{
- tree t, stmt;
+  /* Any variable with DECL_VALUE_EXPR, or any variable recorded in
+     task_shared_vars, needs to be regimplified.  */
+ if (TREE_CODE (t) == VAR_DECL
+ && ((ctx && DECL_HAS_VALUE_EXPR_P (t))
+ || (task_shared_vars
+ && bitmap_bit_p (task_shared_vars, DECL_UID (t)))))
+ return t;
- t = create_tmp_var (TREE_TYPE (exp), NULL);
- DECL_GIMPLE_REG_P (t) = 1;
- stmt = build_gimple_modify_stmt (exp, t);
- SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
- tsi_link_after (tsi, stmt, TSI_SAME_STMT);
+ /* If a global variable has been privatized, TREE_CONSTANT on
+ ADDR_EXPR might be wrong. */
+ if (ctx && TREE_CODE (t) == ADDR_EXPR)
+ recompute_tree_invariant_for_addr_expr (t);
- return t;
+ *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
+ return NULL_TREE;
}
-/* Callback for walk_stmts. Lower the OpenMP directive pointed by TP. */
-
-static tree
-lower_omp_1 (tree *tp, int *walk_subtrees, void *data)
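+/* Lower all OpenMP directives found in *TP.  CTX is the omp_context of
+   the innermost enclosing directive, or NULL outside of any directive.
+   TSI, if non-NULL, points at *TP within its containing statement
+   list; statements produced by regimplification are linked in before
+   it.  */
+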
+static void
+lower_omp_1 (tree *tp, omp_context *ctx, tree_stmt_iterator *tsi)
{
- struct walk_stmt_info *wi = data;
- omp_context *ctx = wi->info;
tree t = *tp;
+ if (!t)
+ return;
+
+ if (EXPR_HAS_LOCATION (t))
+ input_location = EXPR_LOCATION (t);
+
/* If we have issued syntax errors, avoid doing any heavy lifting.
Just replace the OpenMP directives with a NOP to avoid
confusing RTL expansion. */
- if (errorcount && OMP_DIRECTIVE_P (*tp))
+ if (errorcount && OMP_DIRECTIVE_P (t))
{
*tp = build_empty_stmt ();
- return NULL_TREE;
+ return;
}
- *walk_subtrees = 0;
- switch (TREE_CODE (*tp))
+ switch (TREE_CODE (t))
{
+ case STATEMENT_LIST:
+ {
+ tree_stmt_iterator i;
+ for (i = tsi_start (t); !tsi_end_p (i); tsi_next (&i))
+ lower_omp_1 (tsi_stmt_ptr (i), ctx, &i);
+ }
+ break;
+
+ case COND_EXPR:
+ lower_omp_1 (&COND_EXPR_THEN (t), ctx, NULL);
+ lower_omp_1 (&COND_EXPR_ELSE (t), ctx, NULL);
+ if ((ctx || task_shared_vars)
+ && walk_tree (&COND_EXPR_COND (t), lower_omp_2, ctx, NULL))
+ {
+ tree pre = NULL;
+ gimplify_expr (&COND_EXPR_COND (t), &pre, NULL,
+ is_gimple_condexpr, fb_rvalue);
+ if (pre)
+ {
+ if (tsi)
+ tsi_link_before (tsi, pre, TSI_SAME_STMT);
+ else
+ {
+ append_to_statement_list (t, &pre);
+ *tp = pre;
+ }
+ }
+ }
+ break;
+ case CATCH_EXPR:
+ lower_omp_1 (&CATCH_BODY (t), ctx, NULL);
+ break;
+ case EH_FILTER_EXPR:
+ lower_omp_1 (&EH_FILTER_FAILURE (t), ctx, NULL);
+ break;
+ case TRY_CATCH_EXPR:
+ case TRY_FINALLY_EXPR:
+ lower_omp_1 (&TREE_OPERAND (t, 0), ctx, NULL);
+ lower_omp_1 (&TREE_OPERAND (t, 1), ctx, NULL);
+ break;
+ case BIND_EXPR:
+ lower_omp_1 (&BIND_EXPR_BODY (t), ctx, NULL);
+ break;
+ case RETURN_EXPR:
+ lower_omp_1 (&TREE_OPERAND (t, 0), ctx, NULL);
+ break;
+
case OMP_PARALLEL:
+ case OMP_TASK:
ctx = maybe_lookup_ctx (t);
- lower_omp_parallel (tp, ctx);
+ lower_omp_taskreg (tp, ctx);
break;
-
case OMP_FOR:
ctx = maybe_lookup_ctx (t);
gcc_assert (ctx);
lower_omp_for (tp, ctx);
break;
-
case OMP_SECTIONS:
ctx = maybe_lookup_ctx (t);
gcc_assert (ctx);
lower_omp_sections (tp, ctx);
break;
-
case OMP_SINGLE:
ctx = maybe_lookup_ctx (t);
gcc_assert (ctx);
lower_omp_single (tp, ctx);
break;
-
case OMP_MASTER:
ctx = maybe_lookup_ctx (t);
gcc_assert (ctx);
lower_omp_master (tp, ctx);
break;
-
case OMP_ORDERED:
ctx = maybe_lookup_ctx (t);
gcc_assert (ctx);
lower_omp_ordered (tp, ctx);
break;
-
case OMP_CRITICAL:
ctx = maybe_lookup_ctx (t);
gcc_assert (ctx);
lower_omp_critical (tp, ctx);
break;
- case VAR_DECL:
- if (ctx && DECL_HAS_VALUE_EXPR_P (t))
+ default:
+ if ((ctx || task_shared_vars)
+ && walk_tree (tp, lower_omp_2, ctx, NULL))
{
- lower_regimplify (&t, wi);
- if (wi->val_only)
+ /* The gimplifier doesn't gimplify CALL_EXPR_STATIC_CHAIN.
+ Handle that here. */
+ tree call = get_call_expr_in (t);
+ if (call
+ && CALL_EXPR_STATIC_CHAIN (call)
+ && walk_tree (&CALL_EXPR_STATIC_CHAIN (call), lower_omp_2,
+ ctx, NULL))
{
- if (wi->is_lhs)
- t = save_tmp_var (t, &wi->tsi);
- else
- t = init_tmp_var (t, &wi->tsi);
+ tree pre = NULL;
+ gimplify_expr (&CALL_EXPR_STATIC_CHAIN (call), &pre, NULL,
+ is_gimple_val, fb_rvalue);
+ if (pre)
+ {
+ if (tsi)
+ tsi_link_before (tsi, pre, TSI_SAME_STMT);
+ else
+ {
+ append_to_statement_list (t, &pre);
+ lower_omp_1 (&pre, ctx, NULL);
+ *tp = pre;
+ return;
+ }
+ }
}
- *tp = t;
- }
- break;
-
- case ADDR_EXPR:
- if (ctx)
- lower_regimplify (tp, wi);
- break;
-
- case ARRAY_REF:
- case ARRAY_RANGE_REF:
- case REALPART_EXPR:
- case IMAGPART_EXPR:
- case COMPONENT_REF:
- case VIEW_CONVERT_EXPR:
- if (ctx)
- lower_regimplify (tp, wi);
- break;
- case INDIRECT_REF:
- if (ctx)
- {
- wi->is_lhs = false;
- wi->val_only = true;
- lower_regimplify (&TREE_OPERAND (t, 0), wi);
+ if (tsi == NULL)
+ gimplify_stmt (tp);
+ else
+ {
+ tree pre = NULL;
+ gimplify_expr (tp, &pre, NULL, is_gimple_stmt, fb_none);
+ if (pre)
+ tsi_link_before (tsi, pre, TSI_SAME_STMT);
+ }
}
break;
-
- default:
- if (!TYPE_P (t) && !DECL_P (t))
- *walk_subtrees = 1;
- break;
}
-
- return NULL_TREE;
}
static void
lower_omp (tree *stmt_p, omp_context *ctx)
{
- struct walk_stmt_info wi;
-
- memset (&wi, 0, sizeof (wi));
- wi.callback = lower_omp_1;
- wi.info = ctx;
- wi.val_only = true;
- wi.want_locations = true;
-
- walk_stmts (&wi, stmt_p);
+ lower_omp_1 (stmt_p, ctx, NULL);
}
\f
/* Main entry point. */
delete_omp_context);
scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
- gcc_assert (parallel_nesting_level == 0);
+ gcc_assert (taskreg_nesting_level == 0);
if (all_contexts->root)
- lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
+ {
+ if (task_shared_vars)
+ push_gimplify_context ();
+ lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
+ if (task_shared_vars)
+ pop_gimplify_context (NULL);
+ }
if (all_contexts)
{
splay_tree_delete (all_contexts);
all_contexts = NULL;
}
+ BITMAP_FREE (task_shared_vars);
return 0;
}
return flag_openmp != 0;
}
-struct tree_opt_pass pass_lower_omp =
+struct gimple_opt_pass pass_lower_omp =
{
+ {
+ GIMPLE_PASS,
"omplower", /* name */
gate_lower_omp, /* gate */
execute_lower_omp, /* execute */
PROP_gimple_lomp, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- TODO_dump_func, /* todo_flags_finish */
- 0 /* letter */
+ TODO_dump_func /* todo_flags_finish */
+ }
};
\f
/* The following is a utility to diagnose OpenMP structured block violations.
return false;
  /* Try to avoid confusing the user by producing an error message
- with correct "exit" or "enter" verbage. We prefer "exit"
+ with correct "exit" or "enter" verbiage. We prefer "exit"
unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
if (branch_ctx == NULL)
exit_p = false;
tree context = (tree) wi->info;
tree inner_context;
tree t = *tp;
+ int i;
*walk_subtrees = 0;
switch (TREE_CODE (t))
{
case OMP_PARALLEL:
+ case OMP_TASK:
case OMP_SECTIONS:
case OMP_SINGLE:
walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL);
walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL);
inner_context = tree_cons (NULL, t, context);
wi->info = inner_context;
- walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL);
- walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL);
- walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL);
+ for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (t)); i++)
+ {
+ walk_tree (&TREE_VEC_ELT (OMP_FOR_INIT (t), i), diagnose_sb_1,
+ wi, NULL);
+ walk_tree (&TREE_VEC_ELT (OMP_FOR_COND (t), i), diagnose_sb_1,
+ wi, NULL);
+ walk_tree (&TREE_VEC_ELT (OMP_FOR_INCR (t), i), diagnose_sb_1,
+ wi, NULL);
+ }
walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
walk_stmts (wi, &OMP_FOR_BODY (t));
wi->info = context;
tree context = (tree) wi->info;
splay_tree_node n;
tree t = *tp;
+ int i;
*walk_subtrees = 0;
switch (TREE_CODE (t))
{
case OMP_PARALLEL:
+ case OMP_TASK:
case OMP_SECTIONS:
case OMP_SINGLE:
walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL);
case OMP_FOR:
walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL);
wi->info = t;
- walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL);
- walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL);
- walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL);
+ for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (t)); i++)
+ {
+ walk_tree (&TREE_VEC_ELT (OMP_FOR_INIT (t), i), diagnose_sb_2,
+ wi, NULL);
+ walk_tree (&TREE_VEC_ELT (OMP_FOR_COND (t), i), diagnose_sb_2,
+ wi, NULL);
+ walk_tree (&TREE_VEC_ELT (OMP_FOR_INCR (t), i), diagnose_sb_2,
+ wi, NULL);
+ }
walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
walk_stmts (wi, &OMP_FOR_BODY (t));
wi->info = context;