X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fomp-low.c;h=2e1a1b8aa8df46199af8a93a3e0f2cfa9d57b5bd;hb=ba587cde841db751f6f9c307798b538c761f54b6;hp=c6d186bee71507cd81867541436ff9622b72320b;hpb=9438af576b2c64026be187b075868da20d53a545;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/omp-low.c b/gcc/omp-low.c index c6d186bee71..2e1a1b8aa8d 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3,13 +3,13 @@ marshalling to implement data sharing and copying clauses. Contributed by Diego Novillo - Copyright (C) 2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -18,9 +18,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" #include "system.h" @@ -41,7 +40,9 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tree-pass.h" #include "ggc.h" #include "except.h" - +#include "splay-tree.h" +#include "optabs.h" +#include "cfgloop.h" /* Lowering of OpenMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -117,7 +118,7 @@ static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); /* Find an OpenMP clause of type KIND within CLAUSES. */ -static tree +tree find_omp_clause (tree clauses, enum tree_code kind) { for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) @@ -151,21 +152,22 @@ is_combined_parallel (struct omp_region *region) static void extract_omp_for_data (tree for_stmt, struct omp_for_data *fd) { - tree t; + tree t, var; fd->for_stmt = for_stmt; fd->pre = NULL; t = OMP_FOR_INIT (for_stmt); - gcc_assert (TREE_CODE (t) == MODIFY_EXPR); - fd->v = TREE_OPERAND (t, 0); - gcc_assert (DECL_P (fd->v)); + gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT); + fd->v = GIMPLE_STMT_OPERAND (t, 0); + gcc_assert (SSA_VAR_P (fd->v)); gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE); - fd->n1 = TREE_OPERAND (t, 1); + var = TREE_CODE (fd->v) == SSA_NAME ? 
SSA_NAME_VAR (fd->v) : fd->v; + fd->n1 = GIMPLE_STMT_OPERAND (t, 1); t = OMP_FOR_COND (for_stmt); fd->cond_code = TREE_CODE (t); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); + gcc_assert (TREE_OPERAND (t, 0) == var); fd->n2 = TREE_OPERAND (t, 1); switch (fd->cond_code) { @@ -187,10 +189,10 @@ extract_omp_for_data (tree for_stmt, struct omp_for_data *fd) } t = OMP_FOR_INCR (fd->for_stmt); - gcc_assert (TREE_CODE (t) == MODIFY_EXPR); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); - t = TREE_OPERAND (t, 1); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); + gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT); + gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == var); + t = GIMPLE_STMT_OPERAND (t, 1); + gcc_assert (TREE_OPERAND (t, 0) == var); switch (TREE_CODE (t)) { case PLUS_EXPR: @@ -347,8 +349,11 @@ get_ws_args_for (tree ws_stmt) } else if (TREE_CODE (ws_stmt) == OMP_SECTIONS) { - basic_block bb = bb_for_stmt (ws_stmt); - t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs)); + /* Number of sections is equal to the number of edges from the + OMP_SECTIONS_SWITCH statement, except for the one to the exit + of the sections region. */ + basic_block bb = single_succ (bb_for_stmt (ws_stmt)); + t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); t = tree_cons (NULL, t, NULL); return t; } @@ -365,7 +370,9 @@ determine_parallel_type (struct omp_region *region) basic_block par_entry_bb, par_exit_bb; basic_block ws_entry_bb, ws_exit_bb; - if (region == NULL || region->inner == NULL) + if (region == NULL || region->inner == NULL + || region->exit == NULL || region->inner->exit == NULL + || region->inner->cont == NULL) return; /* We only support parallel+for and parallel+sections. */ @@ -383,9 +390,12 @@ determine_parallel_type (struct omp_region *region) if (single_succ (par_entry_bb) == ws_entry_bb && single_succ (ws_exit_bb) == par_exit_bb - && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb)) + && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb) + && (OMP_PARALLEL_COMBINED (last_stmt (par_entry_bb)) + || (last_and_only_stmt (ws_entry_bb) + && last_and_only_stmt (par_exit_bb)))) { - tree ws_stmt = last_stmt (region->inner->entry); + tree ws_stmt = last_stmt (ws_entry_bb); if (region->inner->type == OMP_FOR) { @@ -420,7 +430,7 @@ determine_parallel_type (struct omp_region *region) /* Return true if EXPR is variable sized. */ static inline bool -is_variable_sized (tree expr) +is_variable_sized (const_tree expr) { return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr))); } @@ -440,17 +450,17 @@ is_reference (tree decl) static inline tree lookup_decl (tree var, omp_context *ctx) { - splay_tree_node n; - n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var); - return (tree) n->value; + tree *n; + n = (tree *) pointer_map_contains (ctx->cb.decl_map, var); + return *n; } static inline tree maybe_lookup_decl (tree var, omp_context *ctx) { - splay_tree_node n; - n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var); - return n ? (tree) n->value : NULL_TREE; + tree *n; + n = (tree *) pointer_map_contains (ctx->cb.decl_map, var); + return n ? *n : NULL_TREE; } static inline tree @@ -473,12 +483,12 @@ maybe_lookup_field (tree var, omp_context *ctx) if DECL is to be shared. 
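   (Editor's illustration, not part of the original patch: for a
   shared variable whose address has been taken, e.g.

     int x = 0;
     int *p = &x;
     #pragma omp parallel shared(x)
       x++;

   copy-in/copy-out would be unsafe because the region could observe
   x through *p, so the field must be a pointer; a non-escaping local
   can be copied in and out instead.)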
*/ static bool -use_pointer_for_field (tree decl, bool shared_p) +use_pointer_for_field (const_tree decl, bool shared_p) { if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) return true; - /* We can only use copy-in/copy-out semantics for shared varibles + /* We can only use copy-in/copy-out semantics for shared variables when we know the value is not accessible from an outer scope. */ if (shared_p) { @@ -505,21 +515,35 @@ use_pointer_for_field (tree decl, bool shared_p) return false; } -/* Construct a new automatic decl similar to VAR. */ +/* Create a new VAR_DECL and copy information from VAR to it. */ -static tree -omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx) +tree +copy_var_decl (tree var, tree name, tree type) { tree copy = build_decl (VAR_DECL, name, type); TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var); - DECL_COMPLEX_GIMPLE_REG_P (copy) = DECL_COMPLEX_GIMPLE_REG_P (var); + TREE_THIS_VOLATILE (copy) = TREE_THIS_VOLATILE (var); + DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (var); + DECL_NO_TBAA_P (copy) = DECL_NO_TBAA_P (var); DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var); DECL_IGNORED_P (copy) = DECL_IGNORED_P (var); + DECL_CONTEXT (copy) = DECL_CONTEXT (var); + DECL_SOURCE_LOCATION (copy) = DECL_SOURCE_LOCATION (var); TREE_USED (copy) = 1; - DECL_CONTEXT (copy) = current_function_decl; DECL_SEEN_IN_BIND_EXPR_P (copy) = 1; + return copy; +} + +/* Construct a new automatic decl similar to VAR. */ + +static tree +omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx) +{ + tree copy = copy_var_decl (var, name, type); + + DECL_CONTEXT (copy) = current_function_decl; TREE_CHAIN (copy) = ctx->block_vars; ctx->block_vars = copy; @@ -843,7 +867,7 @@ new_omp_context (tree stmt, omp_context *outer_ctx) ctx->depth = 1; } - ctx->cb.decl_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); + ctx->cb.decl_map = pointer_map_create (); return ctx; } @@ -856,7 +880,7 @@ delete_omp_context (splay_tree_value value) { omp_context *ctx = (omp_context *) value; - splay_tree_delete (ctx->cb.decl_map); + pointer_map_destroy (ctx->cb.decl_map); if (ctx->field_map) splay_tree_delete (ctx->field_map); @@ -1135,10 +1159,10 @@ create_omp_child_function (omp_context *ctx) /* Allocate memory for the function structure. The call to allocate_struct_function clobbers CFUN, so we need to restore it afterward. */ - allocate_struct_function (decl); + push_struct_function (decl); DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt); cfun->function_end_locus = EXPR_LOCATION (ctx->stmt); - cfun = ctx->cb.src_cfun; + pop_cfun (); } @@ -1245,6 +1269,84 @@ scan_omp_single (tree *stmt_p, omp_context *outer_ctx) } +/* Check OpenMP nesting restrictions. 
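+   For instance (editor's illustration, not part of the original
+   patch), the checks below warn on
+
+     #pragma omp parallel
+     #pragma omp for
+     for (i = 0; i < n; i++)
+       {
+         #pragma omp single
+         f (i);
+       }
+
+   because a work-sharing region may not be closely nested inside
+   another work-sharing region without an intervening parallel.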
*/ +static void +check_omp_nesting_restrictions (tree t, omp_context *ctx) +{ + switch (TREE_CODE (t)) + { + case OMP_FOR: + case OMP_SECTIONS: + case OMP_SINGLE: + for (; ctx != NULL; ctx = ctx->outer) + switch (TREE_CODE (ctx->stmt)) + { + case OMP_FOR: + case OMP_SECTIONS: + case OMP_SINGLE: + case OMP_ORDERED: + case OMP_MASTER: + warning (0, "work-sharing region may not be closely nested inside " + "of work-sharing, critical, ordered or master region"); + return; + case OMP_PARALLEL: + return; + default: + break; + } + break; + case OMP_MASTER: + for (; ctx != NULL; ctx = ctx->outer) + switch (TREE_CODE (ctx->stmt)) + { + case OMP_FOR: + case OMP_SECTIONS: + case OMP_SINGLE: + warning (0, "master region may not be closely nested inside " + "of work-sharing region"); + return; + case OMP_PARALLEL: + return; + default: + break; + } + break; + case OMP_ORDERED: + for (; ctx != NULL; ctx = ctx->outer) + switch (TREE_CODE (ctx->stmt)) + { + case OMP_CRITICAL: + warning (0, "ordered region may not be closely nested inside " + "of critical region"); + return; + case OMP_FOR: + if (find_omp_clause (OMP_CLAUSES (ctx->stmt), + OMP_CLAUSE_ORDERED) == NULL) + warning (0, "ordered region must be closely nested inside " + "a loop region with an ordered clause"); + return; + case OMP_PARALLEL: + return; + default: + break; + } + break; + case OMP_CRITICAL: + for (; ctx != NULL; ctx = ctx->outer) + if (TREE_CODE (ctx->stmt) == OMP_CRITICAL + && OMP_CRITICAL_NAME (t) == OMP_CRITICAL_NAME (ctx->stmt)) + { + warning (0, "critical region may not be nested inside a critical " + "region with the same name"); + return; + } + break; + default: + break; + } +} + + /* Callback for walk_stmts used to scan for OpenMP directives at TP. */ static tree @@ -1257,6 +1359,10 @@ scan_omp_1 (tree *tp, int *walk_subtrees, void *data) if (EXPR_HAS_LOCATION (t)) input_location = EXPR_LOCATION (t); + /* Check the OpenMP nesting restrictions. */ + if (OMP_DIRECTIVE_P (t) && ctx != NULL) + check_omp_nesting_restrictions (t, ctx); + *walk_subtrees = 0; switch (TREE_CODE (t)) { @@ -1341,14 +1447,10 @@ scan_omp (tree *stmt_p, omp_context *ctx) /* Build a call to GOMP_barrier. */ -static void -build_omp_barrier (tree *stmt_list) +static tree +build_omp_barrier (void) { - tree t; - - t = built_in_decls[BUILT_IN_GOMP_BARRIER]; - t = build_function_call_expr (t, NULL); - gimplify_and_add (t, stmt_list); + return build_call_expr (built_in_decls[BUILT_IN_GOMP_BARRIER], 0); } /* If a context was created for STMT when it was scanned, return it. */ @@ -1417,14 +1519,12 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) tree t; omp_context *up; - gcc_assert (ctx->is_nested); - for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) t = maybe_lookup_decl (decl, up); - gcc_assert (t); + gcc_assert (!ctx->is_nested || t || is_global_var (decl)); - return t; + return t ? t : decl; } @@ -1437,9 +1537,8 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) tree t = NULL; omp_context *up; - if (ctx->is_nested) - for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) - t = maybe_lookup_decl (decl, up); + for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) + t = maybe_lookup_decl (decl, up); return t ? 
t : decl; } @@ -1518,10 +1617,10 @@ omp_reduction_init (tree clause, tree type) static void lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, - omp_context *ctx) + omp_context *ctx) { tree_stmt_iterator diter; - tree c, dtor, copyin_seq, x, args, ptr; + tree c, dtor, copyin_seq, x, ptr; bool copyin_by_ref = false; bool lastprivate_firstprivate = false; int pass; @@ -1593,11 +1692,9 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, gcc_assert (DECL_P (ptr)); x = TYPE_SIZE_UNIT (TREE_TYPE (new_var)); - args = tree_cons (NULL, x, NULL); - x = built_in_decls[BUILT_IN_ALLOCA]; - x = build_function_call_expr (x, args); + x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x); x = fold_convert (TREE_TYPE (ptr), x); - x = build2 (MODIFY_EXPR, void_type_node, ptr, x); + x = build_gimple_modify_stmt (ptr, x); gimplify_and_add (x, ilist); } else if (is_reference (var)) @@ -1627,13 +1724,11 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, } else { - args = tree_cons (NULL, x, NULL); - x = built_in_decls[BUILT_IN_ALLOCA]; - x = build_function_call_expr (x, args); + x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x); x = fold_convert (TREE_TYPE (new_var), x); } - x = build2 (MODIFY_EXPR, void_type_node, new_var, x); + x = build_gimple_modify_stmt (new_var, x); gimplify_and_add (x, ilist); new_var = build_fold_indirect_ref (new_var); @@ -1716,7 +1811,7 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, { x = omp_reduction_init (c, TREE_TYPE (new_var)); gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE); - x = build2 (MODIFY_EXPR, void_type_node, new_var, x); + x = build_gimple_modify_stmt (new_var, x); gimplify_and_add (x, ilist); } break; @@ -1732,8 +1827,7 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, but it certainly is to C++ operator=. */ if (copyin_seq) { - x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; - x = build_function_call_expr (x, NULL); + x = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0); x = build2 (NE_EXPR, boolean_type_node, x, build_int_cst (TREE_TYPE (x), 0)); x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL); @@ -1746,7 +1840,7 @@ lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, lastprivate clauses we need to ensure the lastprivate copying happens after firstprivate copying in all threads. */ if (copyin_by_ref || lastprivate_firstprivate) - build_omp_barrier (ilist); + gimplify_and_add (build_omp_barrier (), ilist); } @@ -1881,19 +1975,17 @@ lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx) { x = build2 (code, TREE_TYPE (ref), ref, new_var); ref = build_outer_var_ref (var, ctx); - x = build2 (MODIFY_EXPR, void_type_node, ref, x); + x = build_gimple_modify_stmt (ref, x); append_to_statement_list (x, &sub_list); } } - x = built_in_decls[BUILT_IN_GOMP_ATOMIC_START]; - x = build_function_call_expr (x, NULL); + x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ATOMIC_START], 0); gimplify_and_add (x, stmt_list); gimplify_and_add (sub_list, stmt_list); - x = built_in_decls[BUILT_IN_GOMP_ATOMIC_END]; - x = build_function_call_expr (x, NULL); + x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ATOMIC_END], 0); gimplify_and_add (x, stmt_list); } @@ -1918,9 +2010,9 @@ lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist, by_ref = use_pointer_for_field (var, false); ref = build_sender_ref (var, ctx); - x = (ctx->is_nested) ? 
lookup_decl_in_outer_ctx (var, ctx) : var; + x = lookup_decl_in_outer_ctx (var, ctx); x = by_ref ? build_fold_addr_expr (x) : x; - x = build2 (MODIFY_EXPR, void_type_node, ref, x); + x = build_gimple_modify_stmt (ref, x); gimplify_and_add (x, slist); ref = build_receiver_ref (var, by_ref, ctx); @@ -1959,9 +2051,8 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) continue; } - var = val = OMP_CLAUSE_DECL (c); - if (ctx->is_nested) - var = lookup_decl_in_outer_ctx (val, ctx); + val = OMP_CLAUSE_DECL (c); + var = lookup_decl_in_outer_ctx (val, ctx); if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN && is_global_var (var)) @@ -2001,14 +2092,14 @@ lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) { ref = build_sender_ref (val, ctx); x = by_ref ? build_fold_addr_expr (var) : var; - x = build2 (MODIFY_EXPR, void_type_node, ref, x); + x = build_gimple_modify_stmt (ref, x); gimplify_and_add (x, ilist); } if (do_out) { ref = build_sender_ref (val, ctx); - x = build2 (MODIFY_EXPR, void_type_node, var, ref); + x = build_gimple_modify_stmt (var, ref); gimplify_and_add (x, olist); } } @@ -2033,29 +2124,26 @@ lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx) if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar)) continue; - var = ovar; - /* If CTX is a nested parallel directive. Find the immediately enclosing parallel or workshare construct that contains a mapping for OVAR. */ - if (ctx->is_nested) - var = lookup_decl_in_outer_ctx (ovar, ctx); + var = lookup_decl_in_outer_ctx (ovar, ctx); if (use_pointer_for_field (ovar, true)) { x = build_sender_ref (ovar, ctx); var = build_fold_addr_expr (var); - x = build2 (MODIFY_EXPR, void_type_node, x, var); + x = build_gimple_modify_stmt (x, var); gimplify_and_add (x, ilist); } else { x = build_sender_ref (ovar, ctx); - x = build2 (MODIFY_EXPR, void_type_node, x, var); + x = build_gimple_modify_stmt (x, var); gimplify_and_add (x, ilist); x = build_sender_ref (ovar, ctx); - x = build2 (MODIFY_EXPR, void_type_node, var, x); + x = build_gimple_modify_stmt (var, x); gimplify_and_add (x, olist); } } @@ -2072,12 +2160,11 @@ static void expand_parallel_call (struct omp_region *region, basic_block bb, tree entry_stmt, tree ws_args) { - tree t, args, val, cond, c, list, clauses; + tree t, t1, t2, val, cond, c, clauses; block_stmt_iterator si; int start_ix; clauses = OMP_PARALLEL_CLAUSES (entry_stmt); - push_gimplify_context (); /* Determine what flavor of GOMP_parallel_start we will be emitting. 
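   For a combined "#pragma omp parallel for schedule(static)", for
   example, start_ix would select BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
   rather than the plain BUILT_IN_GOMP_PARALLEL_START (editor's
   example; the exact entry point follows the schedule kind, chosen by
   code elided from this hunk).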
*/ @@ -2123,15 +2210,28 @@ expand_parallel_call (struct omp_region *region, basic_block bb, cond = gimple_boolify (cond); if (integer_zerop (val)) - val = build2 (EQ_EXPR, unsigned_type_node, cond, - build_int_cst (TREE_TYPE (cond), 0)); + val = fold_build2 (EQ_EXPR, unsigned_type_node, cond, + build_int_cst (TREE_TYPE (cond), 0)); else { basic_block cond_bb, then_bb, else_bb; - edge e; - tree t, then_lab, else_lab, tmp; + edge e, e_then, e_else; + tree t, tmp_then, tmp_else, tmp_join, tmp_var; + + tmp_var = create_tmp_var (TREE_TYPE (val), NULL); + if (gimple_in_ssa_p (cfun)) + { + tmp_then = make_ssa_name (tmp_var, NULL_TREE); + tmp_else = make_ssa_name (tmp_var, NULL_TREE); + tmp_join = make_ssa_name (tmp_var, NULL_TREE); + } + else + { + tmp_then = tmp_var; + tmp_else = tmp_var; + tmp_join = tmp_var; + } - tmp = create_tmp_var (TREE_TYPE (val), NULL); e = split_block (bb, NULL); cond_bb = e->src; bb = e->dest; @@ -2139,79 +2239,82 @@ expand_parallel_call (struct omp_region *region, basic_block bb, then_bb = create_empty_bb (cond_bb); else_bb = create_empty_bb (then_bb); - then_lab = create_artificial_label (); - else_lab = create_artificial_label (); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); t = build3 (COND_EXPR, void_type_node, - cond, - build_and_jump (&then_lab), - build_and_jump (&else_lab)); + cond, NULL_TREE, NULL_TREE); si = bsi_start (cond_bb); bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); si = bsi_start (then_bb); - t = build1 (LABEL_EXPR, void_type_node, then_lab); - bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - t = build2 (MODIFY_EXPR, void_type_node, tmp, val); + t = build_gimple_modify_stmt (tmp_then, val); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (tmp_then) = t; bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); si = bsi_start (else_bb); - t = build1 (LABEL_EXPR, void_type_node, else_lab); - bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - t = build2 (MODIFY_EXPR, void_type_node, tmp, - build_int_cst (unsigned_type_node, 1)); + t = build_gimple_modify_stmt (tmp_else, + build_int_cst (unsigned_type_node, 1)); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (tmp_else) = t; bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); - make_edge (then_bb, bb, EDGE_FALLTHRU); - make_edge (else_bb, bb, EDGE_FALLTHRU); + e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); + e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); + + if (gimple_in_ssa_p (cfun)) + { + tree phi = create_phi_node (tmp_join, bb); + SSA_NAME_DEF_STMT (tmp_join) = phi; + add_phi_arg (phi, tmp_then, e_then); + add_phi_arg (phi, tmp_else, e_else); + } - val = tmp; + val = tmp_join; } - list = NULL_TREE; - val = get_formal_tmp_var (val, &list); si = bsi_start (bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + val = force_gimple_operand_bsi (&si, val, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); } - list = NULL_TREE; - args = tree_cons (NULL, val, NULL); + si = bsi_last (bb); t = OMP_PARALLEL_DATA_ARG (entry_stmt); if (t == NULL) - t = null_pointer_node; + t1 = null_pointer_node; else - t = build_fold_addr_expr (t); - args = tree_cons (NULL, t, args); - t = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt)); - args = tree_cons (NULL, t, args); + t1 = build_fold_addr_expr (t); + t2 = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt)); if (ws_args) - args = chainon (args, ws_args); + { + tree args = tree_cons (NULL, 
t2, + tree_cons (NULL, t1, + tree_cons (NULL, val, ws_args))); + t = build_function_call_expr (built_in_decls[start_ix], args); + } + else + t = build_call_expr (built_in_decls[start_ix], 3, t2, t1, val); - t = built_in_decls[start_ix]; - t = build_function_call_expr (t, args); - gimplify_and_add (t, &list); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); t = OMP_PARALLEL_DATA_ARG (entry_stmt); if (t == NULL) t = null_pointer_node; else t = build_fold_addr_expr (t); - args = tree_cons (NULL, t, NULL); - t = build_function_call_expr (OMP_PARALLEL_FN (entry_stmt), args); - gimplify_and_add (t, &list); - - t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END]; - t = build_function_call_expr (t, NULL); - gimplify_and_add (t, &list); - - si = bsi_last (bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + t = build_call_expr (OMP_PARALLEL_FN (entry_stmt), 1, t); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); - pop_gimplify_context (NULL_TREE); + t = build_call_expr (built_in_decls[BUILT_IN_GOMP_PARALLEL_END], 0); + force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); } @@ -2230,10 +2333,7 @@ maybe_catch_exception (tree *stmt_p) if (lang_protect_cleanup_actions) t = lang_protect_cleanup_actions (); else - { - t = built_in_decls[BUILT_IN_TRAP]; - t = build_function_call_expr (t, NULL); - } + t = build_call_expr (built_in_decls[BUILT_IN_TRAP], 0); f = build2 (EH_FILTER_EXPR, void_type_node, NULL, NULL); EH_FILTER_MUST_NOT_THROW (f) = 1; gimplify_and_add (t, &EH_FILTER_FAILURE (f)); @@ -2327,13 +2427,68 @@ remove_exit_barriers (struct omp_region *region) } } +/* Optimize omp_get_thread_num () and omp_get_num_threads () + calls. These can't be declared as const functions, but + within one parallel body they are constant, so they can be + transformed there into __builtin_omp_get_{thread_num,num_threads} () + which are declared const. */ + +static void +optimize_omp_library_calls (void) +{ + basic_block bb; + block_stmt_iterator bsi; + tree thr_num_id + = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM]); + tree num_thr_id + = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS]); + + FOR_EACH_BB (bb) + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + tree stmt = bsi_stmt (bsi); + tree call = get_call_expr_in (stmt); + tree decl; + + if (call + && (decl = get_callee_fndecl (call)) + && DECL_EXTERNAL (decl) + && TREE_PUBLIC (decl) + && DECL_INITIAL (decl) == NULL) + { + tree built_in; + + if (DECL_NAME (decl) == thr_num_id) + built_in = built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM]; + else if (DECL_NAME (decl) == num_thr_id) + built_in = built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS]; + else + continue; + + if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) + || call_expr_nargs (call) != 0) + continue; + + if (flag_exceptions && !TREE_NOTHROW (decl)) + continue; + + if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE + || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (decl))) + != TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (built_in)))) + continue; + + CALL_EXPR_FN (call) = build_fold_addr_expr (built_in); + } + } +} + /* Expand the OpenMP parallel directive starting at REGION. 
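   As a sketch (editor's addition, not in the original patch):

     #pragma omp parallel
       body;

   has already been outlined by the lowering phase into a child
   function, roughly

     static void foo._omp_fn.0 (struct .omp_data_s *.omp_data_i)
     { body; }

   and this routine moves the region's blocks into that function and
   replaces them with the GOMP_parallel_start/end launch sequence.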
*/ static void expand_omp_parallel (struct omp_region *region) { basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun, *saved_cfun; + struct function *child_cfun; tree child_fn, block, t, ws_args; block_stmt_iterator si; tree entry_stmt; @@ -2342,7 +2497,9 @@ expand_omp_parallel (struct omp_region *region) entry_stmt = last_stmt (region->entry); child_fn = OMP_PARALLEL_FN (entry_stmt); child_cfun = DECL_STRUCT_FUNCTION (child_fn); - saved_cfun = cfun; + /* If this function has been already instrumented, make sure + the child function isn't instrumented again. */ + child_cfun->after_tree_profile = cfun->after_tree_profile; entry_bb = region->entry; exit_bb = region->exit; @@ -2361,20 +2518,24 @@ expand_omp_parallel (struct omp_region *region) block_stmt_iterator si; entry_succ_e = single_succ_edge (entry_bb); - exit_succ_e = single_succ_edge (exit_bb); si = bsi_last (entry_bb); gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL); bsi_remove (&si, true); new_bb = entry_bb; - remove_edge (entry_succ_e); - make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); + if (exit_bb) + { + exit_succ_e = single_succ_edge (exit_bb); + make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); + } + remove_edge_and_dominated_blocks (entry_succ_e); } else { /* If the parallel region needs data sent from the parent - function, then the very first statement of the parallel body + function, then the very first statement (except possible + tree profile counter updates) of the parallel body is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since &.OMP_DATA_O is passed as an argument to the child function, we need to replace it with the argument as seen by the child @@ -2388,32 +2549,58 @@ expand_omp_parallel (struct omp_region *region) if (OMP_PARALLEL_DATA_ARG (entry_stmt)) { basic_block entry_succ_bb = single_succ (entry_bb); - block_stmt_iterator si = bsi_start (entry_succ_bb); - tree stmt; + block_stmt_iterator si; + tree parcopy_stmt = NULL_TREE, arg, narg; + + for (si = bsi_start (entry_succ_bb); ; bsi_next (&si)) + { + tree stmt, arg; + + gcc_assert (!bsi_end_p (si)); + stmt = bsi_stmt (si); + if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) + continue; - gcc_assert (!bsi_end_p (si)); + arg = GIMPLE_STMT_OPERAND (stmt, 1); + STRIP_NOPS (arg); + if (TREE_CODE (arg) == ADDR_EXPR + && TREE_OPERAND (arg, 0) + == OMP_PARALLEL_DATA_ARG (entry_stmt)) + { + parcopy_stmt = stmt; + break; + } + } - stmt = bsi_stmt (si); - gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR - && TREE_CODE (TREE_OPERAND (stmt, 1)) == ADDR_EXPR - && TREE_OPERAND (TREE_OPERAND (stmt, 1), 0) - == OMP_PARALLEL_DATA_ARG (entry_stmt)); + gcc_assert (parcopy_stmt != NULL_TREE); + arg = DECL_ARGUMENTS (child_fn); - if (TREE_OPERAND (stmt, 0) == DECL_ARGUMENTS (child_fn)) - bsi_remove (&si, true); + if (!gimple_in_ssa_p (cfun)) + { + if (GIMPLE_STMT_OPERAND (parcopy_stmt, 0) == arg) + bsi_remove (&si, true); + else + GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = arg; + } else - TREE_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn); + { + /* If we are in ssa form, we must load the value from the default + definition of the argument. That should not be defined now, + since the argument is not used uninitialized. */ + gcc_assert (gimple_default_def (cfun, arg) == NULL); + narg = make_ssa_name (arg, build_empty_stmt ()); + set_default_def (arg, narg); + GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = narg; + update_stmt (parcopy_stmt); + } } /* Declare local variables needed in CHILD_CFUN. 
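   (Editor's note: the statements below also reset DECL_CONTEXT on the
   child's PARM_DECLs, which were created while the parent was still
   the current function, so that they belong to CHILD_FN before the
   callgraph learns about the new function.)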
*/ block = DECL_INITIAL (child_fn); BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list); - DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list; - - /* Reset DECL_CONTEXT on locals and function arguments. */ - for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t)) - DECL_CONTEXT (t) = child_fn; + DECL_SAVED_TREE (child_fn) = bb_stmt_list (single_succ (entry_bb)); + /* Reset DECL_CONTEXT on function arguments. */ for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t)) DECL_CONTEXT (t) = child_fn; @@ -2427,15 +2614,6 @@ expand_omp_parallel (struct omp_region *region) entry_bb = e->dest; single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - /* Move the parallel region into CHILD_CFUN. We need to reset - dominance information because the expansion of the inner - regions has invalidated it. */ - free_dominance_info (CDI_DOMINATORS); - new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb); - if (exit_bb) - single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; - cgraph_add_new_function (child_fn); - /* Convert OMP_RETURN into a RETURN_EXPR. */ if (exit_bb) { @@ -2443,13 +2621,59 @@ expand_omp_parallel (struct omp_region *region) gcc_assert (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) == OMP_RETURN); t = build1 (RETURN_EXPR, void_type_node, NULL); - bsi_insert_after (&si, t, TSI_SAME_STMT); + bsi_insert_after (&si, t, BSI_SAME_STMT); bsi_remove (&si, true); } - } + /* Move the parallel region into CHILD_CFUN. */ + + if (gimple_in_ssa_p (cfun)) + { + push_cfun (child_cfun); + init_tree_ssa (); + init_ssa_operands (); + cfun->gimple_df->in_ssa_p = true; + pop_cfun (); + } + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb); + if (exit_bb) + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + + /* Inform the callgraph about the new function. */ + DECL_STRUCT_FUNCTION (child_fn)->curr_properties + = cfun->curr_properties; + cgraph_add_new_function (child_fn, true); + + /* Fix the callgraph edges for child_cfun. Those for cfun will be + fixed in a following pass. */ + push_cfun (child_cfun); + if (optimize) + optimize_omp_library_calls (); + rebuild_cgraph_edges (); + + /* Some EH regions might become dead, see PR34608. If + pass_cleanup_cfg isn't the first pass to happen with the + new child, these dead EH edges might cause problems. + Clean them up now. */ + if (flag_exceptions) + { + basic_block bb; + tree save_current = current_function_decl; + bool changed = false; + + current_function_decl = child_fn; + FOR_EACH_BB (bb) + changed |= tree_purge_dead_eh_edges (bb); + if (changed) + cleanup_tree_cfg (); + current_function_decl = save_current; + } + pop_cfun (); + } + /* Emit a library call to launch the children threads. */ expand_parallel_call (region, new_bb, entry_stmt, ws_args); + update_ssa (TODO_update_ssa_only_virtuals); } @@ -2474,7 +2698,7 @@ expand_omp_parallel (struct omp_region *region) L3: If this is a combined omp parallel loop, instead of the call to - GOMP_loop_foo_start, we emit 'goto L3'. */ + GOMP_loop_foo_start, we call GOMP_loop_foo_next. 
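+
+   For example (editor's illustration, not part of the original
+   patch), for
+
+     #pragma omp for schedule(dynamic, 4)
+     for (V = N1; V cond N2; V += STEP) BODY;
+
+   START_FN is BUILT_IN_GOMP_LOOP_DYNAMIC_START and NEXT_FN is
+   BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT; each call stores the next chunk
+   of at most four iterations into [istart0, iend0) and returns
+   false once the iteration space is exhausted.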
*/ static void expand_omp_for_generic (struct omp_region *region, @@ -2482,114 +2706,135 @@ expand_omp_for_generic (struct omp_region *region, enum built_in_function start_fn, enum built_in_function next_fn) { - tree l0, l1, l2, l3; - tree type, istart0, iend0, iend; - tree t, args, list; - basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l3_bb; + tree type, istart0, iend0, iend, phi; + tree t, vmain, vback; + basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb; + basic_block l2_bb = NULL, l3_bb = NULL; block_stmt_iterator si; bool in_combined_parallel = is_combined_parallel (region); + bool broken_loop = region->cont == NULL; + edge e, ne; + + gcc_assert (!broken_loop || !in_combined_parallel); type = TREE_TYPE (fd->v); istart0 = create_tmp_var (long_integer_type_node, ".istart0"); iend0 = create_tmp_var (long_integer_type_node, ".iend0"); - iend = create_tmp_var (type, NULL); TREE_ADDRESSABLE (istart0) = 1; TREE_ADDRESSABLE (iend0) = 1; + if (gimple_in_ssa_p (cfun)) + { + add_referenced_var (istart0); + add_referenced_var (iend0); + } entry_bb = region->entry; - l0_bb = create_empty_bb (entry_bb); - l1_bb = single_succ (entry_bb); cont_bb = region->cont; - l2_bb = create_empty_bb (cont_bb); - l3_bb = single_succ (cont_bb); + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + l1_bb = single_succ (l0_bb); + if (!broken_loop) + { + l2_bb = create_empty_bb (cont_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + } + else + l2_bb = NULL; + l3_bb = BRANCH_EDGE (entry_bb)->dest; exit_bb = region->exit; - l0 = tree_block_label (l0_bb); - l1 = tree_block_label (l1_bb); - l2 = tree_block_label (l2_bb); - l3 = tree_block_label (l3_bb); - si = bsi_last (entry_bb); gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - if (!in_combined_parallel) + if (in_combined_parallel) + { + /* In a combined parallel loop, emit a call to + GOMP_loop_foo_next. */ + t = build_call_expr (built_in_decls[next_fn], 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + } + else { + tree t0, t1, t2, t3, t4; /* If this is not a combined parallel loop, emit a call to GOMP_loop_foo_start in ENTRY_BB. 
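	 The call built here has the shape (editor's summary)

	   GOMP_loop_foo_start (N1, N2, STEP[, CHUNK], &istart0, &iend0)

	 with the bound arguments converted to long, and it returns
	 true iff there is at least one chunk to execute.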
*/ - list = alloc_stmt_list (); - t = build_fold_addr_expr (iend0); - args = tree_cons (NULL, t, NULL); - t = build_fold_addr_expr (istart0); - args = tree_cons (NULL, t, args); + t4 = build_fold_addr_expr (iend0); + t3 = build_fold_addr_expr (istart0); + t2 = fold_convert (long_integer_type_node, fd->step); + t1 = fold_convert (long_integer_type_node, fd->n2); + t0 = fold_convert (long_integer_type_node, fd->n1); if (fd->chunk_size) { t = fold_convert (long_integer_type_node, fd->chunk_size); - args = tree_cons (NULL, t, args); + t = build_call_expr (built_in_decls[start_fn], 6, + t0, t1, t2, t, t3, t4); } - t = fold_convert (long_integer_type_node, fd->step); - args = tree_cons (NULL, t, args); - t = fold_convert (long_integer_type_node, fd->n2); - args = tree_cons (NULL, t, args); - t = fold_convert (long_integer_type_node, fd->n1); - args = tree_cons (NULL, t, args); - t = build_function_call_expr (built_in_decls[start_fn], args); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), - build_and_jump (&l3)); - append_to_statement_list (t, &list); - bsi_insert_after (&si, list, BSI_SAME_STMT); + else + t = build_call_expr (built_in_decls[start_fn], 5, + t0, t1, t2, t3, t4); } + t = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_SAME_STMT); + + /* Remove the OMP_FOR statement. */ bsi_remove (&si, true); /* Iteration setup for sequential loop goes in L0_BB. */ - list = alloc_stmt_list (); + si = bsi_start (l0_bb); t = fold_convert (type, istart0); - t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &list); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build_gimple_modify_stmt (fd->v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (fd->v) = t; t = fold_convert (type, iend0); - t = build2 (MODIFY_EXPR, void_type_node, iend, t); - gimplify_and_add (t, &list); - - si = bsi_start (l0_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + iend = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); - /* Code to control the increment and predicate for the sequential - loop goes in the first half of EXIT_BB (we split EXIT_BB so - that we can inherit all the edges going out of the loop - body). */ - list = alloc_stmt_list (); - - t = build2 (PLUS_EXPR, type, fd->v, fd->step); - t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &list); + if (!broken_loop) + { + /* Code to control the increment and predicate for the sequential + loop goes in the CONT_BB. 
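+     (Editor's note: the OMP_CONTINUE statement names the iteration
+     variable twice; operand 1, VMAIN, is the value used by the body
+     and operand 0, VBACK, receives the incremented value, so the
+     rewrite below stays correct when the function is in SSA form.)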
*/ + si = bsi_last (cont_bb); + t = bsi_stmt (si); + gcc_assert (TREE_CODE (t) == OMP_CONTINUE); + vmain = TREE_OPERAND (t, 1); + vback = TREE_OPERAND (t, 0); + + t = fold_build2 (PLUS_EXPR, type, vmain, fd->step); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + true, BSI_SAME_STMT); + t = build_gimple_modify_stmt (vback, t); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vback) = t; - t = build2 (fd->cond_code, boolean_type_node, fd->v, iend); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), - build_and_jump (&l2)); - append_to_statement_list (t, &list); + t = build2 (fd->cond_code, boolean_type_node, vback, iend); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); - si = bsi_last (cont_bb); - bsi_insert_after (&si, list, BSI_SAME_STMT); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - bsi_remove (&si, true); + /* Remove OMP_CONTINUE. */ + bsi_remove (&si, true); - /* Emit code to get the next parallel iteration in L2_BB. */ - list = alloc_stmt_list (); - - t = build_fold_addr_expr (iend0); - args = tree_cons (NULL, t, NULL); - t = build_fold_addr_expr (istart0); - args = tree_cons (NULL, t, args); - t = build_function_call_expr (built_in_decls[next_fn], args); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), - build_and_jump (&l3)); - append_to_statement_list (t, &list); - - si = bsi_start (l2_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + /* Emit code to get the next parallel iteration in L2_BB. */ + si = bsi_start (l2_bb); + + t = build_call_expr (built_in_decls[next_fn], 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + t = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + } /* Add the loop cleanup function. */ si = bsi_last (exit_bb); @@ -2597,28 +2842,37 @@ expand_omp_for_generic (struct omp_region *region, t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT]; else t = built_in_decls[BUILT_IN_GOMP_LOOP_END]; - t = build_function_call_expr (t, NULL); + t = build_call_expr (t, 0); bsi_insert_after (&si, t, BSI_SAME_STMT); bsi_remove (&si, true); /* Connect the new blocks. 
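     (Editor's summary of the edges set up below: entry_bb branches
     true to l0_bb and false to l3_bb; cont_bb branches true back to
     l1_bb and false to l2_bb; l2_bb branches true to l0_bb for the
     next chunk and false to l3_bb to leave the loop.)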
*/ - remove_edge (single_succ_edge (entry_bb)); - if (in_combined_parallel) - make_edge (entry_bb, l2_bb, EDGE_FALLTHRU); - else + find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; + find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; + + if (!broken_loop) { - make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE); - make_edge (entry_bb, l3_bb, EDGE_FALSE_VALUE); + e = find_edge (cont_bb, l3_bb); + ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); + + for (phi = phi_nodes (l3_bb); phi; phi = PHI_CHAIN (phi)) + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), + PHI_ARG_DEF_FROM_EDGE (phi, e)); + remove_edge (e); + + find_edge (cont_bb, l1_bb)->flags = EDGE_TRUE_VALUE; + make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + + set_immediate_dominator (CDI_DOMINATORS, l2_bb, + recompute_dominator (CDI_DOMINATORS, l2_bb)); + set_immediate_dominator (CDI_DOMINATORS, l3_bb, + recompute_dominator (CDI_DOMINATORS, l3_bb)); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, + recompute_dominator (CDI_DOMINATORS, l0_bb)); + set_immediate_dominator (CDI_DOMINATORS, l1_bb, + recompute_dominator (CDI_DOMINATORS, l1_bb)); } - - make_edge (l0_bb, l1_bb, EDGE_FALLTHRU); - - remove_edge (single_succ_edge (cont_bb)); - make_edge (cont_bb, l1_bb, EDGE_TRUE_VALUE); - make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); - - make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); - make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); } @@ -2639,9 +2893,9 @@ expand_omp_for_generic (struct omp_region *region, q += (q * nthreads != n); s0 = q * threadid; e0 = min(s0 + q, n); + V = s0 * STEP + N1; if (s0 >= e0) goto L2; else goto L0; L0: - V = s0 * STEP + N1; e = e0 * STEP + N1; L1: BODY; @@ -2654,141 +2908,144 @@ static void expand_omp_for_static_nochunk (struct omp_region *region, struct omp_for_data *fd) { - tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid; - tree type, utype, list; + tree n, q, s0, e0, e, t, nthreads, threadid; + tree type, vmain, vback; basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb; basic_block fin_bb; block_stmt_iterator si; type = TREE_TYPE (fd->v); - utype = lang_hooks.types.unsigned_type (type); entry_bb = region->entry; - seq_start_bb = create_empty_bb (entry_bb); - body_bb = single_succ (entry_bb); cont_bb = region->cont; - fin_bb = single_succ (cont_bb); + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + body_bb = single_succ (seq_start_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + fin_bb = FALLTHRU_EDGE (cont_bb)->dest; exit_bb = region->exit; - l0 = tree_block_label (seq_start_bb); - l1 = tree_block_label (body_bb); - l2 = tree_block_label (fin_bb); - /* Iteration space partitioning goes in ENTRY_BB. 
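     For example (editor's arithmetic, not in the original patch),
     n = 10 iterations on nthreads = 4 give q = 3, so thread 0 runs
     [0,3), thread 1 [3,6), thread 2 [6,9) and thread 3 [9,10); any
     thread with s0 >= e0 branches straight to FIN_BB.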
*/ - list = alloc_stmt_list (); + si = bsi_last (entry_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS]; - t = build_function_call_expr (t, NULL); - t = fold_convert (utype, t); - nthreads = get_formal_tmp_var (t, &list); + t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0); + t = fold_convert (type, t); + nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); - t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; - t = build_function_call_expr (t, NULL); - t = fold_convert (utype, t); - threadid = get_formal_tmp_var (t, &list); - - fd->n1 = fold_convert (type, fd->n1); - if (!is_gimple_val (fd->n1)) - fd->n1 = get_formal_tmp_var (fd->n1, &list); - - fd->n2 = fold_convert (type, fd->n2); - if (!is_gimple_val (fd->n2)) - fd->n2 = get_formal_tmp_var (fd->n2, &list); - - fd->step = fold_convert (type, fd->step); - if (!is_gimple_val (fd->step)) - fd->step = get_formal_tmp_var (fd->step, &list); + t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0); + t = fold_convert (type, t); + threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + fd->n1 = force_gimple_operand_bsi (&si, + fold_convert (type, fd->n1), + true, NULL_TREE, + true, BSI_SAME_STMT); + + fd->n2 = force_gimple_operand_bsi (&si, + fold_convert (type, fd->n2), + true, NULL_TREE, + true, BSI_SAME_STMT); + + fd->step = force_gimple_operand_bsi (&si, + fold_convert (type, fd->step), + true, NULL_TREE, + true, BSI_SAME_STMT); t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1)); t = fold_build2 (PLUS_EXPR, type, fd->step, t); t = fold_build2 (PLUS_EXPR, type, t, fd->n2); t = fold_build2 (MINUS_EXPR, type, t, fd->n1); t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step); - t = fold_convert (utype, t); - if (is_gimple_val (t)) - n = t; - else - n = get_formal_tmp_var (t, &list); + t = fold_convert (type, t); + n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (TRUNC_DIV_EXPR, utype, n, nthreads); - q = get_formal_tmp_var (t, &list); + t = fold_build2 (TRUNC_DIV_EXPR, type, n, nthreads); + q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (MULT_EXPR, utype, q, nthreads); - t = build2 (NE_EXPR, utype, t, n); - t = build2 (PLUS_EXPR, utype, q, t); - q = get_formal_tmp_var (t, &list); + t = fold_build2 (MULT_EXPR, type, q, nthreads); + t = fold_build2 (NE_EXPR, type, t, n); + t = fold_build2 (PLUS_EXPR, type, q, t); + q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (MULT_EXPR, utype, q, threadid); - s0 = get_formal_tmp_var (t, &list); + t = build2 (MULT_EXPR, type, q, threadid); + s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); - t = build2 (PLUS_EXPR, utype, s0, q); - t = build2 (MIN_EXPR, utype, t, n); - e0 = get_formal_tmp_var (t, &list); + t = fold_build2 (PLUS_EXPR, type, s0, q); + t = fold_build2 (MIN_EXPR, type, t, n); + e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT); t = build2 (GE_EXPR, boolean_type_node, s0, e0); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2), - build_and_jump (&l0)); - append_to_statement_list (t, &list); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); - si = bsi_last (entry_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - bsi_insert_after (&si, list, BSI_SAME_STMT); + /* Remove the 
OMP_FOR statement. */ bsi_remove (&si, true); /* Setup code for sequential iteration goes in SEQ_START_BB. */ - list = alloc_stmt_list (); + si = bsi_start (seq_start_bb); t = fold_convert (type, s0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &list); + t = fold_build2 (MULT_EXPR, type, t, fd->step); + t = fold_build2 (PLUS_EXPR, type, t, fd->n1); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build_gimple_modify_stmt (fd->v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (fd->v) = t; t = fold_convert (type, e0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - e = get_formal_tmp_var (t, &list); - - si = bsi_start (seq_start_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + t = fold_build2 (MULT_EXPR, type, t, fd->step); + t = fold_build2 (PLUS_EXPR, type, t, fd->n1); + e = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); /* The code controlling the sequential loop replaces the OMP_CONTINUE. */ - list = alloc_stmt_list (); - - t = build2 (PLUS_EXPR, type, fd->v, fd->step); - t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &list); - - t = build2 (fd->cond_code, boolean_type_node, fd->v, e); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), - build_and_jump (&l2)); - append_to_statement_list (t, &list); - si = bsi_last (cont_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - bsi_insert_after (&si, list, BSI_SAME_STMT); + t = bsi_stmt (si); + gcc_assert (TREE_CODE (t) == OMP_CONTINUE); + vmain = TREE_OPERAND (t, 1); + vback = TREE_OPERAND (t, 0); + + t = fold_build2 (PLUS_EXPR, type, vmain, fd->step); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + true, BSI_SAME_STMT); + t = build_gimple_modify_stmt (vback, t); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vback) = t; + + t = build2 (fd->cond_code, boolean_type_node, vback, e); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); + + /* Remove the OMP_CONTINUE statement. */ bsi_remove (&si, true); /* Replace the OMP_RETURN with a barrier, or nothing. */ si = bsi_last (exit_bb); if (!OMP_RETURN_NOWAIT (bsi_stmt (si))) - { - list = alloc_stmt_list (); - build_omp_barrier (&list); - bsi_insert_after (&si, list, BSI_SAME_STMT); - } + force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE, + false, BSI_SAME_STMT); bsi_remove (&si, true); /* Connect all the blocks. 
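     (Editor's summary: the s0 >= e0 test in ENTRY_BB branches true to
     FIN_BB and false to SEQ_START_BB; the test in CONT_BB branches
     true back to BODY_BB and false to FIN_BB.)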
*/ - make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU); - - remove_edge (single_succ_edge (entry_bb)); - make_edge (entry_bb, fin_bb, EDGE_TRUE_VALUE); - make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE); + find_edge (entry_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; + find_edge (entry_bb, fin_bb)->flags = EDGE_TRUE_VALUE; - make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE); + find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE; find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE; + + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); } @@ -2806,6 +3063,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, adj = STEP + 1; n = (adj + N2 - N1) / STEP; trip = 0; + V = threadid * CHUNK * STEP + N1; -- this extra definition of V is + here so that V is defined + if the loop is not entered L0: s0 = (trip * nthreads + threadid) * CHUNK; e0 = min(s0 + CHUNK, n); @@ -2826,171 +3086,231 @@ expand_omp_for_static_nochunk (struct omp_region *region, static void expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd) { - tree l0, l1, l2, l3, l4, n, s0, e0, e, t; - tree trip, nthreads, threadid; - tree type, utype; + tree n, s0, e0, e, t, phi, nphi, args; + tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; + tree type, cont, v_main, v_back, v_extra; basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; basic_block trip_update_bb, cont_bb, fin_bb; - tree list; block_stmt_iterator si; + edge se, re, ene; type = TREE_TYPE (fd->v); - utype = lang_hooks.types.unsigned_type (type); entry_bb = region->entry; - iter_part_bb = create_empty_bb (entry_bb); - seq_start_bb = create_empty_bb (iter_part_bb); - body_bb = single_succ (entry_bb); + se = split_block (entry_bb, last_stmt (entry_bb)); + entry_bb = se->src; + iter_part_bb = se->dest; cont_bb = region->cont; - trip_update_bb = create_empty_bb (cont_bb); - fin_bb = single_succ (cont_bb); + gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); + gcc_assert (BRANCH_EDGE (iter_part_bb)->dest + == FALLTHRU_EDGE (cont_bb)->dest); + seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); + body_bb = single_succ (seq_start_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + fin_bb = FALLTHRU_EDGE (cont_bb)->dest; + trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); exit_bb = region->exit; - l0 = tree_block_label (iter_part_bb); - l1 = tree_block_label (seq_start_bb); - l2 = tree_block_label (body_bb); - l3 = tree_block_label (trip_update_bb); - l4 = tree_block_label (fin_bb); - /* Trip and adjustment setup goes in ENTRY_BB. 
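     (Editor's note: besides trip = 0, ENTRY_BB also computes
     V_EXTRA = threadid * CHUNK * STEP + N1, the value V would take on
     this thread's first chunk, so that V has a definition even on the
     path that never enters the loop body.)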
*/ - list = alloc_stmt_list (); + si = bsi_last (entry_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS]; - t = build_function_call_expr (t, NULL); - t = fold_convert (utype, t); - nthreads = get_formal_tmp_var (t, &list); + t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0); + t = fold_convert (type, t); + nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); - t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; - t = build_function_call_expr (t, NULL); - t = fold_convert (utype, t); - threadid = get_formal_tmp_var (t, &list); - - fd->n1 = fold_convert (type, fd->n1); - if (!is_gimple_val (fd->n1)) - fd->n1 = get_formal_tmp_var (fd->n1, &list); - - fd->n2 = fold_convert (type, fd->n2); - if (!is_gimple_val (fd->n2)) - fd->n2 = get_formal_tmp_var (fd->n2, &list); - - fd->step = fold_convert (type, fd->step); - if (!is_gimple_val (fd->step)) - fd->step = get_formal_tmp_var (fd->step, &list); - - fd->chunk_size = fold_convert (utype, fd->chunk_size); - if (!is_gimple_val (fd->chunk_size)) - fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list); + t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0); + t = fold_convert (type, t); + threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + fd->n1 = force_gimple_operand_bsi (&si, fold_convert (type, fd->n1), + true, NULL_TREE, + true, BSI_SAME_STMT); + fd->n2 = force_gimple_operand_bsi (&si, fold_convert (type, fd->n2), + true, NULL_TREE, + true, BSI_SAME_STMT); + fd->step = force_gimple_operand_bsi (&si, fold_convert (type, fd->step), + true, NULL_TREE, + true, BSI_SAME_STMT); + fd->chunk_size + = force_gimple_operand_bsi (&si, fold_convert (type, + fd->chunk_size), + true, NULL_TREE, + true, BSI_SAME_STMT); t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1)); t = fold_build2 (PLUS_EXPR, type, fd->step, t); t = fold_build2 (PLUS_EXPR, type, t, fd->n2); t = fold_build2 (MINUS_EXPR, type, t, fd->n1); t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step); - t = fold_convert (utype, t); - if (is_gimple_val (t)) - n = t; + t = fold_convert (type, t); + n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + trip_var = create_tmp_var (type, ".trip"); + if (gimple_in_ssa_p (cfun)) + { + add_referenced_var (trip_var); + trip_init = make_ssa_name (trip_var, NULL_TREE); + trip_main = make_ssa_name (trip_var, NULL_TREE); + trip_back = make_ssa_name (trip_var, NULL_TREE); + } else - n = get_formal_tmp_var (t, &list); + { + trip_init = trip_var; + trip_main = trip_var; + trip_back = trip_var; + } - t = build_int_cst (utype, 0); - trip = get_initialized_tmp_var (t, &list, NULL); + t = build_gimple_modify_stmt (trip_init, build_int_cst (type, 0)); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (trip_init) = t; - si = bsi_last (entry_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR); - bsi_insert_after (&si, list, BSI_SAME_STMT); + t = fold_build2 (MULT_EXPR, type, threadid, fd->chunk_size); + t = fold_build2 (MULT_EXPR, type, t, fd->step); + t = fold_build2 (PLUS_EXPR, type, t, fd->n1); + v_extra = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + true, BSI_SAME_STMT); + + /* Remove the OMP_FOR. */ bsi_remove (&si, true); /* Iteration space partitioning goes in ITER_PART_BB. 
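     For example (editor's arithmetic, not in the original patch),
     n = 10, nthreads = 2 and CHUNK = 3 give thread 0 the ranges
     [0,3) on trip 0 and [6,9) on trip 1, and thread 1 the ranges
     [3,6) and [9,10); once s0 >= n the thread leaves through FIN_BB.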
*/ - list = alloc_stmt_list (); + si = bsi_last (iter_part_bb); - t = build2 (MULT_EXPR, utype, trip, nthreads); - t = build2 (PLUS_EXPR, utype, t, threadid); - t = build2 (MULT_EXPR, utype, t, fd->chunk_size); - s0 = get_formal_tmp_var (t, &list); + t = fold_build2 (MULT_EXPR, type, trip_main, nthreads); + t = fold_build2 (PLUS_EXPR, type, t, threadid); + t = fold_build2 (MULT_EXPR, type, t, fd->chunk_size); + s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); - t = build2 (PLUS_EXPR, utype, s0, fd->chunk_size); - t = build2 (MIN_EXPR, utype, t, n); - e0 = get_formal_tmp_var (t, &list); + t = fold_build2 (PLUS_EXPR, type, s0, fd->chunk_size); + t = fold_build2 (MIN_EXPR, type, t, n); + e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); t = build2 (LT_EXPR, boolean_type_node, s0, n); - t = build3 (COND_EXPR, void_type_node, t, - build_and_jump (&l1), build_and_jump (&l4)); - append_to_statement_list (t, &list); - - si = bsi_start (iter_part_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); /* Setup code for sequential iteration goes in SEQ_START_BB. */ - list = alloc_stmt_list (); + si = bsi_start (seq_start_bb); t = fold_convert (type, s0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &list); + t = fold_build2 (MULT_EXPR, type, t, fd->step); + t = fold_build2 (PLUS_EXPR, type, t, fd->n1); + t = force_gimple_operand_bsi (&si, t, false, NULL_TREE, + false, BSI_CONTINUE_LINKING); + t = build_gimple_modify_stmt (fd->v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (fd->v) = t; t = fold_convert (type, e0); - t = build2 (MULT_EXPR, type, t, fd->step); - t = build2 (PLUS_EXPR, type, t, fd->n1); - e = get_formal_tmp_var (t, &list); - - si = bsi_start (seq_start_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + t = fold_build2 (MULT_EXPR, type, t, fd->step); + t = fold_build2 (PLUS_EXPR, type, t, fd->n1); + e = force_gimple_operand_bsi (&si, t, true, NULL_TREE, + false, BSI_CONTINUE_LINKING); /* The code controlling the sequential loop goes in CONT_BB, replacing the OMP_CONTINUE. */ - list = alloc_stmt_list (); - - t = build2 (PLUS_EXPR, type, fd->v, fd->step); - t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &list); - - t = build2 (fd->cond_code, boolean_type_node, fd->v, e); - t = get_formal_tmp_var (t, &list); - t = build3 (COND_EXPR, void_type_node, t, - build_and_jump (&l2), build_and_jump (&l3)); - append_to_statement_list (t, &list); - si = bsi_last (cont_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - bsi_insert_after (&si, list, BSI_SAME_STMT); + cont = bsi_stmt (si); + gcc_assert (TREE_CODE (cont) == OMP_CONTINUE); + v_main = TREE_OPERAND (cont, 1); + v_back = TREE_OPERAND (cont, 0); + + t = build2 (PLUS_EXPR, type, v_main, fd->step); + t = build_gimple_modify_stmt (v_back, t); + bsi_insert_before (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (v_back) = t; + + t = build2 (fd->cond_code, boolean_type_node, v_back, e); + t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE); + bsi_insert_before (&si, t, BSI_SAME_STMT); + + /* Remove OMP_CONTINUE. */ bsi_remove (&si, true); /* Trip update code goes into TRIP_UPDATE_BB. 
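     (Editor's note: in SSA form the trip count flows through three
     names, TRIP_INIT from ENTRY_BB, TRIP_MAIN inside the loop and
     TRIP_BACK = TRIP_MAIN + 1 computed here, joined by the phi node
     built for ITER_PART_BB further below.)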
*/ - list = alloc_stmt_list (); - - t = build_int_cst (utype, 1); - t = build2 (PLUS_EXPR, utype, trip, t); - t = build2 (MODIFY_EXPR, void_type_node, trip, t); - gimplify_and_add (t, &list); - si = bsi_start (trip_update_bb); - bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + t = build_int_cst (type, 1); + t = build2 (PLUS_EXPR, type, trip_main, t); + t = build_gimple_modify_stmt (trip_back, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (trip_back) = t; /* Replace the OMP_RETURN with a barrier, or nothing. */ si = bsi_last (exit_bb); if (!OMP_RETURN_NOWAIT (bsi_stmt (si))) - { - list = alloc_stmt_list (); - build_omp_barrier (&list); - bsi_insert_after (&si, list, BSI_SAME_STMT); - } + force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE, + false, BSI_SAME_STMT); bsi_remove (&si, true); /* Connect the new blocks. */ - remove_edge (single_succ_edge (entry_bb)); - make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU); + find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; + find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; - make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE); - make_edge (iter_part_bb, fin_bb, EDGE_FALSE_VALUE); + find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE; - make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU); + redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb); - remove_edge (single_succ_edge (cont_bb)); - make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE); - make_edge (cont_bb, trip_update_bb, EDGE_FALSE_VALUE); + if (gimple_in_ssa_p (cfun)) + { + /* When we redirect the edge from trip_update_bb to iter_part_bb, we + remove arguments of the phi nodes in fin_bb. We need to create + appropriate phi nodes in iter_part_bb instead. */ + se = single_pred_edge (fin_bb); + re = single_succ_edge (trip_update_bb); + ene = single_succ_edge (entry_bb); + + args = PENDING_STMT (re); + PENDING_STMT (re) = NULL_TREE; + for (phi = phi_nodes (fin_bb); + phi && args; + phi = PHI_CHAIN (phi), args = TREE_CHAIN (args)) + { + t = PHI_RESULT (phi); + gcc_assert (t == TREE_PURPOSE (args)); + nphi = create_phi_node (t, iter_part_bb); + SSA_NAME_DEF_STMT (t) = nphi; + + t = PHI_ARG_DEF_FROM_EDGE (phi, se); + /* A special case -- fd->v is not yet computed in iter_part_bb, we + need to use v_extra instead. */ + if (t == fd->v) + t = v_extra; + add_phi_arg (nphi, t, ene); + add_phi_arg (nphi, TREE_VALUE (args), re); + } + gcc_assert (!phi && !args); + while ((phi = phi_nodes (fin_bb)) != NULL_TREE) + remove_phi_node (phi, NULL_TREE, false); + + /* Make phi node for trip. 
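Editorially, the phi node built here merges the two ways of entering the
iteration-space partitioning block, i.e. (sketch, with the incoming edges
spelled out)

	trip_main = PHI <trip_back (trip_update_bb), trip_init (entry_bb)>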
*/ + phi = create_phi_node (trip_main, iter_part_bb); + SSA_NAME_DEF_STMT (trip_main) = phi; + add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb)); + add_phi_arg (phi, trip_init, single_succ_edge (entry_bb)); + } - make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); + set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, + recompute_dominator (CDI_DOMINATORS, iter_part_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, + recompute_dominator (CDI_DOMINATORS, seq_start_bb)); + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); } @@ -3001,12 +3321,22 @@ expand_omp_for (struct omp_region *region) { struct omp_for_data fd; - push_gimplify_context (); - extract_omp_for_data (last_stmt (region->entry), &fd); region->sched_kind = fd.sched_kind; - if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !fd.have_ordered) + gcc_assert (EDGE_COUNT (region->entry->succs) == 2); + BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; + FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; + if (region->cont) + { + gcc_assert (EDGE_COUNT (region->cont->succs) == 2); + BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; + FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; + } + + if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC + && !fd.have_ordered + && region->cont != NULL) { if (fd.chunk_size == NULL) expand_omp_for_static_nochunk (region, &fd); @@ -3021,7 +3351,7 @@ expand_omp_for (struct omp_region *region) expand_omp_for_generic (region, &fd, start_ix, next_ix); } - pop_gimplify_context (NULL); + update_ssa (TODO_update_ssa_only_virtuals); } @@ -3050,68 +3380,103 @@ expand_omp_for (struct omp_region *region) reduction; If this is a combined parallel sections, replace the call to - GOMP_sections_start with 'goto L1'. */ + GOMP_sections_start with call to GOMP_sections_next. */ static void expand_omp_sections (struct omp_region *region) { - tree label_vec, l0, l1, l2, t, u, v, sections_stmt; - unsigned i, len; - basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb, default_bb; + tree label_vec, l1, l2, t, u, sections_stmt, vin, vmain, vnext, cont; + unsigned i, casei, len; + basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; block_stmt_iterator si; struct omp_region *inner; - edge e; + bool exit_reachable = region->cont != NULL; + gcc_assert (exit_reachable == (region->exit != NULL)); entry_bb = region->entry; - l0_bb = create_empty_bb (entry_bb); + l0_bb = single_succ (entry_bb); l1_bb = region->cont; - l2_bb = single_succ (l1_bb); - default_bb = create_empty_bb (l1_bb->prev_bb); - exit_bb = region->exit; - - l0 = tree_block_label (l0_bb); - l1 = tree_block_label (l1_bb); - l2 = tree_block_label (l2_bb); - - v = create_tmp_var (unsigned_type_node, ".section"); + l2_bb = region->exit; + if (exit_reachable) + { + gcc_assert (single_pred (l2_bb) == l0_bb); + default_bb = create_empty_bb (l1_bb->prev_bb); + l1 = tree_block_label (l1_bb); + l2 = tree_block_label (l2_bb); + } + else + { + default_bb = create_empty_bb (l0_bb); + l1 = NULL_TREE; + l2 = tree_block_label (default_bb); + } /* We will build a switch() with enough cases for all the OMP_SECTION regions, a '0' case to handle the end of more work and a default case to abort if something goes wrong. 
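Schematically, the dispatch we are about to build looks like (a sketch;
the '0' case is only present when the exit of the sections region is
reachable):

	switch (v)
	  {
	  case 0: goto exit;
	  case 1: goto section_1;
	  ...
	  case N: goto section_N;
	  default: __builtin_trap ();
	  }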
*/ - len = EDGE_COUNT (entry_bb->succs); - label_vec = make_tree_vec (len + 2); + len = EDGE_COUNT (l0_bb->succs); + label_vec = make_tree_vec (len + 1); /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the OMP_SECTIONS statement. */ si = bsi_last (entry_bb); sections_stmt = bsi_stmt (si); gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS); + vin = OMP_SECTIONS_CONTROL (sections_stmt); if (!is_combined_parallel (region)) { /* If we are not inside a combined parallel+sections region, call GOMP_sections_start. */ - t = build_int_cst (unsigned_type_node, len); - t = tree_cons (NULL, t, NULL); + t = build_int_cst (unsigned_type_node, + exit_reachable ? len - 1 : len); u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START]; - t = build_function_call_expr (u, t); - t = build2 (MODIFY_EXPR, void_type_node, v, t); - bsi_insert_after (&si, t, BSI_SAME_STMT); + t = build_call_expr (u, 1, t); } + else + { + /* Otherwise, call GOMP_sections_next. */ + u = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT]; + t = build_call_expr (u, 0); + } + t = build_gimple_modify_stmt (vin, t); + bsi_insert_after (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vin) = t; bsi_remove (&si, true); - /* The switch() statement replacing OMP_SECTIONS goes in L0_BB. */ - si = bsi_start (l0_bb); + /* The switch() statement replacing OMP_SECTIONS_SWITCH goes in L0_BB. */ + si = bsi_last (l0_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTIONS_SWITCH); + if (exit_reachable) + { + cont = last_stmt (l1_bb); + gcc_assert (TREE_CODE (cont) == OMP_CONTINUE); + vmain = TREE_OPERAND (cont, 1); + vnext = TREE_OPERAND (cont, 0); + } + else + { + vmain = vin; + vnext = NULL_TREE; + } - t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec); - bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + t = build3 (SWITCH_EXPR, void_type_node, vmain, NULL, label_vec); + bsi_insert_after (&si, t, BSI_SAME_STMT); + bsi_remove (&si, true); + + i = 0; + if (exit_reachable) + { + t = build3 (CASE_LABEL_EXPR, void_type_node, + build_int_cst (unsigned_type_node, 0), NULL, l2); + TREE_VEC_ELT (label_vec, 0) = t; + i++; + } - t = build3 (CASE_LABEL_EXPR, void_type_node, - build_int_cst (unsigned_type_node, 0), NULL, l2); - TREE_VEC_ELT (label_vec, 0) = t; - make_edge (l0_bb, l2_bb, 0); - /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */ - for (inner = region->inner, i = 1; inner; inner = inner->next, ++i) + for (inner = region->inner, casei = 1; + inner; + inner = inner->next, i++, casei++) { basic_block s_entry_bb, s_exit_bb; @@ -3119,7 +3484,7 @@ expand_omp_sections (struct omp_region *region) s_exit_bb = inner->exit; t = tree_block_label (s_entry_bb); - u = build_int_cst (unsigned_type_node, i); + u = build_int_cst (unsigned_type_node, casei); u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t); TREE_VEC_ELT (label_vec, i) = u; @@ -3127,64 +3492,55 @@ expand_omp_sections (struct omp_region *region) gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION); gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si))); bsi_remove (&si, true); + single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; + + if (s_exit_bb == NULL) + continue; si = bsi_last (s_exit_bb); gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN); bsi_remove (&si, true); - e = single_pred_edge (s_entry_bb); - e->flags = 0; - redirect_edge_pred (e, l0_bb); - - single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; } /* Error handling code goes in DEFAULT_BB. 
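Seen from the runtime's side, each thread then executes roughly the
following protocol (an illustrative sketch of the libgomp calls wired up
above, not code emitted verbatim):

	for (i = GOMP_sections_start (N); i != 0; i = GOMP_sections_next ())
	  run section number i via the switch above;
	GOMP_sections_end ();

except that in a combined parallel+sections region the initial value
comes from GOMP_sections_next, since the section count was already handed
to the runtime when the parallel was started.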
*/ t = tree_block_label (default_bb); u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t); - TREE_VEC_ELT (label_vec, len + 1) = u; + TREE_VEC_ELT (label_vec, len) = u; make_edge (l0_bb, default_bb, 0); si = bsi_start (default_bb); - t = built_in_decls[BUILT_IN_TRAP]; - t = build_function_call_expr (t, NULL); + t = build_call_expr (built_in_decls[BUILT_IN_TRAP], 0); bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - /* Code to get the next section goes in L1_BB. */ - si = bsi_last (l1_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); + if (exit_reachable) + { + /* Code to get the next section goes in L1_BB. */ + si = bsi_last (l1_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE); - t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT]; - t = build_function_call_expr (t, NULL); - t = build2 (MODIFY_EXPR, void_type_node, v, t); - bsi_insert_after (&si, t, BSI_SAME_STMT); - bsi_remove (&si, true); + t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT], 0); + t = build_gimple_modify_stmt (vnext, t); + bsi_insert_after (&si, t, BSI_SAME_STMT); + if (gimple_in_ssa_p (cfun)) + SSA_NAME_DEF_STMT (vnext) = t; + bsi_remove (&si, true); - /* Cleanup function replaces OMP_RETURN in EXIT_BB. */ - si = bsi_last (exit_bb); - if (OMP_RETURN_NOWAIT (bsi_stmt (si))) - t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT]; - else - t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END]; - t = build_function_call_expr (t, NULL); - bsi_insert_after (&si, t, BSI_SAME_STMT); - bsi_remove (&si, true); + single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; - /* Connect the new blocks. */ - if (is_combined_parallel (region)) - { - /* If this was a combined parallel+sections region, we did not - emit a GOMP_sections_start in the entry block, so we just - need to jump to L1_BB to get the next section. */ - make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); + /* Cleanup function replaces OMP_RETURN in EXIT_BB. */ + si = bsi_last (l2_bb); + if (OMP_RETURN_NOWAIT (bsi_stmt (si))) + t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT]; + else + t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END]; + t = build_call_expr (t, 0); + bsi_insert_after (&si, t, BSI_SAME_STMT); + bsi_remove (&si, true); } - else - make_edge (entry_bb, l0_bb, EDGE_FALLTHRU); - e = single_succ_edge (l1_bb); - redirect_edge_succ (e, l0_bb); - e->flags = EDGE_FALLTHRU; + set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); } @@ -3214,11 +3570,8 @@ expand_omp_single (struct omp_region *region) si = bsi_last (exit_bb); if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier) - { - tree t = alloc_stmt_list (); - build_omp_barrier (&t); - bsi_insert_after (&si, t, BSI_SAME_STMT); - } + force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE, + false, BSI_SAME_STMT); bsi_remove (&si, true); single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; } @@ -3245,10 +3598,362 @@ expand_omp_synch (struct omp_region *region) bsi_remove (&si, true); single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - si = bsi_last (exit_bb); - gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN); - bsi_remove (&si, true); - single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; + if (exit_bb) + { + si = bsi_last (exit_bb); + gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN); + bsi_remove (&si, true); + single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; + } +} + +/* A subroutine of expand_omp_atomic. Attempt to implement the atomic + operation as a __sync_fetch_and_op builtin. 
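For instance (an illustrative source-level sketch, assuming a 4-byte int X),

	#pragma omp atomic
	X += 5;

can be expanded to the single call

	__sync_fetch_and_add (&X, 5);

using the _4 variant of the builtin, with no compare-and-swap loop or lock.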
INDEX is log2 of the
+ size of the data type, and thus usable to find the index of the builtin
+ decl. Returns false if the expression is not of the proper form. */
+
+static bool
+expand_omp_atomic_fetch_op (basic_block load_bb,
+ tree addr, tree loaded_val,
+ tree stored_val, int index)
+{
+ enum built_in_function base;
+ tree decl, itype, call;
+ enum insn_code *optab;
+ tree rhs;
+ basic_block store_bb = single_succ (load_bb);
+ block_stmt_iterator bsi;
+ tree stmt;
+
+ /* We expect to find the following sequences:
+
+ load_bb:
+ OMP_ATOMIC_LOAD (tmp, mem)
+
+ store_bb:
+ val = tmp OP something; (or: something OP tmp)
+ OMP_ATOMIC_STORE (val)
+
+ ???FIXME: Allow a more flexible sequence.
+ Perhaps use data flow to pick the statements.
+
+ */
+
+ bsi = bsi_after_labels (store_bb);
+ stmt = bsi_stmt (bsi);
+ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
+ return false;
+ bsi_next (&bsi);
+ if (TREE_CODE (bsi_stmt (bsi)) != OMP_ATOMIC_STORE)
+ return false;
+
+ if (!operand_equal_p (GIMPLE_STMT_OPERAND (stmt, 0), stored_val, 0))
+ return false;
+
+ rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+
+ /* Check for one of the supported fetch-op operations. */
+ switch (TREE_CODE (rhs))
+ {
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ base = BUILT_IN_FETCH_AND_ADD_N;
+ optab = sync_add_optab;
+ break;
+ case MINUS_EXPR:
+ base = BUILT_IN_FETCH_AND_SUB_N;
+ optab = sync_sub_optab;
+ break;
+ case BIT_AND_EXPR:
+ base = BUILT_IN_FETCH_AND_AND_N;
+ optab = sync_and_optab;
+ break;
+ case BIT_IOR_EXPR:
+ base = BUILT_IN_FETCH_AND_OR_N;
+ optab = sync_ior_optab;
+ break;
+ case BIT_XOR_EXPR:
+ base = BUILT_IN_FETCH_AND_XOR_N;
+ optab = sync_xor_optab;
+ break;
+ default:
+ return false;
+ }
+ /* Make sure the expression is of the proper form. */
+ if (operand_equal_p (TREE_OPERAND (rhs, 0), loaded_val, 0))
+ rhs = TREE_OPERAND (rhs, 1);
+ else if (commutative_tree_code (TREE_CODE (rhs))
+ && operand_equal_p (TREE_OPERAND (rhs, 1), loaded_val, 0))
+ rhs = TREE_OPERAND (rhs, 0);
+ else
+ return false;
+
+ decl = built_in_decls[base + index + 1];
+ itype = TREE_TYPE (TREE_TYPE (decl));
+
+ if (optab[TYPE_MODE (itype)] == CODE_FOR_nothing)
+ return false;
+
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+ call = build_call_expr (decl, 2, addr, fold_convert (itype, rhs));
+ force_gimple_operand_bsi (&bsi, call, true, NULL_TREE, true, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+ bsi_remove (&bsi, true);
+ bsi = bsi_last (store_bb);
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+
+ return true;
+}
+
+/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
+
+ oldval = *addr;
+ repeat:
+ newval = rhs; // with oldval replacing *addr in rhs
+ prev = __sync_val_compare_and_swap (addr, oldval, newval);
+ if (prev != oldval)
+ { oldval = prev; goto repeat; }
+
+ INDEX is log2 of the size of the data type, and thus usable to find the
+ index of the builtin decl. 
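As a concrete sketch, for a 4-byte float the loop above is emitted (as
GIMPLE with VIEW_CONVERT_EXPRs, not literal source) as if it were:

	prev = *(unsigned int *) addr;
	do
	  {
	    old = prev;
	    newval = bits_of (float_of (old) OP rhs);
	    prev = __sync_val_compare_and_swap_4 ((unsigned int *) addr,
						  old, newval);
	  }
	while (prev != old);

where bits_of and float_of stand for the view-conversions between float
and its 4-byte integer representation.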
*/ + +static bool +expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, + tree addr, tree loaded_val, tree stored_val, + int index) +{ + tree loadedi, storedi, initial, new_stored, new_storedi, old_vali; + tree type, itype, cmpxchg, iaddr; + block_stmt_iterator bsi; + basic_block loop_header = single_succ (load_bb); + tree phi, x; + edge e; + + cmpxchg = built_in_decls[BUILT_IN_VAL_COMPARE_AND_SWAP_N + index + 1]; + type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); + itype = TREE_TYPE (TREE_TYPE (cmpxchg)); + + if (sync_compare_and_swap[TYPE_MODE (itype)] == CODE_FOR_nothing) + return false; + + /* Load the initial value, replacing the OMP_ATOMIC_LOAD. */ + bsi = bsi_last (load_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD); + initial = force_gimple_operand_bsi (&bsi, build_fold_indirect_ref (addr), + true, NULL_TREE, true, BSI_SAME_STMT); + /* Move the value to the LOADED_VAL temporary. */ + if (gimple_in_ssa_p (cfun)) + { + gcc_assert (phi_nodes (loop_header) == NULL_TREE); + phi = create_phi_node (loaded_val, loop_header); + SSA_NAME_DEF_STMT (loaded_val) = phi; + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), + initial); + } + else + bsi_insert_before (&bsi, + build_gimple_modify_stmt (loaded_val, initial), + BSI_SAME_STMT); + bsi_remove (&bsi, true); + + bsi = bsi_last (store_bb); + gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE); + + /* For floating-point values, we'll need to view-convert them to integers + so that we can perform the atomic compare and swap. Simplify the + following code by always setting up the "i"ntegral variables. */ + if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + { + loadedi = loaded_val; + storedi = stored_val; + iaddr = addr; + } + else + { + loadedi = force_gimple_operand_bsi (&bsi, + build1 (VIEW_CONVERT_EXPR, itype, + loaded_val), true, + NULL_TREE, true, BSI_SAME_STMT); + storedi = + force_gimple_operand_bsi (&bsi, + build1 (VIEW_CONVERT_EXPR, itype, + stored_val), true, NULL_TREE, true, + BSI_SAME_STMT); + iaddr = fold_convert (build_pointer_type (itype), addr); + } + + /* Build the compare&swap statement. */ + new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); + new_storedi = force_gimple_operand_bsi (&bsi, + fold_convert (itype, new_storedi), + true, NULL_TREE, + true, BSI_SAME_STMT); + if (storedi == stored_val) + new_stored = new_storedi; + else + new_stored = force_gimple_operand_bsi (&bsi, + build1 (VIEW_CONVERT_EXPR, type, + new_storedi), true, + NULL_TREE, true, BSI_SAME_STMT); + + if (gimple_in_ssa_p (cfun)) + old_vali = loadedi; + else + { + old_vali = create_tmp_var (itype, NULL); + x = build_gimple_modify_stmt (old_vali, loadedi); + bsi_insert_before (&bsi, x, BSI_SAME_STMT); + + x = build_gimple_modify_stmt (loaded_val, new_stored); + bsi_insert_before (&bsi, x, BSI_SAME_STMT); + } + + /* Note that we always perform the comparison as an integer, even for + floating point. This allows the atomic operation to properly + succeed even with NaNs and -0.0. */ + x = build3 (COND_EXPR, void_type_node, + build2 (NE_EXPR, boolean_type_node, + new_storedi, old_vali), NULL_TREE, NULL_TREE); + bsi_insert_before (&bsi, x, BSI_SAME_STMT); + + /* Update cfg. */ + e = single_succ_edge (store_bb); + e->flags &= ~EDGE_FALLTHRU; + e->flags |= EDGE_FALSE_VALUE; + + e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); + + /* Copy the new value to loaded_val (we already did that before the condition + if we are not in SSA). 
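In SSA form this is simply the other argument of the loop-header phi
created when the initial value was loaded; after this step the phi reads,
schematically,

	loaded_val = PHI <initial (load_bb), new_stored (store_bb)>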
*/
+ if (gimple_in_ssa_p (cfun))
+ {
+ phi = phi_nodes (loop_header);
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_stored);
+ }
+
+ /* Remove OMP_ATOMIC_STORE. */
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+
+ return true;
+}
+
+/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
+
+ GOMP_atomic_start ();
+ *addr = rhs;
+ GOMP_atomic_end ();
+
+ The result is not globally atomic, but works so long as all parallel
+ references are within #pragma omp atomic directives. According to
+ responses received from omp@openmp.org, this appears to be within spec,
+ which makes sense, since that's how several other compilers handle
+ this situation as well.
+ LOADED_VAL and ADDR are the operands of OMP_ATOMIC_LOAD we're expanding.
+ STORED_VAL is the operand of the matching OMP_ATOMIC_STORE.
+
+ We replace
+ OMP_ATOMIC_LOAD (loaded_val, addr) with
+ loaded_val = *addr;
+
+ and replace
+ OMP_ATOMIC_STORE (stored_val) with
+ *addr = stored_val;
+*/
+
+static bool
+expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
+ tree addr, tree loaded_val, tree stored_val)
+{
+ block_stmt_iterator bsi;
+ tree t;
+
+ bsi = bsi_last (load_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
+
+ t = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
+ t = build_function_call_expr (t, 0);
+ force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
+
+ t = build_gimple_modify_stmt (loaded_val, build_fold_indirect_ref (addr));
+ if (gimple_in_ssa_p (cfun))
+ SSA_NAME_DEF_STMT (loaded_val) = t;
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ bsi = bsi_last (store_bb);
+ gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
+
+ t = build_gimple_modify_stmt (build_fold_indirect_ref (unshare_expr (addr)),
+ stored_val);
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+
+ t = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
+ t = build_function_call_expr (t, 0);
+ force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
+ bsi_remove (&bsi, true);
+
+ if (gimple_in_ssa_p (cfun))
+ update_ssa (TODO_update_ssa_no_phi);
+ return true;
+}
+
+/* Expand an OMP_ATOMIC statement. We first try to expand it
+ using expand_omp_atomic_fetch_op. If that fails, we try
+ expand_omp_atomic_pipeline, and if that fails too, the
+ ultimate fallback is wrapping the operation in a mutex
+ (expand_omp_atomic_mutex). REGION is the atomic region built
+ by build_omp_regions_1(). */
+
+static void
+expand_omp_atomic (struct omp_region *region)
+{
+ basic_block load_bb = region->entry, store_bb = region->exit;
+ tree load = last_stmt (load_bb), store = last_stmt (store_bb);
+ tree loaded_val = TREE_OPERAND (load, 0);
+ tree addr = TREE_OPERAND (load, 1);
+ tree stored_val = TREE_OPERAND (store, 0);
+ tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
+ HOST_WIDE_INT index;
+
+ /* Make sure the type is one of the supported sizes. */
+ index = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ index = exact_log2 (index);
+ if (index >= 0 && index <= 4)
+ {
+ unsigned int align = TYPE_ALIGN_UNIT (type);
+
+ /* __sync builtins require strict data alignment. */
+ if (exact_log2 (align) >= index)
+ {
+ /* When possible, use specialized atomic update functions. 
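As a worked example of this gating: a 4-byte int yields
index = exact_log2 (4) = 2, and with its natural 4-byte alignment
exact_log2 (align) = 2 >= index, so the __sync paths below may be used;
the same int packed to 1-byte alignment fails the test and ends up in the
mutex fallback instead.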
*/
+ if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
+ && store_bb == single_succ (load_bb))
+ {
+ if (expand_omp_atomic_fetch_op (load_bb, addr,
+ loaded_val, stored_val, index))
+ return;
+ }
+
+ /* If we don't have specialized __sync builtins, try to implement
+ this as a compare-and-swap loop. */
+ if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
+ loaded_val, stored_val, index))
+ return;
+ }
+ }
+
+ /* The ultimate fallback is wrapping the operation in a mutex. */
+ expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}


@@ -3263,6 +3968,11 @@ expand_omp (struct omp_region *region)
{
while (region)
{
+ /* First, determine whether this is a combined parallel+workshare
+ region. */
+ if (region->type == OMP_PARALLEL)
+ determine_parallel_type (region);
+
if (region->inner)
expand_omp (region->inner);

@@ -3295,6 +4005,11 @@ expand_omp (struct omp_region *region)
expand_omp_synch (region);
break;

+ case OMP_ATOMIC_LOAD:
+ expand_omp_atomic (region);
+ break;
+
+
default:
gcc_unreachable ();
}
@@ -3305,10 +4020,13 @@


/* Helper for build_omp_regions. Scan the dominator tree starting at
- block BB. PARENT is the region that contains BB. */
+ block BB. PARENT is the region that contains BB. If SINGLE_TREE is
+ true, the function ends once a single tree is built (otherwise, the
+ whole forest of OMP constructs may be built). */

static void
-build_omp_regions_1 (basic_block bb, struct omp_region *parent)
+build_omp_regions_1 (basic_block bb, struct omp_region *parent,
+ bool single_tree)
{
block_stmt_iterator si;
tree stmt;
@@ -3322,7 +4040,6 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent)
stmt = bsi_stmt (si);
code = TREE_CODE (stmt);
-
if (code == OMP_RETURN)
{
/* STMT is the return point out of region PARENT. Mark it
@@ -3332,17 +4049,28 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent)
region = parent;
region->exit = bb;
parent = parent->outer;
-
- /* If REGION is a parallel region, determine whether it is
- a combined parallel+workshare region. */
- if (region->type == OMP_PARALLEL)
- determine_parallel_type (region);
}
+ else if (code == OMP_ATOMIC_STORE)
+ {
+ /* OMP_ATOMIC_STORE is analogous to OMP_RETURN, but matches with
+ OMP_ATOMIC_LOAD. */
+ gcc_assert (parent);
+ gcc_assert (parent->type == OMP_ATOMIC_LOAD);
+ region = parent;
+ region->exit = bb;
+ parent = parent->outer;
+ }
+
else if (code == OMP_CONTINUE)
{
gcc_assert (parent);
parent->cont = bb;
}
+ else if (code == OMP_SECTIONS_SWITCH)
+ {
+ /* OMP_SECTIONS_SWITCH is part of OMP_SECTIONS, and we do nothing for
+ it. */ ;
+ }
else
{
/* Otherwise, this directive becomes the parent for a new
@@ -3352,12 +4080,44 @@
region. */
region = new_omp_region (bb, code, parent);
parent = region;
}
}

+ if (single_tree && !parent)
+ return;
+
for (son = first_dom_son (CDI_DOMINATORS, bb);
son;
son = next_dom_son (CDI_DOMINATORS, son))
- build_omp_regions_1 (son, parent);
+ build_omp_regions_1 (son, parent, single_tree);
+}
+
+/* Builds the tree of OMP regions rooted at ROOT, storing it in
+ root_omp_region. */
+
+static void
+build_omp_regions_root (basic_block root)
+{
+ gcc_assert (root_omp_region == NULL);
+ build_omp_regions_1 (root, NULL, true);
+ gcc_assert (root_omp_region != NULL);
}

+/* Expands the OMP construct (and its subconstructs) starting in HEAD. 
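Unlike the pass-level expansion, this expands exactly one region tree in
place.  A minimal usage sketch (assuming dominance info is available, as
the dominator walk above requires; HEAD_BB here is a hypothetical name
for whatever block holds the directive):

	calculate_dominance_info (CDI_DOMINATORS);
	omp_expand_local (head_bb);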
*/ + +void +omp_expand_local (basic_block head) +{ + build_omp_regions_root (head); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + expand_omp (root_omp_region); + + free_omp_regions (); +} /* Scan the CFG and build a tree of OMP regions. Return the root of the OMP region tree. */ @@ -3367,7 +4127,7 @@ build_omp_regions (void) { gcc_assert (root_omp_region == NULL); calculate_dominance_info (CDI_DOMINATORS); - build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL); + build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false); } @@ -3392,8 +4152,6 @@ execute_expand_omp (void) expand_omp (root_omp_region); - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); cleanup_tree_cfg (); free_omp_regions (); @@ -3401,10 +4159,38 @@ execute_expand_omp (void) return 0; } +/* OMP expansion in SSA form. For testing purposes only. */ + +static bool +gate_expand_omp_ssa (void) +{ + return flag_openmp_ssa && flag_openmp != 0 && errorcount == 0; +} + +struct tree_opt_pass pass_expand_omp_ssa = +{ + "ompexpssa", /* name */ + gate_expand_omp_ssa, /* gate */ + execute_expand_omp, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + PROP_gimple_any, /* properties_required */ + PROP_gimple_lomp, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + +/* OMP expansion -- the default pass, run before creation of SSA form. */ + static bool gate_expand_omp (void) { - return flag_openmp != 0 && errorcount == 0; + return ((!flag_openmp_ssa || !optimize) + && flag_openmp != 0 && errorcount == 0); } struct tree_opt_pass pass_expand_omp = @@ -3431,7 +4217,7 @@ struct tree_opt_pass pass_expand_omp = static void lower_omp_sections (tree *stmt_p, omp_context *ctx) { - tree new_stmt, stmt, body, bind, block, ilist, olist, new_body; + tree new_stmt, stmt, body, bind, block, ilist, olist, new_body, control; tree t, dlist; tree_stmt_iterator tsi; unsigned i, len; @@ -3493,9 +4279,12 @@ lower_omp_sections (tree *stmt_p, omp_context *ctx) new_body = alloc_stmt_list (); append_to_statement_list (ilist, &new_body); append_to_statement_list (stmt, &new_body); + append_to_statement_list (make_node (OMP_SECTIONS_SWITCH), &new_body); append_to_statement_list (bind, &new_body); - t = make_node (OMP_CONTINUE); + control = create_tmp_var (unsigned_type_node, ".section"); + t = build2 (OMP_CONTINUE, void_type_node, control, control); + OMP_SECTIONS_CONTROL (stmt) = control; append_to_statement_list (t, &new_body); append_to_statement_list (olist, &new_body); @@ -3531,8 +4320,7 @@ lower_omp_single_simple (tree single_stmt, tree *pre_p) { tree t; - t = built_in_decls[BUILT_IN_GOMP_SINGLE_START]; - t = build_function_call_expr (t, NULL); + t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_START], 0); t = build3 (COND_EXPR, void_type_node, t, OMP_SINGLE_BODY (single_stmt), NULL); gimplify_and_add (t, pre_p); @@ -3571,7 +4359,7 @@ lower_omp_single_simple (tree single_stmt, tree *pre_p) static void lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx) { - tree ptr_type, t, args, l0, l1, l2, copyin_seq; + tree ptr_type, t, l0, l1, l2, copyin_seq; ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o"); @@ -3582,10 +4370,9 @@ lower_omp_single_copy (tree single_stmt, tree *pre_p, 
omp_context *ctx)
l1 = create_artificial_label ();
l2 = create_artificial_label ();

- t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START];
- t = build_function_call_expr (t, NULL);
+ t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START], 0);
t = fold_convert (ptr_type, t);
- t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
+ t = build_gimple_modify_stmt (ctx->receiver_decl, t);
gimplify_and_add (t, pre_p);

t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
@@ -3604,9 +4391,7 @@ lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx)
&copyin_seq, ctx);

t = build_fold_addr_expr (ctx->sender_decl);
- args = tree_cons (NULL, t, NULL);
- t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END];
- t = build_function_call_expr (t, args);
+ t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END], 1, t);
gimplify_and_add (t, pre_p);

t = build_and_jump (&l2);
@@ -3679,8 +4464,7 @@ lower_omp_master (tree *stmt_p, omp_context *ctx)

append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));

- x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
- x = build_function_call_expr (x, NULL);
+ x = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
gimplify_and_add (x, &BIND_EXPR_BODY (bind));
@@ -3719,8 +4503,7 @@ lower_omp_ordered (tree *stmt_p, omp_context *ctx)

append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));

- x = built_in_decls[BUILT_IN_GOMP_ORDERED_START];
- x = build_function_call_expr (x, NULL);
+ x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ORDERED_START], 0);
gimplify_and_add (x, &BIND_EXPR_BODY (bind));

lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
@@ -3728,8 +4511,7 @@
append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));
OMP_ORDERED_BODY (stmt) = NULL;

- x = built_in_decls[BUILT_IN_GOMP_ORDERED_END];
- x = build_function_call_expr (x, NULL);
+ x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ORDERED_END], 0);
gimplify_and_add (x, &BIND_EXPR_BODY (bind));

x = make_node (OMP_RETURN);
@@ -3760,7 +4542,7 @@ lower_omp_critical (tree *stmt_p, omp_context *ctx)
name = OMP_CRITICAL_NAME (stmt);
if (name)
{
- tree decl, args;
+ tree decl;
splay_tree_node n;

if (!critical_name_mutexes)
@@ -3782,7 +4564,7 @@ lower_omp_critical (tree *stmt_p, omp_context *ctx)
DECL_COMMON (decl) = 1;
DECL_ARTIFICIAL (decl) = 1;
DECL_IGNORED_P (decl) = 1;
- cgraph_varpool_finalize_decl (decl);
+ varpool_finalize_decl (decl);

splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
(splay_tree_value) decl);
@@ -3790,21 +4572,19 @@
else
decl = (tree) n->value;

- args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
- lock = build_function_call_expr (lock, args);
+ lock = build_call_expr (lock, 1, build_fold_addr_expr (decl));

- args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
- unlock = build_function_call_expr (unlock, args);
+ unlock = build_call_expr (unlock, 1, build_fold_addr_expr (decl));
}
else
{
lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
- lock = build_function_call_expr (lock, NULL);
+ lock = build_call_expr (lock, 0);

unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
- unlock = 
build_call_expr (unlock, 0);
}

push_gimplify_context ();
@@ -3837,13 +4617,14 @@ lower_omp_critical (tree *stmt_p, omp_context *ctx)

/* A subroutine of lower_omp_for. Generate code to emit the predicate
for a lastprivate clause. Given a loop control predicate of (V cond N2),
we gate the clause on (!(V cond N2)). The lowered form
- is appended to *BODY_P. */
+ is appended to *DLIST; the iterator initialization is appended to
+ *BODY_P. */

static void
lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
- struct omp_context *ctx)
+ tree *dlist, struct omp_context *ctx)
{
- tree clauses, cond;
+ tree clauses, cond, stmts, vinit, t;
enum tree_code cond_code;

cond_code = fd->cond_code;
@@ -3861,7 +4642,24 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);

clauses = OMP_FOR_CLAUSES (fd->for_stmt);
- lower_lastprivate_clauses (clauses, cond, body_p, ctx);
+ stmts = NULL;
+ lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
+ if (stmts != NULL)
+ {
+ append_to_statement_list (stmts, dlist);
+
+ /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
+ vinit = fd->n1;
+ if (cond_code == EQ_EXPR
+ && host_integerp (fd->n2, 0)
+ && ! integer_zerop (fd->n2))
+ vinit = build_int_cst (TREE_TYPE (fd->v), 0);
+
+ /* Initialize the iterator variable, so that threads that don't execute
+ any iterations don't execute the lastprivate clauses by accident. */
+ t = build_gimple_modify_stmt (fd->v, vinit);
+ gimplify_and_add (t, body_p);
+ }
}


@@ -3902,7 +4700,7 @@ lower_omp_for (tree *stmt_p, omp_context *ctx)

We just need to make sure that VAL1, VAL2 and VAL3 are lowered
using the .omp_data_s mapping, if needed. */
- rhs_p = &TREE_OPERAND (OMP_FOR_INIT (stmt), 1);
+ rhs_p = &GIMPLE_STMT_OPERAND (OMP_FOR_INIT (stmt), 1);
if (!is_gimple_min_invariant (*rhs_p))
*rhs_p = get_formal_tmp_var (*rhs_p, body_p);

@@ -3910,22 +4708,23 @@ lower_omp_for (tree *stmt_p, omp_context *ctx)
if (!is_gimple_min_invariant (*rhs_p))
*rhs_p = get_formal_tmp_var (*rhs_p, body_p);

- rhs_p = &TREE_OPERAND (TREE_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
+ rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
if (!is_gimple_min_invariant (*rhs_p))
*rhs_p = get_formal_tmp_var (*rhs_p, body_p);

/* Once lowered, extract the bounds and clauses. */
extract_omp_for_data (stmt, &fd);

+ lower_omp_for_lastprivate (&fd, body_p, &dlist, ctx);
+
append_to_statement_list (stmt, body_p);

append_to_statement_list (OMP_FOR_BODY (stmt), body_p);

- t = make_node (OMP_CONTINUE);
+ t = build2 (OMP_CONTINUE, void_type_node, fd.v, fd.v);
append_to_statement_list (t, body_p);

/* After the loop, add exit clauses. */
- lower_omp_for_lastprivate (&fd, &dlist, ctx);
lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
append_to_statement_list (dlist, body_p);

@@ -3944,6 +4743,28 @@ lower_omp_for (tree *stmt_p, omp_context *ctx)
*stmt_p = new_stmt;
}

+/* Callback for walk_stmts. Check if *TP only contains OMP_FOR
+ or OMP_SECTIONS. */
+
+static tree
+check_combined_parallel (tree *tp, int *walk_subtrees, void *data)
+{
+ struct walk_stmt_info *wi = data;
+ int *info = wi->info;
+
+ *walk_subtrees = 0;
+ switch (TREE_CODE (*tp))
+ {
+ case OMP_FOR:
+ case OMP_SECTIONS:
+ *info = *info == 0 ? 1 : -1;
+ break;
+ default:
+ *info = -1;
+ break;
+ }
+ return NULL;
+}

/* Lower the OpenMP parallel directive in *STMT_P. CTX holds context
information for the directive. 
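The lowering below starts by detecting bodies that consist of exactly one
workshare construct, e.g.

	#pragma omp parallel
	#pragma omp for
	for (i = 0; i < n; i++)
	  body;

and marks such statements OMP_PARALLEL_COMBINED (ws_num stays 1 in the
walk); any additional statement or second workshare construct drives
ws_num to -1 and leaves the directive unmarked.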
*/ @@ -3961,6 +4782,19 @@ lower_omp_parallel (tree *stmt_p, omp_context *ctx) par_bind = OMP_PARALLEL_BODY (stmt); par_body = BIND_EXPR_BODY (par_bind); child_fn = ctx->cb.dst_fn; + if (!OMP_PARALLEL_COMBINED (stmt)) + { + struct walk_stmt_info wi; + int ws_num = 0; + + memset (&wi, 0, sizeof (wi)); + wi.callback = check_combined_parallel; + wi.info = &ws_num; + wi.val_only = true; + walk_stmts (&wi, &par_bind); + if (ws_num == 1) + OMP_PARALLEL_COMBINED (stmt) = 1; + } push_gimplify_context (); @@ -3996,7 +4830,9 @@ lower_omp_parallel (tree *stmt_p, omp_context *ctx) if (ctx->record_type) { t = build_fold_addr_expr (ctx->sender_decl); - t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t); + /* fixup_child_record_type might have changed receiver_decl's type. */ + t = fold_convert (TREE_TYPE (ctx->receiver_decl), t); + t = build_gimple_modify_stmt (ctx->receiver_decl, t); append_to_statement_list (t, &new_body); } @@ -4039,6 +4875,38 @@ lower_regimplify (tree *tp, struct walk_stmt_info *wi) tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT); } +/* Copy EXP into a temporary. Insert the initialization statement before TSI. */ + +static tree +init_tmp_var (tree exp, tree_stmt_iterator *tsi) +{ + tree t, stmt; + + t = create_tmp_var (TREE_TYPE (exp), NULL); + DECL_GIMPLE_REG_P (t) = 1; + stmt = build_gimple_modify_stmt (t, exp); + SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi))); + tsi_link_before (tsi, stmt, TSI_SAME_STMT); + + return t; +} + +/* Similarly, but copy from the temporary and insert the statement + after the iterator. */ + +static tree +save_tmp_var (tree exp, tree_stmt_iterator *tsi) +{ + tree t, stmt; + + t = create_tmp_var (TREE_TYPE (exp), NULL); + DECL_GIMPLE_REG_P (t) = 1; + stmt = build_gimple_modify_stmt (exp, t); + SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi))); + tsi_link_after (tsi, stmt, TSI_SAME_STMT); + + return t; +} /* Callback for walk_stmts. Lower the OpenMP directive pointed by TP. */ @@ -4104,7 +4972,17 @@ lower_omp_1 (tree *tp, int *walk_subtrees, void *data) case VAR_DECL: if (ctx && DECL_HAS_VALUE_EXPR_P (t)) - lower_regimplify (tp, wi); + { + lower_regimplify (&t, wi); + if (wi->val_only) + { + if (wi->is_lhs) + t = save_tmp_var (t, &wi->tsi); + else + t = init_tmp_var (t, &wi->tsi); + } + *tp = t; + } break; case ADDR_EXPR: