1 /* Lowering pass for OpenMP directives. Converts OpenMP directives
2 into explicit calls to the runtime library (libgomp) and data
3 marshalling to implement data sharing and copying clauses.
4 Contributed by Diego Novillo <dnovillo@redhat.com>
6 Copyright (C) 2005, 2006 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
27 #include "coretypes.h"
31 #include "tree-gimple.h"
32 #include "tree-inline.h"
33 #include "langhooks.h"
34 #include "diagnostic.h"
35 #include "tree-flow.h"
41 #include "tree-pass.h"
46 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
47 phases. The first phase scans the function looking for OMP statements
48 and then for variables that must be replaced to satisfy data sharing
49 clauses. The second phase expands code for the constructs, as well as
50 re-gimplifying things when variables have been replaced with complex
expressions.
53 Final code generation is done by pass_expand_omp. The flowgraph is
54 scanned for parallel regions which are then moved to a new
55 function, to be invoked by the thread library. */
57 /* Context structure. Used to store information about each OpenMP
58 directive in the code for which new_omp_context is called (parallel
as well as loop, sections, single and other constructs; see the
scan_omp_* routines). */
60 typedef struct omp_context
62 /* This field must be at the beginning, as we do "inheritance": Some
63 callback functions for tree-inline.c (e.g., omp_copy_decl)
64 receive a copy_body_data pointer that is cast back to an
65 omp_context pointer. */
68 /* The tree of contexts corresponding to the encountered constructs. */
69 struct omp_context *outer;
72 /* Map variables to fields in a structure that allows communication
73 between sending and receiving threads. */
79 /* A chain of variables to add to the top-level block surrounding the
80 construct. In the case of a parallel, this is in the child function. */
83 /* What to do with variables with implicitly determined sharing
85 enum omp_clause_default_kind default_kind;
87 /* Nesting depth of this context. Used to beautify error messages re
88 invalid gotos. The outermost ctx is depth 1, with depth 0 being
89 reserved for the main body of the function. */
92 /* True if this parallel directive is nested within another. */
97 /* A structure describing the main elements of a parallel loop. It is
filled in by extract_omp_for_data. */
/* V is the iteration variable, N1 the value it is initialized with,
   N2 the bound it is compared against, STEP the increment, CHUNK_SIZE
   the schedule chunk expression (or NULL_TREE) and FOR_STMT the loop
   statement itself.  */
101 tree v, n1, n2, step, chunk_size, for_stmt;
/* Comparison code of the loop condition.  */
102 enum tree_code cond_code;
/* Set when a nowait, respectively ordered, clause is present.  */
104 bool have_nowait, have_ordered;
/* Schedule kind; extract_omp_for_data defaults it to
   OMP_CLAUSE_SCHEDULE_STATIC when no schedule clause is given.  */
105 enum omp_clause_schedule_kind sched_kind;
/* Maps each OpenMP statement to the omp_context that new_omp_context
   created for it.  */
109 static splay_tree all_contexts;
/* Incremented around the call to scan_omp_parallel in scan_omp_1; a
   value greater than 1 makes scan_omp_parallel mark the context as
   nested.  */
110 static int parallel_nesting_level;
/* Head of the list of toplevel regions maintained by new_omp_region
   and released by free_omp_regions.  */
111 struct omp_region *root_omp_region;
/* Forward declarations.  */
113 static void scan_omp (tree *, omp_context *);
114 static void lower_omp (tree *, omp_context *);
115 static tree lookup_decl_in_outer_ctx (tree, omp_context *);
116 static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
118 /* Find an OpenMP clause of type KIND within CLAUSES. The list is
walked through OMP_CLAUSE_CHAIN and the OMP_CLAUSE_CODE of each clause
is compared with KIND. Callers in this file test the result against
NULL to learn whether such a clause is present. */
121 find_omp_clause (tree clauses, enum tree_code kind)
123 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
124 if (OMP_CLAUSE_CODE (clauses) == kind)
130 /* Return true if CTX is for an omp parallel, i.e. the statement
recorded in CTX->stmt has tree code OMP_PARALLEL. CTX is dereferenced
unconditionally and so must not be NULL. */
133 is_parallel_ctx (omp_context *ctx)
135 return TREE_CODE (ctx->stmt) == OMP_PARALLEL;
139 /* Return true if REGION is a combined parallel+workshare region.
This only reads the is_combined_parallel flag of REGION, which is
computed by determine_parallel_type. REGION must not be NULL. */
142 is_combined_parallel (struct omp_region *region)
144 return region->is_combined_parallel;
148 /* Extract the header elements of parallel loop FOR_STMT and store
them into *FD. */
152 extract_omp_for_data (tree for_stmt, struct omp_for_data *fd)
156 fd->for_stmt = for_stmt;
/* Loop initialization: a MODIFY_EXPR whose left-hand side is the
   iteration variable V (a decl of INTEGER_TYPE) and whose right-hand
   side is the initial value N1.  */
159 t = OMP_FOR_INIT (for_stmt);
160 gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
161 fd->v = TREE_OPERAND (t, 0);
162 gcc_assert (DECL_P (fd->v));
163 gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE);
164 fd->n1 = TREE_OPERAND (t, 1);
/* Loop condition: V compared against the bound N2.  */
166 t = OMP_FOR_COND (for_stmt);
167 fd->cond_code = TREE_CODE (t);
168 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
169 fd->n2 = TREE_OPERAND (t, 1);
170 switch (fd->cond_code)
/* Move the bound up by one so that the comparison becomes a strict
   LT_EXPR.  NOTE(review): the case label selecting this arm is not
   visible; presumably LE_EXPR -- confirm.  */
176 fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
177 build_int_cst (TREE_TYPE (fd->n2), 1));
178 fd->cond_code = LT_EXPR;
/* Likewise move the bound down by one and use a strict GT_EXPR.
   NOTE(review): presumably the GE_EXPR arm -- confirm.  */
181 fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
182 build_int_cst (TREE_TYPE (fd->n2), 1));
183 fd->cond_code = GT_EXPR;
/* Loop increment: a MODIFY_EXPR assigning to V an expression whose
   first operand is V and whose second operand is the step.  */
189 t = OMP_FOR_INCR (fd->for_stmt);
190 gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
191 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
192 t = TREE_OPERAND (t, 1);
193 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
194 switch (TREE_CODE (t))
197 fd->step = TREE_OPERAND (t, 1);
/* In this arm the step is stored negated.  NOTE(review): presumably
   the decrementing (MINUS_EXPR) form -- confirm.  */
200 fd->step = TREE_OPERAND (t, 1);
201 fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step);
/* Defaults used when the corresponding clauses are absent.  */
207 fd->have_nowait = fd->have_ordered = false;
208 fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
209 fd->chunk_size = NULL_TREE;
/* Override the defaults from the nowait, ordered and schedule
   clauses attached to the loop.  */
211 for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
212 switch (OMP_CLAUSE_CODE (t))
214 case OMP_CLAUSE_NOWAIT:
215 fd->have_nowait = true;
217 case OMP_CLAUSE_ORDERED:
218 fd->have_ordered = true;
220 case OMP_CLAUSE_SCHEDULE:
221 fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
222 fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
/* A runtime schedule must not come with a chunk size.  */
228 if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
229 gcc_assert (fd->chunk_size == NULL);
230 else if (fd->chunk_size == NULL)
232 /* We only need to compute a default chunk size for ordered
233 static loops and dynamic loops. */
234 if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered)
235 fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
236 ? integer_zero_node : integer_one_node;
241 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
242 is the immediate dominator of PAR_ENTRY_BB, return true if there
243 are no data dependencies that would prevent expanding the parallel
244 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
NOTE(review): the example below and the single_succ check made by the
caller (determine_parallel_type) both have PAR_ENTRY_BB preceding
WS_ENTRY_BB, so the dominance relation above looks stated the wrong
way round -- confirm.
246 When expanding a combined parallel+workshare region, the call to
247 the child function may need additional arguments in the case of
248 OMP_FOR regions. In some cases, these arguments are computed out
249 of variables passed in from the parent to the child via 'struct
250 .omp_data_s'. For instance:
252 #pragma omp parallel for schedule (guided, i * 4)
257 # BLOCK 2 (PAR_ENTRY_BB)
259 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
261 # BLOCK 3 (WS_ENTRY_BB)
262 .omp_data_i = &.omp_data_o;
263 D.1667 = .omp_data_i->i;
265 #pragma omp for schedule (guided, D.1598)
267 When we outline the parallel region, the call to the child function
268 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
269 that value is computed *after* the call site. So, in principle we
270 cannot do the transformation.
272 To see whether the code in WS_ENTRY_BB blocks the combined
273 parallel+workshare call, we collect all the variables used in the
274 OMP_FOR header and check whether they appear on the LHS of any
275 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
call.
278 FIXME. If we had the SSA form built at this point, we could merely
279 hoist the code in block 3 into block 2 and be done with it. But at
280 this point we don't have dataflow information and though we could
281 hack something up here, it is really not worth the aggravation. */
284 workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb)
286 struct omp_for_data fd;
287 tree par_stmt, ws_stmt;
/* The directives are the last statements of their entry blocks.  */
289 par_stmt = last_stmt (par_entry_bb);
290 ws_stmt = last_stmt (ws_entry_bb);
/* Only OMP_FOR needs the loop-header analysis below; the workshare
   statement is otherwise expected to be OMP_SECTIONS.  */
292 if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
295 gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR);
297 extract_omp_for_data (ws_stmt, &fd);
299 /* FIXME. We give up too easily here. If any of these arguments
300 are not constants, they will likely involve variables that have
301 been mapped into fields of .omp_data_s for sharing with the child
302 function. With appropriate data flow, it would be possible to
304 if (!is_gimple_min_invariant (fd.n1)
305 || !is_gimple_min_invariant (fd.n2)
306 || !is_gimple_min_invariant (fd.step)
307 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
314 /* Collect additional arguments needed to emit a combined
315 parallel+workshare call. WS_STMT is the workshare directive being
expanded. */
319 get_ws_args_for (tree ws_stmt)
/* For a loop, the arguments are the chunk size, step, end and start
   values from the loop header, each converted to long.  Every new
   value is consed with the list built so far as its chain argument,
   which is why they are added in this reverse order.
   NOTE(review): the chunk size entry is presumably only added when
   fd.chunk_size is non-NULL; the guard is not visible -- confirm.  */
323 if (TREE_CODE (ws_stmt) == OMP_FOR)
325 struct omp_for_data fd;
328 extract_omp_for_data (ws_stmt, &fd);
333 t = fold_convert (long_integer_type_node, fd.chunk_size);
334 ws_args = tree_cons (NULL, t, ws_args);
337 t = fold_convert (long_integer_type_node, fd.step);
338 ws_args = tree_cons (NULL, t, ws_args);
340 t = fold_convert (long_integer_type_node, fd.n2);
341 ws_args = tree_cons (NULL, t, ws_args);
343 t = fold_convert (long_integer_type_node, fd.n1);
344 ws_args = tree_cons (NULL, t, ws_args);
/* For sections, the single argument is an unsigned constant holding
   the number of successor edges of the block containing WS_STMT.  */
348 else if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
350 basic_block bb = bb_for_stmt (ws_stmt);
351 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs));
352 t = tree_cons (NULL, t, NULL);
360 /* Discover whether REGION is a combined parallel+workshare region.
When it is, the is_combined_parallel flag is set on both REGION and
REGION->inner and the extra arguments for the combined library call
are stored in REGION->ws_args. */
363 determine_parallel_type (struct omp_region *region)
365 basic_block par_entry_bb, par_exit_bb;
366 basic_block ws_entry_bb, ws_exit_bb;
/* NOTE(review): regions without an inner region or without exit
   blocks are presumably rejected right here -- confirm.  */
368 if (region == NULL || region->inner == NULL
369 || region->exit == NULL || region->inner->exit == NULL)
372 /* We only support parallel+for and parallel+sections. */
373 if (region->type != OMP_PARALLEL
374 || (region->inner->type != OMP_FOR
375 && region->inner->type != OMP_SECTIONS))
378 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
379 WS_EXIT_BB -> PAR_EXIT_BB. */
380 par_entry_bb = region->entry;
381 par_exit_bb = region->exit;
382 ws_entry_bb = region->inner->entry;
383 ws_exit_bb = region->inner->exit;
385 if (single_succ (par_entry_bb) == ws_entry_bb
386 && single_succ (ws_exit_bb) == par_exit_bb
387 && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb))
389 tree ws_stmt = last_stmt (region->inner->entry);
391 if (region->inner->type == OMP_FOR)
393 /* If this is a combined parallel loop, we need to determine
394 whether or not to use the combined library calls. There
395 are two cases where we do not apply the transformation:
396 static loops and any kind of ordered loop. In the first
397 case, we already open code the loop so there is no need
398 to do anything else. In the latter case, the combined
399 parallel loop call would still need extra synchronization
400 to implement ordered semantics, so there would not be any
401 gain in using the combined call. */
402 tree clauses = OMP_FOR_CLAUSES (ws_stmt);
403 tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
405 || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC
406 || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
408 region->is_combined_parallel = false;
409 region->inner->is_combined_parallel = false;
414 region->is_combined_parallel = true;
415 region->inner->is_combined_parallel = true;
416 region->ws_args = get_ws_args_for (ws_stmt);
421 /* Return true if EXPR is variable sized, i.e. the TYPE_SIZE_UNIT
of the type of EXPR is not TREE_CONSTANT. */
424 is_variable_sized (tree expr)
426 return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
429 /* Return true if DECL is a reference type. The decision is
delegated to the front end through the omp_privatize_by_reference
language hook. */
432 is_reference (tree decl)
434 return lang_hooks.decls.omp_privatize_by_reference (decl);
437 /* Lookup variables in the decl or field splay trees. The "maybe" form
438 allows for the variable form to not have been entered, otherwise we
439 assert that the variable must have been entered. */
/* Return the decl that VAR is mapped to in the decl map of CTX.  The
   lookup result is dereferenced unconditionally, so VAR must already
   have been entered.  */
442 lookup_decl (tree var, omp_context *ctx)
445 n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
446 return (tree) n->value;
/* Like lookup_decl, but return NULL_TREE when VAR has no entry in the
   decl map of CTX.  */
450 maybe_lookup_decl (tree var, omp_context *ctx)
453 n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
454 return n ? (tree) n->value : NULL_TREE;
/* Return the field that VAR is mapped to in the field map of CTX.
   The lookup result is dereferenced unconditionally, so VAR must
   already have a field.  */
458 lookup_field (tree var, omp_context *ctx)
461 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
462 return (tree) n->value;
/* Like lookup_field, but return NULL_TREE when VAR has no entry in
   the field map of CTX.  */
466 maybe_lookup_field (tree var, omp_context *ctx)
469 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
470 return n ? (tree) n->value : NULL_TREE;
473 /* Return true if DECL should be copied by pointer. SHARED_P is true
474 if DECL is to be shared. The properties examined are, in order:
whether DECL has an aggregate type, whether it is static or external,
whether it has a DECL_VALUE_EXPR, and whether it is addressable. */
477 use_pointer_for_field (tree decl, bool shared_p)
479 if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
482 /* We can only use copy-in/copy-out semantics for shared variables
483 when we know the value is not accessible from an outer scope. */
486 /* ??? Trivially accessible from anywhere. But why would we even
487 be passing an address in this case? Should we simply assert
488 this to be false, or should we have a cleanup pass that removes
489 these from the list of mappings? */
490 if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
493 /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
494 without analyzing the expression whether or not its location
495 is accessible to anyone else. In the case of nested parallel
496 regions it certainly may be. */
497 if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
500 /* Do not use copy-in/copy-out for variables that have their
502 if (TREE_ADDRESSABLE (decl))
509 /* Construct a new automatic decl similar to VAR. The copy is a
VAR_DECL with the given NAME and TYPE that inherits the addressable,
complex-gimple-register, artificial and ignored flags of VAR. It is
placed in the current function and pushed onto the front of the
block_vars chain of CTX. */
512 omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
514 tree copy = build_decl (VAR_DECL, name, type);
516 TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
517 DECL_COMPLEX_GIMPLE_REG_P (copy) = DECL_COMPLEX_GIMPLE_REG_P (var);
518 DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
519 DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
520 TREE_USED (copy) = 1;
521 DECL_CONTEXT (copy) = current_function_decl;
522 DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;
524 TREE_CHAIN (copy) = ctx->block_vars;
525 ctx->block_vars = copy;
/* Construct a copy of VAR in CTX that keeps the name and type of VAR;
   see omp_copy_decl_2.  */
531 omp_copy_decl_1 (tree var, omp_context *ctx)
533 return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
536 /* Build tree nodes to access the field for VAR on the receiver side.
The reference is a COMPONENT_REF of VAR's field in the object that
CTX->receiver_decl points to. VAR must already have a field in CTX
(see lookup_field).
NOTE(review): the extra indirection at the end is presumably applied
only when BY_REF is true; the guard is not visible -- confirm. */
539 build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
541 tree x, field = lookup_field (var, ctx);
543 /* If the receiver record type was remapped in the child function,
544 remap the field into the new record type. */
545 x = maybe_lookup_field (field, ctx);
549 x = build_fold_indirect_ref (ctx->receiver_decl);
550 x = build3 (COMPONENT_REF, TREE_TYPE (field), x, field, NULL);
552 x = build_fold_indirect_ref (x);
557 /* Build tree nodes to access VAR in the scope outer to CTX. In the case
558 of a parallel, this is a component reference; for workshare constructs
559 this is some variable. */
562 build_outer_var_ref (tree var, omp_context *ctx)
566 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
/* A variable-sized VAR is reached through the pointer in operand 0 of
   its DECL_VALUE_EXPR: build the outer reference to that pointer
   (recursively) and dereference it.  */
568 else if (is_variable_sized (var))
570 x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
571 x = build_outer_var_ref (x, ctx);
572 x = build_fold_indirect_ref (x);
/* In a parallel the outer value is read from the receiver record.  */
574 else if (is_parallel_ctx (ctx))
576 bool by_ref = use_pointer_for_field (var, false);
577 x = build_receiver_ref (var, by_ref, ctx);
580 x = lookup_decl (var, ctx->outer);
581 else if (is_reference (var))
582 /* This can happen with orphaned constructs. If var is reference, it is
583 possible it is shared and as such valid. */
/* Reference variables are accessed through one more indirection.  */
588 if (is_reference (var))
589 x = build_fold_indirect_ref (x);
594 /* Build tree nodes to access the field for VAR on the sender side.
The result is a COMPONENT_REF of VAR's field applied directly to
CTX->sender_decl. VAR must already have a field in CTX (see
lookup_field). */
597 build_sender_ref (tree var, omp_context *ctx)
599 tree field = lookup_field (var, ctx);
600 return build3 (COMPONENT_REF, TREE_TYPE (field),
601 ctx->sender_decl, field, NULL);
604 /* Add a new field for VAR inside the structure CTX->SENDER_DECL. The
field is named after VAR, inserted into CTX->record_type and recorded
in CTX->field_map under VAR. VAR must not have a field yet.
NOTE(review): the field presumably gets pointer-to-VAR type only when
BY_REF is true; the guard is not visible -- confirm. */
607 install_var_field (tree var, bool by_ref, omp_context *ctx)
611 gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
613 type = TREE_TYPE (var);
615 type = build_pointer_type (type);
617 field = build_decl (FIELD_DECL, DECL_NAME (var), type);
619 /* Remember what variable this field was created for. This does have a
620 side effect of making dwarf2out ignore this member, so for helpful
621 debugging we clear it later in delete_omp_context. */
622 DECL_ABSTRACT_ORIGIN (field) = var;
624 insert_field_into_struct (ctx->record_type, field);
626 splay_tree_insert (ctx->field_map, (splay_tree_key) var,
627 (splay_tree_value) field);
/* Create a local copy of VAR in CTX (see omp_copy_decl_1) and record
   the mapping from VAR to the copy in the decl map of CTX.  */
631 install_var_local (tree var, omp_context *ctx)
633 tree new_var = omp_copy_decl_1 (var, ctx);
634 insert_decl_map (&ctx->cb, var, new_var);
638 /* Adjust the replacement for DECL in CTX for the new context. This means
639 copying the DECL_VALUE_EXPR, and fixing up the type. DECL must
already be mapped in CTX (see lookup_decl). PRIVATE_DEBUG forces the
value expression to be copied even when the size is constant. */
642 fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
646 new_decl = lookup_decl (decl, ctx);
648 TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);
/* Give the replacement its own, remapped copy of the value
   expression.  */
650 if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
651 && DECL_HAS_VALUE_EXPR_P (decl))
653 tree ve = DECL_VALUE_EXPR (decl);
654 walk_tree (&ve, copy_body_r, &ctx->cb, NULL);
655 SET_DECL_VALUE_EXPR (new_decl, ve);
656 DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
/* For a non-constant size, remap the size decls as well, falling
   back to the sizes of the remapped type when remap_decl yields
   error_mark_node.  */
659 if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
661 size = remap_decl (DECL_SIZE (decl), &ctx->cb);
662 if (size == error_mark_node)
663 size = TYPE_SIZE (TREE_TYPE (new_decl));
664 DECL_SIZE (new_decl) = size;
666 size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
667 if (size == error_mark_node)
668 size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
669 DECL_SIZE_UNIT (new_decl) = size;
673 /* The callback for remap_decl. Search all containing contexts for a
674 mapping of the variable; this avoids having to duplicate the splay
675 tree ahead of time. We know a mapping doesn't already exist in the
676 given context. Create new mappings to implement default semantics.
CB is really the copy_body_data embedded at the start of an
omp_context and is cast back to that type. */
679 omp_copy_decl (tree var, copy_body_data *cb)
681 omp_context *ctx = (omp_context *) cb;
/* Labels get a fresh artificial label in the current function, which
   is recorded in the decl map of CTX.  */
684 if (TREE_CODE (var) == LABEL_DECL)
686 new_var = create_artificial_label ();
687 DECL_CONTEXT (new_var) = current_function_decl;
688 insert_decl_map (&ctx->cb, var, new_var);
/* Look for an existing mapping in the contexts up to the enclosing
   parallel.  */
692 while (!is_parallel_ctx (ctx))
697 new_var = maybe_lookup_decl (var, ctx);
702 if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
705 return error_mark_node;
709 /* Return the parallel region associated with STMT. */
711 /* Debugging dumps for parallel regions. These prototypes carry no
`static', so the routines have external linkage (presumably so that
they can be called from a debugger -- confirm). */
712 void dump_omp_region (FILE *, struct omp_region *, int);
713 void debug_omp_region (struct omp_region *);
714 void debug_all_omp_regions (void);
716 /* Dump the parallel region tree rooted at REGION to FILE, indenting
every line by INDENT columns. For each region this prints the index
of its entry block and the name of its type, then the inner regions
with four more columns of indentation, then the OMP_CONTINUE and
OMP_RETURN block indices (or "[no exit marker]"), and finally the
sibling regions at the same indentation. */
719 dump_omp_region (FILE *file, struct omp_region *region, int indent)
721 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
722 tree_code_name[region->type]);
725 dump_omp_region (file, region->inner, indent + 4);
729 fprintf (file, "%*sbb %d: OMP_CONTINUE\n", indent, "",
730 region->cont->index);
734 fprintf (file, "%*sbb %d: OMP_RETURN\n", indent, "",
735 region->exit->index);
737 fprintf (file, "%*s[no exit marker]\n", indent, "");
740 dump_omp_region (file, region->next, indent);
/* Dump the region tree rooted at REGION to stderr with no
   indentation.  */
744 debug_omp_region (struct omp_region *region)
746 dump_omp_region (stderr, region, 0);
/* Dump every region reachable from ROOT_OMP_REGION to stderr.  */
750 debug_all_omp_regions (void)
752 dump_omp_region (stderr, root_omp_region, 0);
756 /* Create a new region of kind TYPE for basic block BB inside region
PARENT. The region is allocated zero-initialized with xcalloc and is
linked at the head of PARENT's list of inner regions, or at the head
of ROOT_OMP_REGION when it is a toplevel region. */
759 new_omp_region (basic_block bb, enum tree_code type, struct omp_region *parent)
761 struct omp_region *region = xcalloc (1, sizeof (*region));
763 region->outer = parent;
769 /* This is a nested region. Add it to the list of inner
770 regions in PARENT. */
771 region->next = parent->inner;
772 parent->inner = region;
776 /* This is a toplevel region. Add it to the list of toplevel
777 regions in ROOT_OMP_REGION. */
778 region->next = root_omp_region;
779 root_omp_region = region;
785 /* Release the memory associated with the region tree rooted at REGION.
The inner regions are released recursively; N holds the successor of
the region being released while the list is walked. */
788 free_omp_region_1 (struct omp_region *region)
790 struct omp_region *i, *n;
792 for (i = region->inner; i ; i = n)
795 free_omp_region_1 (i);
801 /* Release the memory for the entire omp region tree. Every toplevel
region on the ROOT_OMP_REGION list is released with free_omp_region_1
and ROOT_OMP_REGION is reset to NULL afterwards. */
804 free_omp_regions (void)
806 struct omp_region *r, *n;
807 for (r = root_omp_region; r ; r = n)
810 free_omp_region_1 (r);
812 root_omp_region = NULL;
816 /* Create a new context, with OUTER_CTX being the surrounding context.
The context is allocated zeroed, registered in ALL_CONTEXTS under
STMT, and given a fresh decl map. */
819 new_omp_context (tree stmt, omp_context *outer_ctx)
821 omp_context *ctx = XCNEW (omp_context);
823 splay_tree_insert (all_contexts, (splay_tree_key) stmt,
824 (splay_tree_value) ctx);
/* Inherit the copy_body_data of the surrounding context, without its
   block, and nest one level deeper.  NOTE(review): presumably done
   only when OUTER_CTX is non-NULL; the guard is not visible --
   confirm.  */
829 ctx->outer = outer_ctx;
830 ctx->cb = outer_ctx->cb;
831 ctx->cb.block = NULL;
832 ctx->depth = outer_ctx->depth + 1;
/* Set up a copy_body_data whose source and destination are both the
   current function.  NOTE(review): presumably the alternative used
   when there is no surrounding context -- confirm.  */
836 ctx->cb.src_fn = current_function_decl;
837 ctx->cb.dst_fn = current_function_decl;
838 ctx->cb.src_node = cgraph_node (current_function_decl);
839 ctx->cb.dst_node = ctx->cb.src_node;
840 ctx->cb.src_cfun = cfun;
841 ctx->cb.copy_decl = omp_copy_decl;
842 ctx->cb.eh_region = -1;
843 ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
847 ctx->cb.decl_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
852 /* Destroy the data structures of an omp_context. Called through the
853 splay tree value delete callback; VALUE is the omp_context pointer. */
856 delete_omp_context (splay_tree_value value)
858 omp_context *ctx = (omp_context *) value;
860 splay_tree_delete (ctx->cb.decl_map);
863 splay_tree_delete (ctx->field_map);
865 /* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before
866 it produces corrupt debug information. */
867 if (ctx->record_type)
870 for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
871 DECL_ABSTRACT_ORIGIN (t) = NULL;
877 /* Fix up RECEIVER_DECL with a type that has been remapped to the child
context. */
881 fixup_child_record_type (omp_context *ctx)
883 tree f, type = ctx->record_type;
885 /* ??? It isn't sufficient to just call remap_type here, because
886 variably_modified_type_p doesn't work the way we expect for
887 record types. Testing each field for whether it needs remapping
888 and creating a new record by hand works, however. */
889 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
890 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
894 tree name, new_fields = NULL;
/* Build a new record type that carries the same name as the original
   record.  */
896 type = lang_hooks.types.make_type (RECORD_TYPE);
897 name = DECL_NAME (TYPE_NAME (ctx->record_type));
898 name = build_decl (TYPE_DECL, name, type);
899 TYPE_NAME (type) = name;
/* Copy every field into the new record with its type remapped.  The
   copies are chained in reverse and put back in order by nreverse
   below.  */
901 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
903 tree new_f = copy_node (f);
904 DECL_CONTEXT (new_f) = type;
905 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
906 TREE_CHAIN (new_f) = new_fields;
909 /* Arrange to be able to look up the receiver field
910 given the sender field. */
911 splay_tree_insert (ctx->field_map, (splay_tree_key) f,
912 (splay_tree_value) new_f);
914 TYPE_FIELDS (type) = nreverse (new_fields);
/* The receiver is a pointer to the (possibly rebuilt) record.  */
918 TREE_TYPE (ctx->receiver_decl) = build_pointer_type (type);
921 /* Instantiate decls as necessary in CTX to satisfy the data sharing
922 specified by CLAUSES. CLAUSES is walked twice: the first walk
installs record fields and local copies for the listed variables, the
second walk fixes up the remapped decls. When the second walk sees a
reduction clause with a placeholder, the init and merge sequences of
such clauses are scanned in a final walk. */
925 scan_sharing_clauses (tree clauses, omp_context *ctx)
928 bool scan_array_reductions = false;
930 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
934 switch (OMP_CLAUSE_CODE (c))
936 case OMP_CLAUSE_PRIVATE:
937 decl = OMP_CLAUSE_DECL (c);
938 if (!is_variable_sized (decl))
939 install_var_local (decl, ctx);
942 case OMP_CLAUSE_SHARED:
943 gcc_assert (is_parallel_ctx (ctx));
944 decl = OMP_CLAUSE_DECL (c);
945 gcc_assert (!is_variable_sized (decl));
946 by_ref = use_pointer_for_field (decl, true);
947 /* Global variables don't need to be copied,
948 the receiver side will use them directly. */
949 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
951 if (! TREE_READONLY (decl)
952 || TREE_ADDRESSABLE (decl)
954 || is_reference (decl))
956 install_var_field (decl, by_ref, ctx);
957 install_var_local (decl, ctx);
960 /* We don't need to copy const scalar vars back. */
961 OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
964 case OMP_CLAUSE_LASTPRIVATE:
965 /* Let the corresponding firstprivate clause create
967 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
971 case OMP_CLAUSE_FIRSTPRIVATE:
972 case OMP_CLAUSE_REDUCTION:
973 decl = OMP_CLAUSE_DECL (c);
975 if (is_variable_sized (decl))
977 else if (is_parallel_ctx (ctx)
978 && ! is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
981 by_ref = use_pointer_for_field (decl, false);
982 install_var_field (decl, by_ref, ctx);
984 install_var_local (decl, ctx);
987 case OMP_CLAUSE_COPYPRIVATE:
989 scan_omp (&OMP_CLAUSE_DECL (c), ctx->outer);
992 case OMP_CLAUSE_COPYIN:
993 decl = OMP_CLAUSE_DECL (c);
994 by_ref = use_pointer_for_field (decl, false);
995 install_var_field (decl, by_ref, ctx);
998 case OMP_CLAUSE_DEFAULT:
999 ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
1003 case OMP_CLAUSE_NUM_THREADS:
1004 case OMP_CLAUSE_SCHEDULE:
1006 scan_omp (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
1009 case OMP_CLAUSE_NOWAIT:
1010 case OMP_CLAUSE_ORDERED:
1018 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1020 switch (OMP_CLAUSE_CODE (c))
1022 case OMP_CLAUSE_LASTPRIVATE:
1023 /* Let the corresponding firstprivate clause create
1025 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1029 case OMP_CLAUSE_PRIVATE:
1030 case OMP_CLAUSE_FIRSTPRIVATE:
1031 case OMP_CLAUSE_REDUCTION:
1032 decl = OMP_CLAUSE_DECL (c);
1033 if (is_variable_sized (decl))
1034 install_var_local (decl, ctx);
1035 fixup_remapped_decl (decl, ctx,
1036 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
1037 && OMP_CLAUSE_PRIVATE_DEBUG (c));
1038 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1039 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1040 scan_array_reductions = true;
1043 case OMP_CLAUSE_SHARED:
1044 decl = OMP_CLAUSE_DECL (c);
1045 if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
1046 fixup_remapped_decl (decl, ctx, false);
1049 case OMP_CLAUSE_COPYPRIVATE:
1050 case OMP_CLAUSE_COPYIN:
1051 case OMP_CLAUSE_DEFAULT:
1053 case OMP_CLAUSE_NUM_THREADS:
1054 case OMP_CLAUSE_SCHEDULE:
1055 case OMP_CLAUSE_NOWAIT:
1056 case OMP_CLAUSE_ORDERED:
1064 if (scan_array_reductions)
1065 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1066 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1067 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1069 scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx);
1070 scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx);
1074 /* Create a new name for omp child function. Returns an identifier.
The name is derived from the assembler name of the current function
followed by "_omp_fn" and a number taken from TMP_OMPFN_ID_NUM, which
is incremented on every call. */
1076 static GTY(()) unsigned int tmp_ompfn_id_num;
1079 create_omp_child_function_name (void)
1081 tree name = DECL_ASSEMBLER_NAME (current_function_decl);
1082 size_t len = IDENTIFIER_LENGTH (name);
1083 char *tmp_name, *prefix;
/* Room for the assembler name plus "_omp_fn"; sizeof also counts the
   terminating NUL of the literal.  */
1085 prefix = alloca (len + sizeof ("_omp_fn"));
1086 memcpy (prefix, IDENTIFIER_POINTER (name), len);
1087 strcpy (prefix + len, "_omp_fn");
/* NOTE(review): the statements guarded by these conditionals are not
   visible; presumably they adjust the separator in the prefix to what
   the target allows in labels -- confirm.  */
1088 #ifndef NO_DOT_IN_LABEL
1090 #elif !defined NO_DOLLAR_IN_LABEL
1093 ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, tmp_ompfn_id_num++);
1094 return get_identifier (tmp_name);
1097 /* Build a decl for the omp child function. It'll not contain a body
1098 yet, just the bare decl. The function returns void and takes a
single pointer parameter named ".omp_data_i". The new decl is stored
in CTX->cb.dst_fn and the parameter in CTX->receiver_decl. */
1101 create_omp_child_function (omp_context *ctx)
1103 tree decl, type, name, t;
1105 name = create_omp_child_function_name ();
1106 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1108 decl = build_decl (FUNCTION_DECL, name, type);
1109 decl = lang_hooks.decls.pushdecl (decl);
1111 ctx->cb.dst_fn = decl;
/* The child is an artificial, non-public, uninlinable function that
   is defined in this translation unit.  */
1113 TREE_STATIC (decl) = 1;
1114 TREE_USED (decl) = 1;
1115 DECL_ARTIFICIAL (decl) = 1;
1116 DECL_IGNORED_P (decl) = 0;
1117 TREE_PUBLIC (decl) = 0;
1118 DECL_UNINLINABLE (decl) = 1;
1119 DECL_EXTERNAL (decl) = 0;
1120 DECL_CONTEXT (decl) = NULL_TREE;
1121 DECL_INITIAL (decl) = make_node (BLOCK);
/* Result decl of type void.  */
1123 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1124 DECL_ARTIFICIAL (t) = 1;
1125 DECL_IGNORED_P (t) = 1;
1126 DECL_RESULT (decl) = t;
/* The only parameter: the pointer through which the child receives
   the shared data.  */
1128 t = build_decl (PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
1129 DECL_ARTIFICIAL (t) = 1;
1130 DECL_ARG_TYPE (t) = ptr_type_node;
1131 DECL_CONTEXT (t) = current_function_decl;
1133 DECL_ARGUMENTS (decl) = t;
1134 ctx->receiver_decl = t;
1136 /* Allocate memory for the function structure. The call to
1137 allocate_struct_function clobbers CFUN, so we need to restore
1139 allocate_struct_function (decl);
1140 DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt);
1141 cfun->function_end_locus = EXPR_LOCATION (ctx->stmt);
1142 cfun = ctx->cb.src_cfun;
1146 /* Scan an OpenMP parallel directive. A context is created for
*STMT_P together with the ".omp_data_s" record type and the child
function decl; the clauses and the body are then scanned in that
context. */
1149 scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx)
1154 /* Ignore parallel directives with empty bodies, unless there
1155 are copyin clauses. */
1157 && empty_body_p (OMP_PARALLEL_BODY (*stmt_p))
1158 && find_omp_clause (OMP_CLAUSES (*stmt_p), OMP_CLAUSE_COPYIN) == NULL)
1160 *stmt_p = build_empty_stmt ();
1164 ctx = new_omp_context (*stmt_p, outer_ctx);
/* scan_omp_1 bumps the nesting level before calling us, so a level
   above 1 means this parallel lies inside another one.  */
1165 if (parallel_nesting_level > 1)
1166 ctx->is_nested = true;
1167 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1168 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
1169 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1170 name = create_tmp_var_name (".omp_data_s");
1171 name = build_decl (TYPE_DECL, name, ctx->record_type);
1172 TYPE_NAME (ctx->record_type) = name;
1173 create_omp_child_function (ctx);
1174 OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn;
1176 scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx);
1177 scan_omp (&OMP_PARALLEL_BODY (*stmt_p), ctx);
/* If no field was installed there is nothing to pass, so drop both
   the record type and the receiver; otherwise lay the record out and
   fix up the type of the receiver.  */
1179 if (TYPE_FIELDS (ctx->record_type) == NULL)
1180 ctx->record_type = ctx->receiver_decl = NULL;
1183 layout_type (ctx->record_type);
1184 fixup_child_record_type (ctx);
1189 /* Scan an OpenMP loop directive. A context is created for the
statement; its clauses, pre-body, init, condition and increment
expressions and its body are all scanned in that context. */
1192 scan_omp_for (tree *stmt_p, omp_context *outer_ctx)
1198 ctx = new_omp_context (stmt, outer_ctx);
1200 scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx);
1202 scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
1203 scan_omp (&OMP_FOR_INIT (stmt), ctx);
1204 scan_omp (&OMP_FOR_COND (stmt), ctx);
1205 scan_omp (&OMP_FOR_INCR (stmt), ctx);
1206 scan_omp (&OMP_FOR_BODY (stmt), ctx);
1209 /* Scan an OpenMP sections directive. A context is created for the
statement and its clauses and body are scanned in that context. */
1212 scan_omp_sections (tree *stmt_p, omp_context *outer_ctx)
1218 ctx = new_omp_context (stmt, outer_ctx);
1219 scan_sharing_clauses (OMP_SECTIONS_CLAUSES (stmt), ctx);
1220 scan_omp (&OMP_SECTIONS_BODY (stmt), ctx);
1223 /* Scan an OpenMP single directive. The context gets its own field
map and an ".omp_copy_s" record type, which is dropped again when
scanning the clauses and the body installed no field in it. */
1226 scan_omp_single (tree *stmt_p, omp_context *outer_ctx)
1228 tree stmt = *stmt_p;
1232 ctx = new_omp_context (stmt, outer_ctx);
1233 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1234 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1235 name = create_tmp_var_name (".omp_copy_s");
1236 name = build_decl (TYPE_DECL, name, ctx->record_type);
1237 TYPE_NAME (ctx->record_type) = name;
1239 scan_sharing_clauses (OMP_SINGLE_CLAUSES (stmt), ctx);
1240 scan_omp (&OMP_SINGLE_BODY (stmt), ctx);
1242 if (TYPE_FIELDS (ctx->record_type) == NULL)
1243 ctx->record_type = NULL;
1245 layout_type (ctx->record_type);
1249 /* Check OpenMP nesting restrictions for directive T found inside
context CTX. The enclosing contexts are walked outwards through
CTX->outer and violations are reported with warning (), not as
errors. */
1251 check_omp_nesting_restrictions (tree t, omp_context *ctx)
1253 switch (TREE_CODE (t))
1258 for (; ctx != NULL; ctx = ctx->outer)
1259 switch (TREE_CODE (ctx->stmt))
1266 warning (0, "work-sharing region may not be closely nested inside "
1267 "of work-sharing, critical, ordered or master region");
1276 for (; ctx != NULL; ctx = ctx->outer)
1277 switch (TREE_CODE (ctx->stmt))
1282 warning (0, "master region may not be closely nested inside "
1283 "of work-sharing region");
1292 for (; ctx != NULL; ctx = ctx->outer)
1293 switch (TREE_CODE (ctx->stmt))
1296 warning (0, "ordered region may not be closely nested inside "
1297 "of critical region");
1300 if (find_omp_clause (OMP_CLAUSES (ctx->stmt),
1301 OMP_CLAUSE_ORDERED) == NULL)
1302 warning (0, "ordered region must be closely nested inside "
1303 "a loop region with an ordered clause");
/* Critical names are compared by pointer identity.  */
1312 for (; ctx != NULL; ctx = ctx->outer)
1313 if (TREE_CODE (ctx->stmt) == OMP_CRITICAL
1314 && OMP_CRITICAL_NAME (t) == OMP_CRITICAL_NAME (ctx->stmt))
1316 warning (0, "critical region may not be nested inside a critical "
1317 "region with the same name");
1327 /* Callback for walk_stmts used to scan for OpenMP directives at TP.
   DATA is the walk_stmt_info of the walk; its info field holds the
   current omp_context, which may be NULL. */
1330 scan_omp_1 (tree *tp, int *walk_subtrees, void *data)
1332 struct walk_stmt_info *wi = data;
1333 omp_context *ctx = wi->info;
/* Keep input_location in sync with the tree being visited. */
1336 if (EXPR_HAS_LOCATION (t))
1337 input_location = EXPR_LOCATION (t);
1339 /* Check the OpenMP nesting restrictions. */
1340 if (OMP_DIRECTIVE_P (t) && ctx != NULL)
1341 check_omp_nesting_restrictions (t, ctx);
1344 switch (TREE_CODE (t))
/* The nesting level is raised only while the parallel is scanned. */
1347 parallel_nesting_level++;
1348 scan_omp_parallel (tp, ctx);
1349 parallel_nesting_level--;
1353 scan_omp_for (tp, ctx);
1357 scan_omp_sections (tp, ctx);
1361 scan_omp_single (tp, ctx);
/* Here a context is created for *TP and its body is scanned in it. */
1368 ctx = new_omp_context (*tp, ctx);
1369 scan_omp (&OMP_BODY (*tp), ctx);
/* Map every variable of the BIND_EXPR to itself in the context's
   copy_body_data. */
1377 for (var = BIND_EXPR_VARS (t); var ; var = TREE_CHAIN (var))
1378 insert_decl_map (&ctx->cb, var, var);
1387 *tp = remap_decl (t, &ctx->cb);
/* Types are remapped only when there is a context. */
1391 if (ctx && TYPE_P (t))
1392 *tp = remap_type (t, &ctx->cb);
1393 else if (!DECL_P (t))
/* scan_omp walks the statements at STMT_P with scan_omp_1 as the
   callback. BIND_EXPRs are requested only when CTX is non-NULL,
   locations are always requested, and input_location is saved before
   the walk and restored after it. */
1402 /* Scan all the statements starting at STMT_P. CTX contains context
1403 information about the OpenMP directives and clauses found during
1407 scan_omp (tree *stmt_p, omp_context *ctx)
1409 location_t saved_location;
1410 struct walk_stmt_info wi;
1412 memset (&wi, 0, sizeof (wi));
1413 wi.callback = scan_omp_1;
1415 wi.want_bind_expr = (ctx != NULL);
1416 wi.want_locations = true;
1418 saved_location = input_location;
1419 walk_stmts (&wi, stmt_p);
1420 input_location = saved_location;
1423 /* Re-gimplification and code generation routines. */
1425 /* Build a call to GOMP_barrier, gimplify it and add it to
   STMT_LIST. */
1428 build_omp_barrier (tree *stmt_list)
1432 t = built_in_decls[BUILT_IN_GOMP_BARRIER];
/* The barrier builtin is called with an empty argument list. */
1433 t = build_function_call_expr (t, NULL);
1434 gimplify_and_add (t, stmt_list);
1437 /* If a context was created for STMT when it was scanned, return it;
   otherwise return NULL. Contexts are looked up in ALL_CONTEXTS using
   STMT itself as the splay tree key. */
1439 static omp_context *
1440 maybe_lookup_ctx (tree stmt)
1443 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
1444 return n ? (omp_context *) n->value : NULL;
1448 /* Find the mapping for DECL in CTX or the immediately enclosing
1449 context that has a mapping for DECL.
1451 If CTX is a nested parallel directive, we may have to use the decl
1452 mappings created in CTX's parent context. Suppose that we have the
1453 following parallel nesting (variable UIDs shown for clarity):
1456 #omp parallel shared(iD.1562) -> outer parallel
1457 iD.1562 = iD.1562 + 1;
1459 #omp parallel shared (iD.1562) -> inner parallel
1460 iD.1562 = iD.1562 - 1;
1462 Each parallel structure will create a distinct .omp_data_s structure
1463 for copying iD.1562 in/out of the directive:
1465 outer parallel .omp_data_s.1.i -> iD.1562
1466 inner parallel .omp_data_s.2.i -> iD.1562
1468 A shared variable mapping will produce a copy-out operation before
1469 the parallel directive and a copy-in operation after it. So, in
1470 this case we would have:
1473 .omp_data_o.1.i = iD.1562;
1474 #omp parallel shared(iD.1562) -> outer parallel
1475 .omp_data_i.1 = &.omp_data_o.1
1476 .omp_data_i.1->i = .omp_data_i.1->i + 1;
1478 .omp_data_o.2.i = iD.1562; -> **
1479 #omp parallel shared(iD.1562) -> inner parallel
1480 .omp_data_i.2 = &.omp_data_o.2
1481 .omp_data_i.2->i = .omp_data_i.2->i - 1;
1484 ** This is a problem. The symbol iD.1562 cannot be referenced
1485 inside the body of the outer parallel region. But since we are
1486 emitting this copy operation while expanding the inner parallel
1487 directive, we need to access the CTX structure of the outer
1488 parallel directive to get the correct mapping:
1490 .omp_data_o.2.i = .omp_data_i.1->i
1492 Since there may be other workshare or parallel directives enclosing
1493 the parallel directive, it may be necessary to walk up the context
1494 parent chain. This is not a problem in general because nested
1495 parallelism happens only rarely. */
1498 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
/* Callers must pass a nested context. */
1503 gcc_assert (ctx->is_nested);
/* Walk outwards and stop at the first context that maps DECL. */
1505 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1506 t = maybe_lookup_decl (decl, up);
1514 /* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
1515 in outer contexts. */
1518 maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
/* Walk outwards and stop at the first context that maps DECL. */
1524 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1525 t = maybe_lookup_decl (decl, up);
/* Fall back to DECL itself when no enclosing context maps it. */
1527 return t ? t : decl;
1531 /* Construct the initialization value for reduction CLAUSE. TYPE is
   the type of the value that is built. Depending on
   OMP_CLAUSE_REDUCTION_CODE (CLAUSE) the result is zero, one, minus
   one, or an extreme (smallest or largest) value of TYPE. */
1534 omp_reduction_init (tree clause, tree type)
1536 switch (OMP_CLAUSE_REDUCTION_CODE (clause))
1543 case TRUTH_ORIF_EXPR:
1544 case TRUTH_XOR_EXPR:
1546 return fold_convert (type, integer_zero_node);
1549 case TRUTH_AND_EXPR:
1550 case TRUTH_ANDIF_EXPR:
1552 return fold_convert (type, integer_one_node);
1555 return fold_convert (type, integer_minus_one_node);
/* Smallest value: floating-point types build a real constant whose
   choice depends on whether the mode honors infinities; other types
   must be integral and use TYPE_MIN_VALUE. */
1558 if (SCALAR_FLOAT_TYPE_P (type))
1560 REAL_VALUE_TYPE max, min;
1561 if (HONOR_INFINITIES (TYPE_MODE (type)))
1564 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
1567 real_maxval (&min, 1, TYPE_MODE (type));
1568 return build_real (type, min);
1572 gcc_assert (INTEGRAL_TYPE_P (type));
1573 return TYPE_MIN_VALUE (type);
/* Largest value: the same split between floating-point and integral
   types, using TYPE_MAX_VALUE for the latter. */
1577 if (SCALAR_FLOAT_TYPE_P (type))
1579 REAL_VALUE_TYPE max;
1580 if (HONOR_INFINITIES (TYPE_MODE (type)))
1583 real_maxval (&max, 0, TYPE_MODE (type));
1584 return build_real (type, max);
1588 gcc_assert (INTEGRAL_TYPE_P (type));
1589 return TYPE_MAX_VALUE (type);
1597 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
1598 from the receiver (aka child) side and initializers for REFERENCE_TYPE
1599 private variables. Initialization statements go in ILIST, while calls
1600 to destructors go in DLIST.
   CLAUSES is the clause chain to process. COPYIN assignments are
   collected in a separate sequence that is emitted under a thread
   number test, and a barrier is added to ILIST when a COPYIN variable
   is passed by reference or a variable is both firstprivate and
   lastprivate. */
1603 lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist,
1606 tree_stmt_iterator diter;
1607 tree c, dtor, copyin_seq, x, args, ptr;
1608 bool copyin_by_ref = false;
1609 bool lastprivate_firstprivate = false;
1612 *dlist = alloc_stmt_list ();
1613 diter = tsi_start (*dlist);
1616 /* Do all the fixed sized types in the first pass, and the variable sized
1617 types in the second pass. This makes sure that the scalar arguments to
1618 the variable sized types are processed before we use them in the
1619 variable sized operations. */
1620 for (pass = 0; pass < 2; ++pass)
1622 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1624 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
1630 case OMP_CLAUSE_PRIVATE:
1631 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
1634 case OMP_CLAUSE_SHARED:
1635 if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
1637 gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
1640 case OMP_CLAUSE_FIRSTPRIVATE:
1641 case OMP_CLAUSE_COPYIN:
1642 case OMP_CLAUSE_REDUCTION:
1644 case OMP_CLAUSE_LASTPRIVATE:
1645 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1647 lastprivate_firstprivate = true;
/* COPYIN keeps operating on the original decl; every other clause
   uses the decl it maps to in CTX. */
1656 new_var = var = OMP_CLAUSE_DECL (c);
1657 if (c_kind != OMP_CLAUSE_COPYIN)
1658 new_var = lookup_decl (var, ctx);
1660 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
1665 else if (is_variable_sized (var))
1667 /* For variable sized types, we need to allocate the
1668 actual storage here. Call alloca and store the
1669 result in the pointer decl that we created elsewhere. */
1673 ptr = DECL_VALUE_EXPR (new_var);
1674 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
1675 ptr = TREE_OPERAND (ptr, 0);
1676 gcc_assert (DECL_P (ptr));
1678 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
1679 args = tree_cons (NULL, x, NULL);
1680 x = built_in_decls[BUILT_IN_ALLOCA];
1681 x = build_function_call_expr (x, args);
1682 x = fold_convert (TREE_TYPE (ptr), x);
1683 x = build2 (MODIFY_EXPR, void_type_node, ptr, x);
1684 gimplify_and_add (x, ilist);
1686 else if (is_reference (var))
1688 /* For references that are being privatized for Fortran,
1689 allocate new backing storage for the new pointer
1690 variable. This allows us to avoid changing all the
1691 code that expects a pointer to something that expects
1692 a direct variable. Note that this doesn't apply to
1693 C++, since reference types are disallowed in data
1694 sharing clauses there, except for NRV optimized
   return values. */
1699 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
/* A constant size gets a temporary variable as backing storage;
   the alloca call below serves the other case. */
1700 if (TREE_CONSTANT (x))
1702 const char *name = NULL;
1703 if (DECL_NAME (var))
1704 name = IDENTIFIER_POINTER (DECL_NAME (new_var));
1706 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
1708 gimple_add_tmp_var (x);
1709 x = build_fold_addr_expr_with_type (x, TREE_TYPE (new_var));
1713 args = tree_cons (NULL, x, NULL);
1714 x = built_in_decls[BUILT_IN_ALLOCA];
1715 x = build_function_call_expr (x, args);
1716 x = fold_convert (TREE_TYPE (new_var), x);
1719 x = build2 (MODIFY_EXPR, void_type_node, new_var, x);
1720 gimplify_and_add (x, ilist);
1722 new_var = build_fold_indirect_ref (new_var);
1724 else if (c_kind == OMP_CLAUSE_REDUCTION
1725 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1733 switch (OMP_CLAUSE_CODE (c))
1735 case OMP_CLAUSE_SHARED:
1736 /* Shared global vars are just accessed directly. */
1737 if (is_global_var (new_var))
1739 /* Set up the DECL_VALUE_EXPR for shared variables now. This
1740 needs to be delayed until after fixup_child_record_type so
1741 that we get the correct type during the dereference. */
1742 by_ref = use_pointer_for_field (var, true);
1743 x = build_receiver_ref (var, by_ref, ctx);
1744 SET_DECL_VALUE_EXPR (new_var, x);
1745 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
1747 /* ??? If VAR is not passed by reference, and the variable
1748 hasn't been initialized yet, then we'll get a warning for
1749 the store into the omp_data_s structure. Ideally, we'd be
1750 able to notice this and not store anything at all, but
1751 we're generating code too early. Suppress the warning. */
1753 TREE_NO_WARNING (var) = 1;
1756 case OMP_CLAUSE_LASTPRIVATE:
1757 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1761 case OMP_CLAUSE_PRIVATE:
1762 x = lang_hooks.decls.omp_clause_default_ctor (c, new_var);
1764 gimplify_and_add (x, ilist);
1768 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
/* Destructor calls are linked before the iterator position in DLIST. */
1772 gimplify_stmt (&dtor);
1773 tsi_link_before (&diter, dtor, TSI_SAME_STMT);
1777 case OMP_CLAUSE_FIRSTPRIVATE:
1778 x = build_outer_var_ref (var, ctx);
1779 x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
1780 gimplify_and_add (x, ilist);
1784 case OMP_CLAUSE_COPYIN:
1785 by_ref = use_pointer_for_field (var, false);
1786 x = build_receiver_ref (var, by_ref, ctx);
1787 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
/* The assignment is queued in COPYIN_SEQ rather than ILIST; the
   sequence is emitted under the thread number test further down. */
1788 append_to_statement_list (x, &copyin_seq);
1789 copyin_by_ref |= by_ref;
1792 case OMP_CLAUSE_REDUCTION:
1793 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1795 gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist);
1796 OMP_CLAUSE_REDUCTION_INIT (c) = NULL;
1800 x = omp_reduction_init (c, TREE_TYPE (new_var));
1801 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
1802 x = build2 (MODIFY_EXPR, void_type_node, new_var, x);
1803 gimplify_and_add (x, ilist);
1813 /* The copyin sequence is not to be executed by the main thread, since
1814 that would result in self-copies. Perhaps not visible to scalars,
1815 but it certainly is to C++ operator=. */
1818 x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
1819 x = build_function_call_expr (x, NULL);
1820 x = build2 (NE_EXPR, boolean_type_node, x,
1821 build_int_cst (TREE_TYPE (x), 0));
1822 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
1823 gimplify_and_add (x, ilist);
1826 /* If any copyin variable is passed by reference, we must ensure the
1827 master thread doesn't modify it before it is copied over in all
1828 threads. Similarly for variables in both firstprivate and
1829 lastprivate clauses we need to ensure the lastprivate copying
1830 happens after firstprivate copying in all threads. */
1831 if (copyin_by_ref || lastprivate_firstprivate)
1832 build_omp_barrier (ilist);
/* lower_lastprivate_clauses: for every LASTPRIVATE clause, assign the
   private copy back to the outer variable through the language's
   omp_clause_assign_op hook. The assignments are collected in a
   sub-list, and what is built from them is gimplified into
   STMT_LIST. */
1836 /* Generate code to implement the LASTPRIVATE clauses. This is used for
1837 both parallel and workshare constructs. PREDICATE may be NULL if it's
1841 lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list,
1844 tree sub_list, x, c;
1846 /* Early exit if there are no lastprivate clauses. */
1847 clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
1848 if (clauses == NULL)
1850 /* If this was a workshare clause, see if it had been combined
1851 with its parallel. In that case, look for the clauses on the
1852 parallel statement itself. */
1853 if (is_parallel_ctx (ctx))
1857 if (ctx == NULL || !is_parallel_ctx (ctx))
1860 clauses = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt),
1861 OMP_CLAUSE_LASTPRIVATE);
1862 if (clauses == NULL)
1866 sub_list = alloc_stmt_list ();
1868 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
/* Only LASTPRIVATE clauses are of interest in this loop. */
1872 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE)
1875 var = OMP_CLAUSE_DECL (c);
1876 new_var = lookup_decl (var, ctx);
1878 x = build_outer_var_ref (var, ctx);
/* For reference variables the private copy is read through the
   reference. */
1879 if (is_reference (var))
1880 new_var = build_fold_indirect_ref (new_var);
1881 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
1882 append_to_statement_list (x, &sub_list);
/* Guard the assignments with PREDICATE. */
1886 x = build3 (COND_EXPR, void_type_node, predicate, sub_list, NULL);
1890 gimplify_and_add (x, stmt_list);
1894 /* Generate code to implement the REDUCTION clauses in CLAUSES. The
   code that merges each private copy into the outer variable is
   added to STMT_LIST, either as a single OMP_ATOMIC update or as a
   list of statements placed between calls to the GOMP_atomic_start
   and GOMP_atomic_end builtins. */
1897 lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx)
1899 tree sub_list = NULL, x, c;
1902 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
1903 update in that case, otherwise use a lock. */
1904 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
1905 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
1907 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1909 /* Never use OMP_ATOMIC for array reductions. */
1919 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1921 tree var, ref, new_var;
1922 enum tree_code code;
1924 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
/* NEW_VAR is the private copy (dereferenced for references) and REF
   is the variable as seen outside the construct. */
1927 var = OMP_CLAUSE_DECL (c);
1928 new_var = lookup_decl (var, ctx);
1929 if (is_reference (var))
1930 new_var = build_fold_indirect_ref (new_var);
1931 ref = build_outer_var_ref (var, ctx);
1932 code = OMP_CLAUSE_REDUCTION_CODE (c);
1934 /* reduction(-:var) sums up the partial results, so it acts
1935 identically to reduction(+:var). */
1936 if (code == MINUS_EXPR)
/* Atomic form: *ADDR = *ADDR <code> NEW_VAR wrapped in OMP_ATOMIC. */
1941 tree addr = build_fold_addr_expr (ref);
1943 addr = save_expr (addr);
1944 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
1945 x = fold_build2 (code, TREE_TYPE (ref), ref, new_var);
1946 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
1947 gimplify_and_add (x, stmt_list);
/* Placeholder form: point the placeholder at the outer variable
   through its DECL_VALUE_EXPR, emit the clause's merge sequence, then
   clear the merge and placeholder fields of the clause. */
1951 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1953 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
1955 if (is_reference (var))
1956 ref = build_fold_addr_expr (ref);
1957 SET_DECL_VALUE_EXPR (placeholder, ref);
1958 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
1959 gimplify_and_add (OMP_CLAUSE_REDUCTION_MERGE (c), &sub_list);
1960 OMP_CLAUSE_REDUCTION_MERGE (c) = NULL;
1961 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
/* Plain form: outer = outer <code> NEW_VAR, queued in SUB_LIST. */
1965 x = build2 (code, TREE_TYPE (ref), ref, new_var);
1966 ref = build_outer_var_ref (var, ctx);
1967 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
1968 append_to_statement_list (x, &sub_list);
/* Emit SUB_LIST between the atomic start and atomic end calls. */
1972 x = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
1973 x = build_function_call_expr (x, NULL);
1974 gimplify_and_add (x, stmt_list);
1976 gimplify_and_add (sub_list, stmt_list);
1978 x = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
1979 x = build_function_call_expr (x, NULL);
1980 gimplify_and_add (x, stmt_list);
1984 /* Generate code to implement the COPYPRIVATE clauses in CLAUSES.
   Stores into the sender record are added to SLIST; assignments from
   the receiver record back into the variables are added to RLIST. */
1987 lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist,
1992 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1997 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
2000 var = OMP_CLAUSE_DECL (c);
2001 by_ref = use_pointer_for_field (var, false);
/* Sender side: store the variable, or its address when BY_REF, into
   the sender record. In a nested context the variable is first
   looked up in the enclosing contexts. */
2003 ref = build_sender_ref (var, ctx);
2004 x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var;
2005 x = by_ref ? build_fold_addr_expr (x) : x;
2006 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
2007 gimplify_and_add (x, slist);
/* Receiver side: assign from the receiver record into the variable
   with the language's assignment hook; references are dereferenced on
   both sides first. */
2009 ref = build_receiver_ref (var, by_ref, ctx);
2010 if (is_reference (var))
2012 ref = build_fold_indirect_ref (ref);
2013 var = build_fold_indirect_ref (var);
2015 x = lang_hooks.decls.omp_clause_assign_op (c, var, ref);
2016 gimplify_and_add (x, rlist);
2021 /* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE,
2022 and REDUCTION from the sender (aka parent) side. Stores into the
   sender record are added to ILIST and copies from the sender record
   back into the variables are added to OLIST. */
2025 lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx)
2029 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
2031 tree val, ref, x, var;
/* DO_IN and DO_OUT start out false for each clause; the per-clause
   switch below decides which direction(s) to copy. */
2032 bool by_ref, do_in = false, do_out = false;
2034 switch (OMP_CLAUSE_CODE (c))
2036 case OMP_CLAUSE_FIRSTPRIVATE:
2037 case OMP_CLAUSE_COPYIN:
2038 case OMP_CLAUSE_LASTPRIVATE:
2039 case OMP_CLAUSE_REDUCTION:
/* VAL stays the clause's decl; VAR may be replaced by the decl found
   in an enclosing context. */
2045 var = val = OMP_CLAUSE_DECL (c);
2047 var = lookup_decl_in_outer_ctx (val, ctx);
2049 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
2050 && is_global_var (var))
2052 if (is_variable_sized (val))
2054 by_ref = use_pointer_for_field (val, false);
2056 switch (OMP_CLAUSE_CODE (c))
2058 case OMP_CLAUSE_FIRSTPRIVATE:
2059 case OMP_CLAUSE_COPYIN:
2063 case OMP_CLAUSE_LASTPRIVATE:
2064 if (by_ref || is_reference (val))
2066 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
2074 case OMP_CLAUSE_REDUCTION:
/* No copy back when the value travels by pointer or is a reference. */
2076 do_out = !(by_ref || is_reference (val));
/* Copy in: sender field = VAR, or &VAR when BY_REF. */
2085 ref = build_sender_ref (val, ctx);
2086 x = by_ref ? build_fold_addr_expr (var) : var;
2087 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
2088 gimplify_and_add (x, ilist);
/* Copy out: VAR = sender field. */
2093 ref = build_sender_ref (val, ctx);
2094 x = build2 (MODIFY_EXPR, void_type_node, var, ref);
2095 gimplify_and_add (x, olist);
2100 /* Generate code to implement SHARED from the sender (aka parent) side.
2101 This is trickier, since OMP_PARALLEL_CLAUSES doesn't list things that
2102 got automatically shared. Instead of walking clauses, this walks the
   fields of CTX's record type. Stores into the sender record go to
   ILIST and copies back out of it go to OLIST. */
2105 lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx)
2107 tree var, ovar, nvar, f, x;
2109 if (ctx->record_type == NULL)
2112 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
/* The variable a field stands for is read from the field's
   DECL_ABSTRACT_ORIGIN. */
2114 ovar = DECL_ABSTRACT_ORIGIN (f);
2115 nvar = maybe_lookup_decl (ovar, ctx);
2116 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
2121 /* If CTX is a nested parallel directive, find the immediately
2122 enclosing parallel or workshare construct that contains a
2123 mapping for OVAR. */
2125 var = lookup_decl_in_outer_ctx (ovar, ctx);
/* Passed by pointer: only the address is stored, in ILIST. */
2127 if (use_pointer_for_field (ovar, true))
2129 x = build_sender_ref (ovar, ctx);
2130 var = build_fold_addr_expr (var);
2131 x = build2 (MODIFY_EXPR, void_type_node, x, var);
2132 gimplify_and_add (x, ilist);
/* Passed by value: store the value in ILIST and copy it back in
   OLIST. */
2136 x = build_sender_ref (ovar, ctx);
2137 x = build2 (MODIFY_EXPR, void_type_node, x, var);
2138 gimplify_and_add (x, ilist);
2140 x = build_sender_ref (ovar, ctx);
2141 x = build2 (MODIFY_EXPR, void_type_node, var, x);
2142 gimplify_and_add (x, olist);
2147 /* Build the function calls to GOMP_parallel_start etc to actually
2148 generate the parallel operation. REGION is the parallel region
2149 being expanded. BB is the block where to insert the code. WS_ARGS
2150 will be set if this is a call to a combined parallel+workshare
2151 construct, it contains the list of additional arguments needed by
2152 the workshare construct. ENTRY_STMT is the OMP_PARALLEL statement
   whose clauses, child function and data argument are used. */
2155 expand_parallel_call (struct omp_region *region, basic_block bb,
2156 tree entry_stmt, tree ws_args)
2158 tree t, args, val, cond, c, list, clauses;
2159 block_stmt_iterator si;
2162 clauses = OMP_PARALLEL_CLAUSES (entry_stmt);
2163 push_gimplify_context ();
2165 /* Determine what flavor of GOMP_parallel_start we will be
2167 start_ix = BUILT_IN_GOMP_PARALLEL_START;
2168 if (is_combined_parallel (region))
2170 switch (region->inner->type)
2173 start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
2174 + region->inner->sched_kind;
2177 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
2184 /* By default, the value of NUM_THREADS is zero (selected at run time)
2185 and there is no conditional. */
2187 val = build_int_cst (unsigned_type_node, 0);
2189 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
2191 cond = OMP_CLAUSE_IF_EXPR (c);
2193 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
2195 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
2197 /* Ensure 'val' is of the correct type. */
2198 val = fold_convert (unsigned_type_node, val);
2200 /* If we found the clause 'if (cond)', build either
2201 (cond == 0) or (cond ? val : 1u). */
2204 block_stmt_iterator si;
2206 cond = gimple_boolify (cond);
/* With no explicit thread count, VAL becomes the result of comparing
   COND against zero. */
2208 if (integer_zerop (val))
2209 val = build2 (EQ_EXPR, unsigned_type_node, cond,
2210 build_int_cst (TREE_TYPE (cond), 0));
2213 basic_block cond_bb, then_bb, else_bb;
2215 tree t, then_lab, else_lab, tmp;
/* Otherwise compute the value into TMP with explicit control flow:
   a conditional jump, a then-block and an else-block. */
2217 tmp = create_tmp_var (TREE_TYPE (val), NULL);
2218 e = split_block (bb, NULL);
2223 then_bb = create_empty_bb (cond_bb);
2224 else_bb = create_empty_bb (then_bb);
2225 then_lab = create_artificial_label ();
2226 else_lab = create_artificial_label ();
2228 t = build3 (COND_EXPR, void_type_node,
2230 build_and_jump (&then_lab),
2231 build_and_jump (&else_lab));
2233 si = bsi_start (cond_bb);
2234 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
/* Then-block: TMP = VAL. */
2236 si = bsi_start (then_bb);
2237 t = build1 (LABEL_EXPR, void_type_node, then_lab);
2238 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2239 t = build2 (MODIFY_EXPR, void_type_node, tmp, val);
2240 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
/* Else-block: TMP = 1. */
2242 si = bsi_start (else_bb);
2243 t = build1 (LABEL_EXPR, void_type_node, else_lab);
2244 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2245 t = build2 (MODIFY_EXPR, void_type_node, tmp,
2246 build_int_cst (unsigned_type_node, 1));
2247 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
/* Both blocks fall through into BB. */
2249 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
2250 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
2251 make_edge (then_bb, bb, EDGE_FALLTHRU);
2252 make_edge (else_bb, bb, EDGE_FALLTHRU);
2258 val = get_formal_tmp_var (val, &list);
2259 si = bsi_start (bb);
2260 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
/* The argument list is consed back to front, giving (address of the
   child function, data argument, VAL) followed by WS_ARGS. */
2264 args = tree_cons (NULL, val, NULL);
2265 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2267 t = null_pointer_node;
2269 t = build_fold_addr_expr (t);
2270 args = tree_cons (NULL, t, args);
2271 t = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt));
2272 args = tree_cons (NULL, t, args);
2275 args = chainon (args, ws_args);
2277 t = built_in_decls[start_ix];
2278 t = build_function_call_expr (t, args);
2279 gimplify_and_add (t, &list);
/* Next comes a direct call to the child function with the data
   argument, followed by the call to the parallel-end builtin. */
2281 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2283 t = null_pointer_node;
2285 t = build_fold_addr_expr (t);
2286 args = tree_cons (NULL, t, NULL);
2287 t = build_function_call_expr (OMP_PARALLEL_FN (entry_stmt), args);
2288 gimplify_and_add (t, &list);
2290 t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END];
2291 t = build_function_call_expr (t, NULL);
2292 gimplify_and_add (t, &list);
2295 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2297 pop_gimplify_context (NULL_TREE);
2301 /* If exceptions are enabled, wrap *STMT_P in a MUST_NOT_THROW catch
2302 handler. This prevents programs from violating the structured
2303 block semantics with throws. The wrapper is a TRY_CATCH_EXPR whose
   handler is an EH_FILTER_EXPR marked EH_FILTER_MUST_NOT_THROW. */
2306 maybe_catch_exception (tree *stmt_p)
2310 if (!flag_exceptions)
/* Failure action: the result of lang_protect_cleanup_actions when
   that hook is set; the trap builtin call is presumably the fallback
   (the branch structure is not visible here -- confirm). */
2313 if (lang_protect_cleanup_actions)
2314 t = lang_protect_cleanup_actions ();
2317 t = built_in_decls[BUILT_IN_TRAP];
2318 t = build_function_call_expr (t, NULL);
2320 f = build2 (EH_FILTER_EXPR, void_type_node, NULL, NULL);
2321 EH_FILTER_MUST_NOT_THROW (f) = 1;
2322 gimplify_and_add (t, &EH_FILTER_FAILURE (f));
/* The original statement becomes operand 0 of the TRY_CATCH_EXPR and
   the filter is appended to operand 1. */
2324 t = build2 (TRY_CATCH_EXPR, void_type_node, *stmt_p, NULL);
2325 append_to_statement_list (f, &TREE_OPERAND (t, 1));
2328 append_to_statement_list (t, stmt_p);
2331 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. LIST is
   walked through TREE_CHAIN and the DECLs are its TREE_VALUEs. Returns
   the first DECL, or NULL_TREE when LIST is empty. */
2334 list2chain (tree list)
2338 for (t = list; t; t = TREE_CHAIN (t))
2340 tree var = TREE_VALUE (t);
/* Link VAR to the DECL of the next list node; the other assignment
   terminates the chain. */
2342 TREE_CHAIN (var) = TREE_VALUE (TREE_CHAIN (t));
2344 TREE_CHAIN (var) = NULL_TREE;
2347 return list ? TREE_VALUE (list) : NULL_TREE;
/* remove_exit_barrier: look at the last statement of each predecessor
   of REGION's exit block and, when it is an OMP_RETURN, set
   OMP_RETURN_NOWAIT on it. */
2351 /* Remove barriers in REGION->EXIT's block. Note that this is only
2352 valid for OMP_PARALLEL regions. Since the end of a parallel region
2353 is an implicit barrier, any workshare inside the OMP_PARALLEL that
2354 left a barrier at the end of the OMP_PARALLEL region can now be
2358 remove_exit_barrier (struct omp_region *region)
2360 block_stmt_iterator si;
2361 basic_block exit_bb;
2366 exit_bb = region->exit;
2368 /* If the parallel region doesn't return, we don't have REGION->EXIT
2373 /* The last insn in the block will be the parallel's OMP_RETURN. The
2374 workshare's OMP_RETURN will be in a preceding block. The kinds of
2375 statements that can appear in between are extremely limited -- no
2376 memory operations at all. Here, we allow nothing at all, so the
2377 only thing we allow to precede this OMP_RETURN is a label. */
2378 si = bsi_last (exit_bb);
2379 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2381 if (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) != LABEL_EXPR)
/* Visit the block at the source of every edge entering EXIT_BB. */
2384 FOR_EACH_EDGE (e, ei, exit_bb->preds)
2386 si = bsi_last (e->src);
2390 if (TREE_CODE (t) == OMP_RETURN)
2391 OMP_RETURN_NOWAIT (t) = 1;
/* Apply remove_exit_barrier to the OMP_PARALLEL regions of the region
   tree at REGION. The function recurses into REGION's inner region
   and then into each region reached from it through the next
   links. */
2396 remove_exit_barriers (struct omp_region *region)
2398 if (region->type == OMP_PARALLEL)
2399 remove_exit_barrier (region);
2403 region = region->inner;
2404 remove_exit_barriers (region);
2405 while (region->next)
2407 region = region->next;
2408 remove_exit_barriers (region);
2413 /* Expand the OpenMP parallel directive starting at REGION. The
   child function is taken from the OMP_PARALLEL statement that ends
   REGION's entry block. When that function already has a CFG the
   region is only disconnected; otherwise the region's blocks are
   moved into the child function. A call that launches the threads is
   then emitted through expand_parallel_call. */
2416 expand_omp_parallel (struct omp_region *region)
2418 basic_block entry_bb, exit_bb, new_bb;
2419 struct function *child_cfun, *saved_cfun;
2420 tree child_fn, block, t, ws_args;
2421 block_stmt_iterator si;
2425 entry_stmt = last_stmt (region->entry);
2426 child_fn = OMP_PARALLEL_FN (entry_stmt);
2427 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
2430 entry_bb = region->entry;
2431 exit_bb = region->exit;
/* Extra workshare arguments exist only for combined regions. */
2433 if (is_combined_parallel (region))
2434 ws_args = region->ws_args;
2436 ws_args = NULL_TREE;
2438 if (child_cfun->cfg)
2440 /* Due to inlining, it may happen that we have already outlined
2441 the region, in which case all we need to do is make the
2442 sub-graph unreachable and emit the parallel call. */
2443 edge entry_succ_e, exit_succ_e;
2444 block_stmt_iterator si;
2446 entry_succ_e = single_succ_edge (entry_bb);
2448 si = bsi_last (entry_bb);
2449 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
2450 bsi_remove (&si, true);
2453 remove_edge (entry_succ_e);
2456 exit_succ_e = single_succ_edge (exit_bb);
2457 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
2462 /* If the parallel region needs data sent from the parent
2463 function, then the very first statement (except possible
2464 tree profile counter updates) of the parallel body
2465 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
2466 &.OMP_DATA_O is passed as an argument to the child function,
2467 we need to replace it with the argument as seen by the child
   function.
2470 In most cases, this will end up being the identity assignment
2471 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
2472 a function call that has been inlined, the original PARM_DECL
2473 .OMP_DATA_I may have been converted into a different local
2474 variable. In which case, we need to keep the assignment. */
2475 if (OMP_PARALLEL_DATA_ARG (entry_stmt))
2477 basic_block entry_succ_bb = single_succ (entry_bb);
2478 block_stmt_iterator si;
/* Scan forward for the assignment whose right-hand side is the
   address of the data argument; the assert requires that it is found
   before the end of the block. */
2480 for (si = bsi_start (entry_succ_bb); ; bsi_next (&si))
2484 gcc_assert (!bsi_end_p (si));
2485 stmt = bsi_stmt (si);
2486 if (TREE_CODE (stmt) != MODIFY_EXPR)
2489 arg = TREE_OPERAND (stmt, 1);
2491 if (TREE_CODE (arg) == ADDR_EXPR
2492 && TREE_OPERAND (arg, 0)
2493 == OMP_PARALLEL_DATA_ARG (entry_stmt))
/* Drop the statement when it assigns to the child's first argument;
   the other assignment rewrites its right-hand side to that
   argument. */
2495 if (TREE_OPERAND (stmt, 0) == DECL_ARGUMENTS (child_fn))
2496 bsi_remove (&si, true);
2498 TREE_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn);
2504 /* Declare local variables needed in CHILD_CFUN. */
2505 block = DECL_INITIAL (child_fn);
2506 BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list);
2507 DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list;
2509 /* Reset DECL_CONTEXT on locals and function arguments. */
2510 for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t))
2511 DECL_CONTEXT (t) = child_fn;
2513 for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
2514 DECL_CONTEXT (t) = child_fn;
2516 /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the
2518 si = bsi_last (entry_bb);
2520 gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
2521 bsi_remove (&si, true);
2522 e = split_block (entry_bb, t);
2524 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
2526 /* Move the parallel region into CHILD_CFUN. We need to reset
2527 dominance information because the expansion of the inner
2528 regions has invalidated it. */
2529 free_dominance_info (CDI_DOMINATORS);
2530 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
2532 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
2533 cgraph_add_new_function (child_fn);
2535 /* Convert OMP_RETURN into a RETURN_EXPR. */
2538 si = bsi_last (exit_bb);
2539 gcc_assert (!bsi_end_p (si)
2540 && TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
/* Insert the RETURN_EXPR after the OMP_RETURN, then delete the
   OMP_RETURN. */
2541 t = build1 (RETURN_EXPR, void_type_node, NULL);
2542 bsi_insert_after (&si, t, BSI_SAME_STMT);
2543 bsi_remove (&si, true);
2547 /* Emit a library call to launch the children threads. */
2548 expand_parallel_call (region, new_bb, entry_stmt, ws_args);
2552 /* A subroutine of expand_omp_for. Generate code for a parallel
2553 loop with any schedule. Given parameters:
2555 for (V = N1; V cond N2; V += STEP) BODY;
2557 where COND is "<" or ">", we generate pseudocode
2559 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2560 if (more) goto L0; else goto L3;
2567 if (V cond iend) goto L1; else goto L2;
2569 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2572 If this is a combined omp parallel loop, instead of the call to
2573 GOMP_loop_foo_start, we emit 'goto L3'. */
/* REGION supplies the entry, continue and exit blocks of the loop; FD
   supplies the iteration variable V, the bounds N1 and N2, STEP and the
   chunk size. START_FN and NEXT_FN index built_in_decls[] to select the
   runtime "start" and "next" entry points.
   NOTE(review): this listing omits some source lines (the return-type
   line, braces, `else' keywords and the declarations of `list', `args'
   and `t'), so statements that look unconditional below may be guarded
   in the complete file -- confirm against the full source. */
2576 expand_omp_for_generic (struct omp_region *region,
2577 struct omp_for_data *fd,
2578 enum built_in_function start_fn,
2579 enum built_in_function next_fn)
2581 tree l0, l1, l2 = NULL, l3 = NULL;
2582 tree type, istart0, iend0, iend;
2584 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb;
2585 basic_block l2_bb = NULL, l3_bb = NULL;
2586 block_stmt_iterator si;
2587 bool in_combined_parallel = is_combined_parallel (region);
2589 type = TREE_TYPE (fd->v);
/* ISTART0 and IEND0 are `long' temporaries whose addresses are passed to
   the runtime calls below, hence TREE_ADDRESSABLE. IEND has the type of
   the loop variable. */
2591 istart0 = create_tmp_var (long_integer_type_node, ".istart0");
2592 iend0 = create_tmp_var (long_integer_type_node, ".iend0");
2593 iend = create_tmp_var (type, NULL);
2594 TREE_ADDRESSABLE (istart0) = 1;
2595 TREE_ADDRESSABLE (iend0) = 1;
/* REGION->cont and REGION->exit must be either both set or both NULL. */
2597 gcc_assert ((region->cont != NULL) ^ (region->exit == NULL));
2599 entry_bb = region->entry;
2600 l0_bb = create_empty_bb (entry_bb);
2601 l1_bb = single_succ (entry_bb);
2603 l0 = tree_block_label (l0_bb);
2604 l1 = tree_block_label (l1_bb);
2606 cont_bb = region->cont;
2607 exit_bb = region->exit;
/* NOTE(review): L2, L3 and their blocks are initialised to NULL above, so
   the next four statements are presumably guarded by a test on CONT_BB
   that is not visible in this listing. */
2610 l2_bb = create_empty_bb (cont_bb);
2611 l3_bb = single_succ (cont_bb);
2613 l2 = tree_block_label (l2_bb);
2614 l3 = tree_block_label (l3_bb);
2617 si = bsi_last (entry_bb);
2618 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2619 if (!in_combined_parallel)
2621 /* If this is not a combined parallel loop, emit a call to
2622 GOMP_loop_foo_start in ENTRY_BB. */
/* tree_cons prepends, so the arguments are pushed last-to-first and the
   call ends up as START_FN (n1, n2, step, chunk_size, &istart0, &iend0).
   NOTE(review): the chunk_size argument is presumably added only when
   FD->chunk_size is set; no guard is visible in this listing. */
2623 list = alloc_stmt_list ();
2624 t = build_fold_addr_expr (iend0);
2625 args = tree_cons (NULL, t, NULL);
2626 t = build_fold_addr_expr (istart0);
2627 args = tree_cons (NULL, t, args);
2630 t = fold_convert (long_integer_type_node, fd->chunk_size);
2631 args = tree_cons (NULL, t, args);
2633 t = fold_convert (long_integer_type_node, fd->step);
2634 args = tree_cons (NULL, t, args);
2635 t = fold_convert (long_integer_type_node, fd->n2);
2636 args = tree_cons (NULL, t, args);
2637 t = fold_convert (long_integer_type_node, fd->n1);
2638 args = tree_cons (NULL, t, args);
2639 t = build_function_call_expr (built_in_decls[start_fn], args);
2640 t = get_formal_tmp_var (t, &list);
/* Branch to L0 when the start call yields a true value, otherwise to L3. */
2643 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2644 build_and_jump (&l3));
2645 append_to_statement_list (t, &list);
2647 bsi_insert_after (&si, list, BSI_SAME_STMT);
/* SI still designates the statement asserted above to be the OMP_FOR;
   delete that marker. */
2649 bsi_remove (&si, true);
2651 /* Iteration setup for sequential loop goes in L0_BB. */
/* V = (type) istart0; iend = (type) iend0. */
2652 list = alloc_stmt_list ();
2653 t = fold_convert (type, istart0);
2654 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2655 gimplify_and_add (t, &list);
2657 t = fold_convert (type, iend0);
2658 t = build2 (MODIFY_EXPR, void_type_node, iend, t);
2659 gimplify_and_add (t, &list);
2661 si = bsi_start (l0_bb);
2662 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2664 /* Handle the rare case where BODY doesn't ever return. */
/* With no continue block, only ENTRY_BB -> L0_BB -> L1_BB is wired up.
   NOTE(review): the function presumably returns at the end of this
   branch; that line is not visible in this listing. */
2665 if (cont_bb == NULL)
2667 remove_edge (single_succ_edge (entry_bb));
2668 make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
2669 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2673 /* Code to control the increment and predicate for the sequential
2674 loop goes in the first half of EXIT_BB (we split EXIT_BB so
2675 that we can inherit all the edges going out of the loop
2677 list = alloc_stmt_list ();
2679 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2680 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2681 gimplify_and_add (t, &list);
2683 t = build2 (fd->cond_code, boolean_type_node, fd->v, iend);
2684 t = get_formal_tmp_var (t, &list);
2685 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2686 build_and_jump (&l2));
2687 append_to_statement_list (t, &list);
2689 si = bsi_last (cont_bb);
2690 bsi_insert_after (&si, list, BSI_SAME_STMT);
2691 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
2692 bsi_remove (&si, true);
2694 /* Emit code to get the next parallel iteration in L2_BB. */
/* NOTE(review): in this listing the comment opened at "Code to control
   the increment" is never closed, so the statements from there to this
   point read as comment text. Their intent is to emit
   `V += STEP; if (V cond iend) goto L1; else goto L2;' at the end of
   CONT_BB and to remove the OMP_CONTINUE found there.
   The call built next is NEXT_FN (&istart0, &iend0); a true result
   branches back to L0, a false one to L3. */
2695 list = alloc_stmt_list ();
2697 t = build_fold_addr_expr (iend0);
2698 args = tree_cons (NULL, t, NULL);
2699 t = build_fold_addr_expr (istart0);
2700 args = tree_cons (NULL, t, args);
2701 t = build_function_call_expr (built_in_decls[next_fn], args);
2702 t = get_formal_tmp_var (t, &list);
2703 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2704 build_and_jump (&l3));
2705 append_to_statement_list (t, &list);
2707 si = bsi_start (l2_bb);
2708 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2710 /* Add the loop cleanup function. */
/* The last statement of EXIT_BB is replaced by a call to the NOWAIT or the
   plain loop-end builtin, chosen by OMP_RETURN_NOWAIT.
   NOTE(review): the `else' between the two assignments to T is not
   visible in this listing. */
2711 si = bsi_last (exit_bb);
2712 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
2713 t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT];
2715 t = built_in_decls[BUILT_IN_GOMP_LOOP_END];
2716 t = build_function_call_expr (t, NULL);
2717 bsi_insert_after (&si, t, BSI_SAME_STMT);
2718 bsi_remove (&si, true);
2720 /* Connect the new blocks. */
/* NOTE(review): the ENTRY_BB -> L0_BB / L3_BB pair below is presumably the
   `else' arm of the in_combined_parallel test. */
2721 remove_edge (single_succ_edge (entry_bb));
2722 if (in_combined_parallel)
2723 make_edge (entry_bb, l2_bb, EDGE_FALLTHRU);
2726 make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE);
2727 make_edge (entry_bb, l3_bb, EDGE_FALSE_VALUE);
2730 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2732 remove_edge (single_succ_edge (cont_bb));
2733 make_edge (cont_bb, l1_bb, EDGE_TRUE_VALUE);
2734 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
2736 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
2737 make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
2741 /* A subroutine of expand_omp_for. Generate code for a parallel
2742 loop with static schedule and no specified chunk size. Given
2745 for (V = N1; V cond N2; V += STEP) BODY;
2747 where COND is "<" or ">", we generate pseudocode
2753 n = (adj + N2 - N1) / STEP;
2755 q += (q * nthreads != n);
2757 e0 = min(s0 + q, n);
2758 if (s0 >= e0) goto L2; else goto L0;
2765 if (V cond e) goto L1;
/* Each thread derives its own range [s0, e0) of the N logical iterations
   from the thread count and its thread number, then runs a sequential
   loop over that range. REGION supplies the entry, continue and exit
   blocks; FD supplies V, N1, N2, STEP and the comparison code.
   NOTE(review): this listing omits some source lines (return-type line,
   braces, the declarations of `type', `list' and `fin_bb', and parts of
   the header comment), so confirm details against the full source. */
2770 expand_omp_for_static_nochunk (struct omp_region *region,
2771 struct omp_for_data *fd)
2773 tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid;
2775 basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
2777 block_stmt_iterator si;
2779 type = TREE_TYPE (fd->v);
2781 entry_bb = region->entry;
2782 seq_start_bb = create_empty_bb (entry_bb);
2783 body_bb = single_succ (entry_bb);
2784 cont_bb = region->cont;
2785 fin_bb = single_succ (cont_bb);
2786 exit_bb = region->exit;
2788 l0 = tree_block_label (seq_start_bb);
2789 l1 = tree_block_label (body_bb);
2790 l2 = tree_block_label (fin_bb);
2792 /* Iteration space partitioning goes in ENTRY_BB. */
2793 list = alloc_stmt_list ();
/* Results of the two runtime queries, both converted to the type of V. */
2795 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2796 t = build_function_call_expr (t, NULL);
2797 t = fold_convert (type, t);
2798 nthreads = get_formal_tmp_var (t, &list);
2800 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2801 t = build_function_call_expr (t, NULL);
2802 t = fold_convert (type, t);
2803 threadid = get_formal_tmp_var (t, &list);
/* Convert N1, N2 and STEP to V's type and force them into gimple values;
   the converted trees are stored back into FD. */
2805 fd->n1 = fold_convert (type, fd->n1);
2806 if (!is_gimple_val (fd->n1))
2807 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2809 fd->n2 = fold_convert (type, fd->n2);
2810 if (!is_gimple_val (fd->n2))
2811 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2813 fd->step = fold_convert (type, fd->step);
2814 if (!is_gimple_val (fd->step))
2815 fd->step = get_formal_tmp_var (fd->step, &list);
/* n = (STEP + adj + N2 - N1) / STEP, where adj is -1 for a `<' loop and
   +1 otherwise. */
2817 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2818 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
2819 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
2820 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
2821 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
2822 t = fold_convert (type, t);
/* NOTE(review): the arms of this test are only partly visible; presumably
   N is T itself when T is already a gimple value and a new temporary
   otherwise. */
2823 if (is_gimple_val (t))
2826 n = get_formal_tmp_var (t, &list);
/* q = n / nthreads (truncating), then bumped by (q * nthreads != n). */
2828 t = build2 (TRUNC_DIV_EXPR, type, n, nthreads);
2829 q = get_formal_tmp_var (t, &list);
2831 t = build2 (MULT_EXPR, type, q, nthreads);
2832 t = build2 (NE_EXPR, type, t, n);
2833 t = build2 (PLUS_EXPR, type, q, t);
2834 q = get_formal_tmp_var (t, &list);
/* s0 = q * threadid; e0 = MIN (s0 + q, n). */
2836 t = build2 (MULT_EXPR, type, q, threadid);
2837 s0 = get_formal_tmp_var (t, &list);
2839 t = build2 (PLUS_EXPR, type, s0, q);
2840 t = build2 (MIN_EXPR, type, t, n);
2841 e0 = get_formal_tmp_var (t, &list);
/* An empty range (s0 >= e0) jumps straight to L2 (FIN_BB); otherwise
   control goes to L0 (SEQ_START_BB). */
2843 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
2844 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2),
2845 build_and_jump (&l0));
2846 append_to_statement_list (t, &list);
2848 si = bsi_last (entry_bb);
2849 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2850 bsi_insert_after (&si, list, BSI_SAME_STMT);
2851 bsi_remove (&si, true);
2853 /* Setup code for sequential iteration goes in SEQ_START_BB. */
/* V = s0 * STEP + N1; e = e0 * STEP + N1. */
2854 list = alloc_stmt_list ();
2856 t = fold_convert (type, s0);
2857 t = build2 (MULT_EXPR, type, t, fd->step);
2858 t = build2 (PLUS_EXPR, type, t, fd->n1);
2859 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2860 gimplify_and_add (t, &list);
2862 t = fold_convert (type, e0);
2863 t = build2 (MULT_EXPR, type, t, fd->step);
2864 t = build2 (PLUS_EXPR, type, t, fd->n1);
2865 e = get_formal_tmp_var (t, &list);
2867 si = bsi_start (seq_start_bb);
2868 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2870 /* The code controlling the sequential loop replaces the OMP_CONTINUE. */
/* V += STEP; if (V cond e) goto L1; else goto L2. */
2871 list = alloc_stmt_list ();
2873 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2874 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2875 gimplify_and_add (t, &list);
2877 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
2878 t = get_formal_tmp_var (t, &list);
2879 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2880 build_and_jump (&l2));
2881 append_to_statement_list (t, &list);
2883 si = bsi_last (cont_bb);
2884 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
2885 bsi_insert_after (&si, list, BSI_SAME_STMT);
2886 bsi_remove (&si, true);
2888 /* Replace the OMP_RETURN with a barrier, or nothing. */
/* The barrier is built only when the statement is not marked nowait; the
   statement itself is removed either way. */
2889 si = bsi_last (exit_bb);
2890 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
2892 list = alloc_stmt_list ();
2893 build_omp_barrier (&list);
2894 bsi_insert_after (&si, list, BSI_SAME_STMT);
2896 bsi_remove (&si, true);
2898 /* Connect all the blocks. */
/* The edge from CONT_BB to FIN_BB is not recreated; it is looked up with
   find_edge and retagged as the false branch. */
2899 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
2901 remove_edge (single_succ_edge (entry_bb));
2902 make_edge (entry_bb, fin_bb, EDGE_TRUE_VALUE);
2903 make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE);
2905 make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
2906 find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
2910 /* A subroutine of expand_omp_for. Generate code for a parallel
2911 loop with static schedule and a specified chunk size. Given
2914 for (V = N1; V cond N2; V += STEP) BODY;
2916 where COND is "<" or ">", we generate pseudocode
2922 n = (adj + N2 - N1) / STEP;
2925 s0 = (trip * nthreads + threadid) * CHUNK;
2926 e0 = min(s0 + CHUNK, n);
2927 if (s0 < n) goto L1; else goto L4;
2934 if (V cond e) goto L2; else goto L3;
/* Chunks of FD->chunk_size logical iterations are visited in passes
   counted by TRIP: on each pass the thread starts at
   s0 = (trip * nthreads + threadid) * CHUNK and leaves the loop once s0
   is no longer below N. REGION supplies the entry, continue and exit
   blocks; FD supplies V, N1, N2, STEP, the chunk size and the comparison
   code.
   NOTE(review): this listing omits some source lines (return-type line,
   braces, the declarations of `type' and `list', and parts of the header
   comment), so confirm details against the full source. */
2942 expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
2944 tree l0, l1, l2, l3, l4, n, s0, e0, e, t;
2945 tree trip, nthreads, threadid;
2947 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
2948 basic_block trip_update_bb, cont_bb, fin_bb;
2950 block_stmt_iterator si;
2952 type = TREE_TYPE (fd->v);
2954 entry_bb = region->entry;
2955 iter_part_bb = create_empty_bb (entry_bb);
2956 seq_start_bb = create_empty_bb (iter_part_bb);
2957 body_bb = single_succ (entry_bb);
2958 cont_bb = region->cont;
2959 trip_update_bb = create_empty_bb (cont_bb);
2960 fin_bb = single_succ (cont_bb);
2961 exit_bb = region->exit;
2963 l0 = tree_block_label (iter_part_bb);
2964 l1 = tree_block_label (seq_start_bb);
2965 l2 = tree_block_label (body_bb);
2966 l3 = tree_block_label (trip_update_bb);
2967 l4 = tree_block_label (fin_bb);
2969 /* Trip and adjustment setup goes in ENTRY_BB. */
2970 list = alloc_stmt_list ();
/* Results of the two runtime queries, both converted to the type of V. */
2972 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2973 t = build_function_call_expr (t, NULL);
2974 t = fold_convert (type, t);
2975 nthreads = get_formal_tmp_var (t, &list);
2977 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2978 t = build_function_call_expr (t, NULL);
2979 t = fold_convert (type, t);
2980 threadid = get_formal_tmp_var (t, &list);
/* Convert N1, N2, STEP and the chunk size to V's type and force them into
   gimple values; the converted trees are stored back into FD. */
2982 fd->n1 = fold_convert (type, fd->n1);
2983 if (!is_gimple_val (fd->n1))
2984 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2986 fd->n2 = fold_convert (type, fd->n2);
2987 if (!is_gimple_val (fd->n2))
2988 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2990 fd->step = fold_convert (type, fd->step);
2991 if (!is_gimple_val (fd->step))
2992 fd->step = get_formal_tmp_var (fd->step, &list);
2994 fd->chunk_size = fold_convert (type, fd->chunk_size);
2995 if (!is_gimple_val (fd->chunk_size))
2996 fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list);
/* n = (STEP + adj + N2 - N1) / STEP, where adj is -1 for a `<' loop and
   +1 otherwise. */
2998 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2999 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
3000 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
3001 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
3002 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
3003 t = fold_convert (type, t);
/* NOTE(review): the arms of this test are only partly visible; presumably
   N is T itself when T is already a gimple value and a new temporary
   otherwise. */
3004 if (is_gimple_val (t))
3007 n = get_formal_tmp_var (t, &list);
/* TRIP starts at zero; it is incremented in TRIP_UPDATE_BB further down. */
3009 t = build_int_cst (type, 0);
3010 trip = get_initialized_tmp_var (t, &list, NULL);
3012 si = bsi_last (entry_bb);
3013 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
3014 bsi_insert_after (&si, list, BSI_SAME_STMT);
3015 bsi_remove (&si, true);
3017 /* Iteration space partitioning goes in ITER_PART_BB. */
/* s0 = (trip * nthreads + threadid) * CHUNK; e0 = MIN (s0 + CHUNK, n);
   if (s0 < n) goto L1; else goto L4. */
3018 list = alloc_stmt_list ();
3020 t = build2 (MULT_EXPR, type, trip, nthreads);
3021 t = build2 (PLUS_EXPR, type, t, threadid);
3022 t = build2 (MULT_EXPR, type, t, fd->chunk_size);
3023 s0 = get_formal_tmp_var (t, &list);
3025 t = build2 (PLUS_EXPR, type, s0, fd->chunk_size);
3026 t = build2 (MIN_EXPR, type, t, n);
3027 e0 = get_formal_tmp_var (t, &list);
3029 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3030 t = build3 (COND_EXPR, void_type_node, t,
3031 build_and_jump (&l1), build_and_jump (&l4));
3032 append_to_statement_list (t, &list);
3034 si = bsi_start (iter_part_bb);
3035 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3037 /* Setup code for sequential iteration goes in SEQ_START_BB. */
/* V = s0 * STEP + N1; e = e0 * STEP + N1. */
3038 list = alloc_stmt_list ();
3040 t = fold_convert (type, s0);
3041 t = build2 (MULT_EXPR, type, t, fd->step);
3042 t = build2 (PLUS_EXPR, type, t, fd->n1);
3043 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
3044 gimplify_and_add (t, &list);
3046 t = fold_convert (type, e0);
3047 t = build2 (MULT_EXPR, type, t, fd->step);
3048 t = build2 (PLUS_EXPR, type, t, fd->n1);
3049 e = get_formal_tmp_var (t, &list);
3051 si = bsi_start (seq_start_bb);
3052 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3054 /* The code controlling the sequential loop goes in CONT_BB,
3055 replacing the OMP_CONTINUE. */
/* V += STEP; if (V cond e) goto L2; else goto L3. */
3056 list = alloc_stmt_list ();
3058 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
3059 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
3060 gimplify_and_add (t, &list);
3062 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
3063 t = get_formal_tmp_var (t, &list);
3064 t = build3 (COND_EXPR, void_type_node, t,
3065 build_and_jump (&l2), build_and_jump (&l3));
3066 append_to_statement_list (t, &list);
3068 si = bsi_last (cont_bb);
3069 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3070 bsi_insert_after (&si, list, BSI_SAME_STMT);
3071 bsi_remove (&si, true);
3073 /* Trip update code goes into TRIP_UPDATE_BB. */
/* trip = trip + 1. */
3074 list = alloc_stmt_list ();
3076 t = build_int_cst (type, 1);
3077 t = build2 (PLUS_EXPR, type, trip, t);
3078 t = build2 (MODIFY_EXPR, void_type_node, trip, t);
3079 gimplify_and_add (t, &list);
3081 si = bsi_start (trip_update_bb);
3082 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3084 /* Replace the OMP_RETURN with a barrier, or nothing. */
/* The barrier is built only when the statement is not marked nowait; the
   statement itself is removed either way. */
3085 si = bsi_last (exit_bb);
3086 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
3088 list = alloc_stmt_list ();
3089 build_omp_barrier (&list);
3090 bsi_insert_after (&si, list, BSI_SAME_STMT);
3092 bsi_remove (&si, true);
3094 /* Connect the new blocks. */
/* TRIP_UPDATE_BB falls through back to ITER_PART_BB, whose test either
   enters SEQ_START_BB for the next chunk or leaves through FIN_BB. */
3095 remove_edge (single_succ_edge (entry_bb));
3096 make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU);
3098 make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE);
3099 make_edge (iter_part_bb, fin_bb, EDGE_FALSE_VALUE);
3101 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
3103 remove_edge (single_succ_edge (cont_bb));
3104 make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
3105 make_edge (cont_bb, trip_update_bb, EDGE_FALSE_VALUE);
3107 make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU);
3111 /* Expand the OpenMP loop defined by REGION. */
/* Extracts the loop description from the last statement of REGION->entry,
   records the schedule kind on REGION, and hands off to one of the three
   expanders: static without chunk size, static with chunk size, or the
   generic runtime-driven form. The work is bracketed by a gimplify
   context push and pop.
   NOTE(review): the rest of the condition on the static-schedule test,
   the `else' keywords and the braces are not visible in this listing. */
3114 expand_omp_for (struct omp_region *region)
3116 struct omp_for_data fd;
3118 push_gimplify_context ();
3120 extract_omp_for_data (last_stmt (region->entry), &fd);
3121 region->sched_kind = fd.sched_kind;
3123 if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
3128 if (fd.chunk_size == NULL)
3129 expand_omp_for_static_nochunk (region, &fd);
3131 expand_omp_for_static_chunk (region, &fd);
/* The start and next builtins are found by offsetting from the STATIC
   variants by the schedule kind, plus 4 when FD.have_ordered is set. */
3135 int fn_index = fd.sched_kind + fd.have_ordered * 4;
3136 int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
3137 int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
3138 expand_omp_for_generic (region, &fd, start_ix, next_ix);
3141 pop_gimplify_context (NULL);
3145 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
3147 v = GOMP_sections_start (n);
3164 v = GOMP_sections_next ();
3169 If this is a combined parallel sections, replace the call to
3170 GOMP_sections_start with 'goto L1'. */
/* Builds a switch on the temporary V in a new block L0_BB, with one case
   per OMP_SECTION region under REGION, a `0' case that jumps to L2 and a
   default case that traps. V is set from the sections-start builtin in
   the entry block and from the sections-next builtin in L1_BB.
   NOTE(review): this listing omits some source lines (return-type line,
   braces, several `if'/`else' lines and the declarations of `len', `i'
   and `e'), so statements that look unconditional below may be guarded
   in the complete file. */
3173 expand_omp_sections (struct omp_region *region)
3175 tree label_vec, l0, l1, l2, t, u, v, sections_stmt;
3177 basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb, default_bb;
3178 block_stmt_iterator si;
3179 struct omp_region *inner;
3182 entry_bb = region->entry;
3183 l0_bb = create_empty_bb (entry_bb);
3184 l0 = tree_block_label (l0_bb);
/* REGION->cont and REGION->exit must be either both set or both NULL. */
3186 gcc_assert ((region->cont != NULL) ^ (region->exit == NULL));
/* NOTE(review): L2_BB is assigned twice below (the successor of the
   continue block versus a new empty block); these are presumably the
   two arms of a test on L1_BB whose `if' and `else' lines are not
   visible in this listing. */
3187 l1_bb = region->cont;
3190 l2_bb = single_succ (l1_bb);
3191 default_bb = create_empty_bb (l1_bb->prev_bb);
3193 l1 = tree_block_label (l1_bb);
3197 l2_bb = create_empty_bb (l0_bb);
3202 l2 = tree_block_label (l2_bb);
3204 exit_bb = region->exit;
3206 v = create_tmp_var (unsigned_type_node, ".section");
3208 /* We will build a switch() with enough cases for all the
3209 OMP_SECTION regions, a '0' case to handle the end of more work
3210 and a default case to abort if something goes wrong. */
/* LEN is the number of successor edges of ENTRY_BB. The vector has
   LEN + 2 slots: slot 0 for the `0' case, slots from 1 up for the
   sections, and slot LEN + 1 for the default case. */
3211 len = EDGE_COUNT (entry_bb->succs);
3212 label_vec = make_tree_vec (len + 2);
3214 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
3215 OMP_SECTIONS statement. */
3216 si = bsi_last (entry_bb);
3217 sections_stmt = bsi_stmt (si);
3218 gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS);
3219 if (!is_combined_parallel (region))
3221 /* If we are not inside a combined parallel+sections region,
3222 call GOMP_sections_start. */
/* v = <sections-start builtin> (len). */
3223 t = build_int_cst (unsigned_type_node, len);
3224 t = tree_cons (NULL, t, NULL);
3225 u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
3226 t = build_function_call_expr (u, t);
3227 t = build2 (MODIFY_EXPR, void_type_node, v, t);
3228 bsi_insert_after (&si, t, BSI_SAME_STMT);
3230 bsi_remove (&si, true);
3232 /* The switch() statement replacing OMP_SECTIONS goes in L0_BB. */
3233 si = bsi_start (l0_bb);
3235 t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec);
3236 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
/* Case 0 jumps to L2. */
3238 t = build3 (CASE_LABEL_EXPR, void_type_node,
3239 build_int_cst (unsigned_type_node, 0), NULL, l2);
3240 TREE_VEC_ELT (label_vec, 0) = t;
3241 make_edge (l0_bb, l2_bb, 0);
3243 /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */
/* Sections are numbered from 1 in the order of the REGION->inner chain. */
3244 for (inner = region->inner, i = 1; inner; inner = inner->next, ++i)
3246 basic_block s_entry_bb, s_exit_bb;
3248 s_entry_bb = inner->entry;
3249 s_exit_bb = inner->exit;
3251 t = tree_block_label (s_entry_bb);
3252 u = build_int_cst (unsigned_type_node, i);
3253 u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
3254 TREE_VEC_ELT (label_vec, i) = u;
3256 si = bsi_last (s_entry_bb);
3257 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION);
3258 gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si)));
3259 bsi_remove (&si, true);
/* Re-root the single edge entering the section so that it leaves from
   L0_BB, the block holding the switch. */
3261 e = single_pred_edge (s_entry_bb);
3263 redirect_edge_pred (e, l0_bb);
3265 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
/* NOTE(review): the statement controlled by this test is not visible;
   presumably a section without an exit block skips the OMP_RETURN
   removal below. */
3267 if (s_exit_bb == NULL)
3270 si = bsi_last (s_exit_bb);
3271 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3272 bsi_remove (&si, true);
3274 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
3277 /* Error handling code goes in DEFAULT_BB. */
/* The default case label targets DEFAULT_BB, which contains only a call
   to the trap builtin. */
3278 t = tree_block_label (default_bb);
3279 u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
3280 TREE_VEC_ELT (label_vec, len + 1) = u;
3281 make_edge (l0_bb, default_bb, 0);
3283 si = bsi_start (default_bb);
3284 t = built_in_decls[BUILT_IN_TRAP];
3285 t = build_function_call_expr (t, NULL);
3286 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3288 /* Code to get the next section goes in L1_BB. */
/* v = <sections-next builtin> (), replacing the OMP_CONTINUE. */
3291 si = bsi_last (l1_bb);
3292 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3294 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
3295 t = build_function_call_expr (t, NULL);
3296 t = build2 (MODIFY_EXPR, void_type_node, v, t);
3297 bsi_insert_after (&si, t, BSI_SAME_STMT);
3298 bsi_remove (&si, true);
3301 /* Cleanup function replaces OMP_RETURN in EXIT_BB. */
/* NOTE(review): the `else' between the two assignments to T is not
   visible in this listing. */
3304 si = bsi_last (exit_bb);
3305 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
3306 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT];
3308 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END];
3309 t = build_function_call_expr (t, NULL);
3310 bsi_insert_after (&si, t, BSI_SAME_STMT);
3311 bsi_remove (&si, true);
3314 /* Connect the new blocks. */
3315 if (is_combined_parallel (region))
3317 /* If this was a combined parallel+sections region, we did not
3318 emit a GOMP_sections_start in the entry block, so we just
3319 need to jump to L1_BB to get the next section. */
/* NOTE(review): the ENTRY_BB -> L0_BB edge below is presumably the `else'
   arm of the test above. */
3320 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
3323 make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
/* The successor edge of L1_BB is redirected back to the switch block. */
3327 e = single_succ_edge (l1_bb);
3328 redirect_edge_succ (e, l0_bb);
3329 e->flags = EDGE_FALLTHRU;
3334 /* Expand code for an OpenMP single directive. We've already expanded
3335 much of the code, here we simply place the GOMP_barrier call. */
/* Removes the OMP_SINGLE marker at the end of REGION->entry and replaces
   the last statement of REGION->exit with a barrier when one is needed.
   Both blocks get their single successor edge marked EDGE_FALLTHRU.
   NOTE(review): the return-type line and braces are not visible in this
   listing. */
3338 expand_omp_single (struct omp_region *region)
3340 basic_block entry_bb, exit_bb;
3341 block_stmt_iterator si;
3342 bool need_barrier = false;
3344 entry_bb = region->entry;
3345 exit_bb = region->exit;
3347 si = bsi_last (entry_bb);
3348 /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
3349 be removed. We need to ensure that the thread that entered the single
3350 does not exit before the data is copied out by the other threads. */
3351 if (find_omp_clause (OMP_SINGLE_CLAUSES (bsi_stmt (si)),
3352 OMP_CLAUSE_COPYPRIVATE))
3353 need_barrier = true;
/* NOTE(review): the clause lookup above reads the statement before this
   assertion checks that it is an OMP_SINGLE. */
3354 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE);
3355 bsi_remove (&si, true);
3356 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
/* A barrier is emitted unless the exit statement is marked nowait and no
   copyprivate clause was found above. */
3358 si = bsi_last (exit_bb);
3359 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier)
3361 tree t = alloc_stmt_list ();
3362 build_omp_barrier (&t);
3363 bsi_insert_after (&si, t, BSI_SAME_STMT);
3365 bsi_remove (&si, true);
3366 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3370 /* Generic expansion for OpenMP synchronization directives: master,
3371 ordered and critical. All we need to do here is remove the entry
3372 and exit markers for REGION. */
/* Deletes the directive statement at the end of REGION->entry and the
   OMP_RETURN at the end of REGION->exit, marking the single successor
   edge of each block EDGE_FALLTHRU.
   NOTE(review): the assertion below also accepts OMP_SINGLE in addition
   to master, ordered and critical. The exit-block handling may be guarded
   by a test on EXIT_BB that is not visible in this listing. */
3375 expand_omp_synch (struct omp_region *region)
3377 basic_block entry_bb, exit_bb;
3378 block_stmt_iterator si;
3380 entry_bb = region->entry;
3381 exit_bb = region->exit;
3383 si = bsi_last (entry_bb);
3384 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
3385 || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
3386 || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
3387 || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
3388 bsi_remove (&si, true);
3389 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3393 si = bsi_last (exit_bb);
3394 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3395 bsi_remove (&si, true);
3396 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3401 /* Expand the parallel region tree rooted at REGION. Expansion
3402 proceeds in depth-first order. Innermost regions are expanded
3403 first. This way, parallel regions that require a new function to
3404 be created (e.g., OMP_PARALLEL) can be expanded without having any
3405 internal dependencies in their body. */
/* For each region: recurse into REGION->inner first, then dispatch on
   REGION->type to the matching expander, then advance to REGION->next.
   NOTE(review): the loop header, the guard on the recursive call, the
   `case' labels and the `break' statements are not visible in this
   listing, so which region types reach which expander cannot be read off
   here. */
3408 expand_omp (struct omp_region *region)
3413 expand_omp (region->inner);
3415 switch (region->type)
3418 expand_omp_parallel (region);
3422 expand_omp_for (region);
3426 expand_omp_sections (region);
3430 /* Individual omp sections are handled together with their
3431 parent OMP_SECTIONS region. */
3435 expand_omp_single (region);
3441 expand_omp_synch (region);
3448 region = region->next;
3453 /* Helper for build_omp_regions. Scan the dominator tree starting at
3454 block BB. PARENT is the region that contains BB. */
/* When the statement SI designates in BB is an OMP directive: an
   OMP_RETURN closes PARENT (PARENT becomes PARENT->outer, and a closed
   OMP_PARALLEL region is classified by determine_parallel_type), an
   OMP_CONTINUE is asserted to have a PARENT, and any other directive
   opens a new region under PARENT via new_omp_region. The dominator sons
   of BB are then scanned recursively.
   NOTE(review): this listing omits several lines, including the
   initialisation of SI (presumably the last statement of BB), the
   declarations of `stmt' and `son', and the assignments that record BB
   as the exit or continue point. Also, the comment beginning
   "Otherwise, this directive" near the end is never closed in this
   listing. Confirm all of this against the full source. */
3457 build_omp_regions_1 (basic_block bb, struct omp_region *parent)
3459 block_stmt_iterator si;
3464 if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si)))
3466 struct omp_region *region;
3467 enum tree_code code;
3469 stmt = bsi_stmt (si);
3470 code = TREE_CODE (stmt);
3472 if (code == OMP_RETURN)
3474 /* STMT is the return point out of region PARENT. Mark it
3475 as the exit point and make PARENT the immediately
3476 enclosing region. */
3477 gcc_assert (parent);
3480 parent = parent->outer;
3482 /* If REGION is a parallel region, determine whether it is
3483 a combined parallel+workshare region. */
3484 if (region->type == OMP_PARALLEL)
3485 determine_parallel_type (region);
3487 else if (code == OMP_CONTINUE)
3489 gcc_assert (parent);
3494 /* Otherwise, this directive becomes the parent for a new
3496 region = new_omp_region (bb, code, parent);
3501 for (son = first_dom_son (CDI_DOMINATORS, bb);
3503 son = next_dom_son (CDI_DOMINATORS, son))
3504 build_omp_regions_1 (son, parent);
3508 /* Scan the CFG and build a tree of OMP regions. Return the root of
3509 the OMP region tree. */
/* Requires root_omp_region to be NULL on entry, computes dominator
   information, and starts the recursive scan at the entry block with no
   enclosing region.
   NOTE(review): no return statement is visible in this listing; the tree
   is presumably made available through root_omp_region. */
3512 build_omp_regions (void)
3514 gcc_assert (root_omp_region == NULL);
3515 calculate_dominance_info (CDI_DOMINATORS);
3516 build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
3520 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
/* Sequence: build the region tree, dump it, remove exit barriers, expand
   every region, release dominator and post-dominator information, clean
   up the CFG, and free the region tree.
   NOTE(review): the statement controlled by the !root_omp_region test
   (presumably an early return), the guard around the dump calls and the
   final return are not visible in this listing. */
3523 execute_expand_omp (void)
3525 build_omp_regions ();
3527 if (!root_omp_region)
3532 fprintf (dump_file, "\nOMP region tree\n\n");
3533 dump_omp_region (dump_file, root_omp_region, 0);
3534 fprintf (dump_file, "\n");
3537 remove_exit_barriers (root_omp_region);
3539 expand_omp (root_omp_region);
3541 free_dominance_info (CDI_DOMINATORS);
3542 free_dominance_info (CDI_POST_DOMINATORS);
3543 cleanup_tree_cfg ();
3545 free_omp_regions ();
/* Gate for the expansion pass: true only when flag_openmp is nonzero and
   errorcount is zero. */
3551 gate_expand_omp (void)
3553 return flag_openmp != 0 && errorcount == 0;
/* Pass descriptor tying the name "ompexp" to gate_expand_omp and
   execute_expand_omp. Each initializer is labelled by its trailing
   comment.
   NOTE(review): the braces and several initializers of this aggregate are
   not visible in this listing. */
3556 struct tree_opt_pass pass_expand_omp =
3558 "ompexp", /* name */
3559 gate_expand_omp, /* gate */
3560 execute_expand_omp, /* execute */
3563 0, /* static_pass_number */
3565 PROP_gimple_any, /* properties_required */
3566 PROP_gimple_lomp, /* properties_provided */
3567 0, /* properties_destroyed */
3568 0, /* todo_flags_start */
3569 TODO_dump_func, /* todo_flags_finish */
3573 /* Routines to lower OpenMP directives into OMP-GIMPLE. */
3575 /* Lower the OpenMP sections directive in *STMT_P. */
/* Flattens the body of the OMP_SECTIONS statement into one statement list
   in which every section is followed by an OMP_RETURN, and assembles a
   new BIND_EXPR around the result. CTX is the context used for the clause
   lowering calls.
   NOTE(review): this listing omits some source lines (return-type line,
   braces, declarations such as `t', `dlist', `len', `i' and `sctx', the
   initial read of *STMT_P, continuation lines of two calls, and the
   final store of the result), so confirm details against the full
   source. */
3578 lower_omp_sections (tree *stmt_p, omp_context *ctx)
3580 tree new_stmt, stmt, body, bind, block, ilist, olist, new_body;
3582 tree_stmt_iterator tsi;
3587 push_gimplify_context ();
3591 lower_rec_input_clauses (OMP_SECTIONS_CLAUSES (stmt), &ilist, &dlist, ctx);
/* First walk: count the statements in the sections body. */
3593 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
3594 for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi))
/* Second walk: for each section append its marker statement, its lowered
   body and an OMP_RETURN to BODY, and clear the section's own body. */
3597 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
3598 body = alloc_stmt_list ();
3599 for (i = 0; i < len; i++, tsi_next (&tsi))
3602 tree sec_start, sec_end;
3604 sec_start = tsi_stmt (tsi);
3605 sctx = maybe_lookup_ctx (sec_start);
3608 append_to_statement_list (sec_start, &body);
3610 lower_omp (&OMP_SECTION_BODY (sec_start), sctx);
3611 append_to_statement_list (OMP_SECTION_BODY (sec_start), &body);
3612 OMP_SECTION_BODY (sec_start) = NULL;
/* NOTE(review): the lastprivate code and the OMP_SECTION_LAST flag below
   presumably apply only to the final section; the guard is not visible
   in this listing. */
3616 tree l = alloc_stmt_list ();
3617 lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL,
3619 append_to_statement_list (l, &body);
3620 OMP_SECTION_LAST (sec_start) = 1;
3623 sec_end = make_node (OMP_RETURN);
3624 append_to_statement_list (sec_end, &body);
3627 block = make_node (BLOCK);
3628 bind = build3 (BIND_EXPR, void_type_node, NULL, body, block);
3631 lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx);
3633 pop_gimplify_context (NULL_TREE);
3634 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
/* The replacement body is, in order: ILIST, the OMP_SECTIONS statement,
   the flattened sections, an OMP_CONTINUE, OLIST and DLIST. That list is
   passed to maybe_catch_exception before the closing OMP_RETURN is
   appended. */
3636 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
3637 TREE_SIDE_EFFECTS (new_stmt) = 1;
3639 new_body = alloc_stmt_list ();
3640 append_to_statement_list (ilist, &new_body);
3641 append_to_statement_list (stmt, &new_body);
3642 append_to_statement_list (bind, &new_body);
3644 t = make_node (OMP_CONTINUE);
3645 append_to_statement_list (t, &new_body);
3647 append_to_statement_list (olist, &new_body);
3648 append_to_statement_list (dlist, &new_body);
3650 maybe_catch_exception (&new_body);
3652 t = make_node (OMP_RETURN);
3653 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt),
3655 append_to_statement_list (t, &new_body);
3657 BIND_EXPR_BODY (new_stmt) = new_body;
3658 OMP_SECTIONS_BODY (stmt) = NULL;
3664 /* A subroutine of lower_omp_single. Expand the simple form of
3665 an OMP_SINGLE, without a copyprivate clause:
3667 if (GOMP_single_start ())
3669 [ GOMP_barrier (); ] -> unless 'nowait' is present.
3671 FIXME. It may be better to delay expanding the logic of this until
3672 pass_expand_omp. The expanded logic may make the job more difficult
3673 for a synchronization analysis pass. */
/* Builds a COND_EXPR whose condition is a call to the single-start
   builtin, whose `then' arm is the body of SINGLE_STMT and whose `else'
   arm is NULL, and gimplifies it onto *PRE_P.
   NOTE(review): the return-type line, braces and the declaration of `t'
   are not visible in this listing. */
3676 lower_omp_single_simple (tree single_stmt, tree *pre_p)
3680 t = built_in_decls[BUILT_IN_GOMP_SINGLE_START];
3681 t = build_function_call_expr (t, NULL);
3682 t = build3 (COND_EXPR, void_type_node, t,
3683 OMP_SINGLE_BODY (single_stmt), NULL);
3684 gimplify_and_add (t, pre_p);
3688 /* A subroutine of lower_omp_single. Expand the copyprivate form of
3689 an OMP_SINGLE, i.e. one with a copyprivate clause:
3691 #pragma omp single copyprivate (a, b, c)
3693 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
3696 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
3702 GOMP_single_copy_end (&copyout);
3713 FIXME. It may be better to delay expanding the logic of this until
3714 pass_expand_omp. The expanded logic may make the job more difficult
3715 for a synchronization analysis pass. */
/* Emits onto *PRE_P the three-label sequence for a single construct that
   copies data out: L0 is the path that runs the body and publishes the
   sender record, L1 is the path that runs COPYIN_SEQ, and L2 is the join
   point. CTX supplies the record type and receives the sender and
   receiver decls created here.
   NOTE(review): the return-type line, braces, the initialisation of
   COPYIN_SEQ and the trailing arguments of the lower_copyprivate_clauses
   call are not visible in this listing. */
3718 lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx)
3720 tree ptr_type, t, args, l0, l1, l2, copyin_seq;
/* The sender is a record of CTX->record_type; the receiver is a pointer
   to that record type. */
3722 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
3724 ptr_type = build_pointer_type (ctx->record_type);
3725 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
3727 l0 = create_artificial_label ();
3728 l1 = create_artificial_label ();
3729 l2 = create_artificial_label ();
/* receiver = (ptr_type) <single-copy-start builtin> (). */
3731 t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START];
3732 t = build_function_call_expr (t, NULL);
3733 t = fold_convert (ptr_type, t);
3734 t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
3735 gimplify_and_add (t, pre_p);
/* A null receiver selects L0; a non-null one selects L1. */
3737 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
3738 build_int_cst (ptr_type, 0));
3739 t = build3 (COND_EXPR, void_type_node, t,
3740 build_and_jump (&l0), build_and_jump (&l1));
3741 gimplify_and_add (t, pre_p);
/* L0: the body, the copyprivate lowering, then the single-copy-end
   builtin called with the address of the sender record, then a jump
   to L2. */
3743 t = build1 (LABEL_EXPR, void_type_node, l0);
3744 gimplify_and_add (t, pre_p);
3746 append_to_statement_list (OMP_SINGLE_BODY (single_stmt), pre_p);
3749 lower_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p,
3752 t = build_fold_addr_expr (ctx->sender_decl);
3753 args = tree_cons (NULL, t, NULL);
3754 t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END];
3755 t = build_function_call_expr (t, args);
3756 gimplify_and_add (t, pre_p);
3758 t = build_and_jump (&l2);
3759 gimplify_and_add (t, pre_p);
/* L1: the statements collected in COPYIN_SEQ. */
3761 t = build1 (LABEL_EXPR, void_type_node, l1);
3762 gimplify_and_add (t, pre_p);
3764 append_to_statement_list (copyin_seq, pre_p);
/* L2: join point of both paths. */
3766 t = build1 (LABEL_EXPR, void_type_node, l2);
3767 gimplify_and_add (t, pre_p);
3771 /* Expand code for an OpenMP single directive. */
/* Replaces *STMT_P with a new BIND_EXPR containing, in order: the input
   clause setup, the OMP_SINGLE statement itself, the expansion produced
   by lower_omp_single_copy or lower_omp_single_simple, DLIST, and a
   closing OMP_RETURN. The body list is passed to maybe_catch_exception
   before the OMP_RETURN is appended. CTX->block_vars are chained onto the
   bind's variables.
   NOTE(review): the return-type line, braces, the `else' between the two
   expansion calls and the trailing argument of the find_omp_clause call
   are not visible in this listing. */
3774 lower_omp_single (tree *stmt_p, omp_context *ctx)
3776 tree t, bind, block, single_stmt = *stmt_p, dlist;
3778 push_gimplify_context ();
3780 block = make_node (BLOCK);
3781 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3782 TREE_SIDE_EFFECTS (bind) = 1;
3784 lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt),
3785 &BIND_EXPR_BODY (bind), &dlist, ctx);
3786 lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx);
3788 append_to_statement_list (single_stmt, &BIND_EXPR_BODY (bind));
/* The copy form is selected by CTX->record_type being set. */
3790 if (ctx->record_type)
3791 lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx);
3793 lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind));
/* The body now lives in the bind; detach it from the OMP_SINGLE. */
3795 OMP_SINGLE_BODY (single_stmt) = NULL;
3797 append_to_statement_list (dlist, &BIND_EXPR_BODY (bind));
3799 maybe_catch_exception (&BIND_EXPR_BODY (bind));
3801 t = make_node (OMP_RETURN);
3802 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt),
3804 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3806 pop_gimplify_context (bind);
3808 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3809 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3813 /* Expand code for an OpenMP master directive. */
/* Replaces *STMT_P with a new BIND_EXPR containing, in order: the
   OMP_MASTER statement itself, a test that jumps to LAB unless the
   thread-number builtin returns zero, the lowered body, the label LAB,
   and an OMP_RETURN marked nowait. CTX->block_vars are chained onto the
   bind's variables.
   NOTE(review): the return-type line and braces are not visible in this
   listing. */
3816 lower_omp_master (tree *stmt_p, omp_context *ctx)
3818 tree bind, block, stmt = *stmt_p, lab = NULL, x;
3820 push_gimplify_context ();
3822 block = make_node (BLOCK);
3823 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3824 TREE_SIDE_EFFECTS (bind) = 1;
3826 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
/* if (<thread-number builtin> () == 0) fall through; else goto LAB.
   LAB starts out NULL and is presumably created by build_and_jump. */
3828 x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
3829 x = build_function_call_expr (x, NULL);
3830 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
3831 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
3832 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
/* The body is lowered and passed to maybe_catch_exception before being
   moved into the bind; the OMP_MASTER is left with no body. */
3834 lower_omp (&OMP_MASTER_BODY (stmt), ctx);
3835 maybe_catch_exception (&OMP_MASTER_BODY (stmt));
3836 append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind));
3837 OMP_MASTER_BODY (stmt) = NULL;
3839 x = build1 (LABEL_EXPR, void_type_node, lab);
3840 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3842 x = make_node (OMP_RETURN);
3843 OMP_RETURN_NOWAIT (x) = 1;
3844 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
3846 pop_gimplify_context (bind);
3848 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3849 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3853 /* Expand code for an OpenMP ordered directive.  *STMT_P is replaced
   with a new BIND_EXPR containing the original statement, a call to
   the BUILT_IN_GOMP_ORDERED_START builtin, the lowered body, a call to
   the BUILT_IN_GOMP_ORDERED_END builtin, and an OMP_RETURN marked
   NOWAIT.  CTX is the context of the directive.  */
3856 lower_omp_ordered (tree *stmt_p, omp_context *ctx)
3858 tree bind, block, stmt = *stmt_p, x;
3860 push_gimplify_context ();
3862 block = make_node (BLOCK);
3863 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3864 TREE_SIDE_EFFECTS (bind) = 1;
3866 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
/* Argument-less runtime call emitted ahead of the body.  */
3868 x = built_in_decls[BUILT_IN_GOMP_ORDERED_START];
3869 x = build_function_call_expr (x, NULL);
3870 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
/* Lower the body in place, then move it into the new BIND_EXPR and
   detach it from the directive node.  */
3872 lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
3873 maybe_catch_exception (&OMP_ORDERED_BODY (stmt));
3874 append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));
3875 OMP_ORDERED_BODY (stmt) = NULL;
/* Matching argument-less runtime call emitted after the body.  */
3877 x = built_in_decls[BUILT_IN_GOMP_ORDERED_END];
3878 x = build_function_call_expr (x, NULL);
3879 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
/* Region exit marker, always flagged NOWAIT for this directive.  */
3881 x = make_node (OMP_RETURN);
3882 OMP_RETURN_NOWAIT (x) = 1;
3883 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
3885 pop_gimplify_context (bind);
3887 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3888 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3892 /* Gimplify an OMP_CRITICAL statement. This is a relatively simple
3893 substitution of a couple of function calls. But in the NAMED case,
3894 requires that languages coordinate a symbol name. It is therefore
3895 best put here in common code. */
/* Maps a critical-section name to the variable created for it, so that
   every use of the same name shares one variable.  Created lazily on
   first use.  */
3897 static GTY((param1_is (tree), param2_is (tree)))
3898 splay_tree critical_name_mutexes;
/* Lower the OMP_CRITICAL statement in *STMT_P, replacing it with a
   BIND_EXPR of the form: original statement, lock call, lowered body,
   unlock call, OMP_RETURN (NOWAIT).  CTX is the context of the
   directive.
   NOTE(review): the conditionals that choose between the named and the
   unnamed paths, and between creating and reusing the variable, are
   not visible in this block -- confirm against the full file.  */
3901 lower_omp_critical (tree *stmt_p, omp_context *ctx)
3903 tree bind, block, stmt = *stmt_p;
3904 tree t, lock, unlock, name;
3906 name = OMP_CRITICAL_NAME (stmt);
/* Named path: look NAME up in the table, creating the table first if
   needed.  Keys are compared as pointers.  */
3912 if (!critical_name_mutexes)
3913 critical_name_mutexes
3914 = splay_tree_new_ggc (splay_tree_compare_pointers);
3916 n = splay_tree_lookup (critical_name_mutexes, (splay_tree_key) name);
/* Create the variable for this name: a pointer-typed object called
   ".gomp_critical_user_<name>", marked public, static and common,
   and hidden from the user as an artificial, ignored decl.  */
3921 decl = create_tmp_var_raw (ptr_type_node, NULL);
3923 new_str = ACONCAT ((".gomp_critical_user_",
3924 IDENTIFIER_POINTER (name), NULL));
3925 DECL_NAME (decl) = get_identifier (new_str);
3926 TREE_PUBLIC (decl) = 1;
3927 TREE_STATIC (decl) = 1;
3928 DECL_COMMON (decl) = 1;
3929 DECL_ARTIFICIAL (decl) = 1;
3930 DECL_IGNORED_P (decl) = 1;
3931 cgraph_varpool_finalize_decl (decl);
/* Remember the variable so later uses of NAME find it.  */
3933 splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
3934 (splay_tree_value) decl);
/* Reuse the variable already recorded for NAME.  */
3937 decl = (tree) n->value;
/* The named entry points each take the address of the variable as
   their single argument.  */
3939 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3940 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
3941 lock = build_function_call_expr (lock, args);
3943 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3944 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
3945 unlock = build_function_call_expr (unlock, args);
/* Unnamed path: the argument-less entry points.  */
3949 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
3950 lock = build_function_call_expr (lock, NULL);
3952 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
3953 unlock = build_function_call_expr (unlock, NULL);
3956 push_gimplify_context ();
3958 block = make_node (BLOCK);
3959 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3960 TREE_SIDE_EFFECTS (bind) = 1;
3962 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3964 gimplify_and_add (lock, &BIND_EXPR_BODY (bind));
/* Lower the body in place, then move it between the lock and unlock
   calls and detach it from the directive node.  */
3966 lower_omp (&OMP_CRITICAL_BODY (stmt), ctx);
3967 maybe_catch_exception (&OMP_CRITICAL_BODY (stmt));
3968 append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind));
3969 OMP_CRITICAL_BODY (stmt) = NULL;
3971 gimplify_and_add (unlock, &BIND_EXPR_BODY (bind));
/* Region exit marker, always flagged NOWAIT for this directive.  */
3973 t = make_node (OMP_RETURN);
3974 OMP_RETURN_NOWAIT (t) = 1;
3975 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3977 pop_gimplify_context (bind);
3978 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3979 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3983 /* A subroutine of lower_omp_for. Generate code to emit the predicate
3984 for a lastprivate clause. Given a loop control predicate of (V
3985 cond N2), we gate the clause on (!(V cond N2)). The lowered form
3986 is appended to *BODY_P.  FD describes the loop (its cond_code, step,
   v, n2 and for_stmt fields are read); CTX is passed through to
   lower_lastprivate_clauses.  */
3989 lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
3990 struct omp_context *ctx)
3993 enum tree_code cond_code;
/* Negate the loop predicate: LT_EXPR becomes GE_EXPR and any other
   code becomes LE_EXPR (presumably the only other code reaching here
   is GT_EXPR -- confirm against extract_omp_for_data).  */
3995 cond_code = fd->cond_code;
3996 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
3998 /* When possible, use a strict equality expression. This can let VRP
3999 type optimizations deduce the value and remove a copy. */
4000 if (host_integerp (fd->step, 0))
4002 HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step);
4003 if (step == 1 || step == -1)
4004 cond_code = EQ_EXPR;
/* The gating condition compares the iteration variable with the loop
   bound N2.  */
4007 cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);
4009 clauses = OMP_FOR_CLAUSES (fd->for_stmt);
4010 lower_lastprivate_clauses (clauses, cond, body_p, ctx);
4014 /* Lower code for an OpenMP loop directive.  A new BIND_EXPR is built
   whose body holds, in order: the pre-body, the statements from
   lower_rec_input_clauses, temporaries for non-invariant header
   operands, the OMP_FOR statement, the loop body, an OMP_CONTINUE
   marker, the reduction and DLIST statements, and an OMP_RETURN.
   CTX is the context of the directive.  */
4017 lower_omp_for (tree *stmt_p, omp_context *ctx)
4019 tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p;
4020 struct omp_for_data fd;
4024 push_gimplify_context ();
/* Lower nested directives in the pre-body and the body first.  */
4026 lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
4027 lower_omp (&OMP_FOR_BODY (stmt), ctx);
/* NOTE(review): the comment opened on the next line has no visible
   terminator before the following statements -- confirm that its
   closing line is present in the file.  */
4029 /* Move declaration of temporaries in the loop body before we make
4031 if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR)
4032 record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn);
4034 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4035 TREE_SIDE_EFFECTS (new_stmt) = 1;
4036 body_p = &BIND_EXPR_BODY (new_stmt);
4038 /* The pre-body and input clauses go before the lowered OMP_FOR. */
4041 append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
4042 lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx);
4044 /* Lower the header expressions. At this point, we can assume that
4045 the header is of the form:
4047 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
4049 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
4050 using the .omp_data_s mapping, if needed. */
/* VAL1: right-hand side of the init expression.  Anything that is not
   already a gimple invariant is evaluated into a temporary emitted in
   *BODY_P.  */
4051 rhs_p = &TREE_OPERAND (OMP_FOR_INIT (stmt), 1);
4052 if (!is_gimple_min_invariant (*rhs_p))
4053 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
/* VAL2: right-hand side of the loop condition.  */
4055 rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1);
4056 if (!is_gimple_min_invariant (*rhs_p))
4057 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
/* VAL3: second operand of the expression assigned in the increment.  */
4059 rhs_p = &TREE_OPERAND (TREE_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
4060 if (!is_gimple_min_invariant (*rhs_p))
4061 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4063 /* Once lowered, extract the bounds and clauses. */
4064 extract_omp_for_data (stmt, &fd);
/* The directive itself, then its body, then the OMP_CONTINUE marker.  */
4066 append_to_statement_list (stmt, body_p);
4068 append_to_statement_list (OMP_FOR_BODY (stmt), body_p);
4070 t = make_node (OMP_CONTINUE);
4071 append_to_statement_list (t, body_p);
4073 /* After the loop, add exit clauses. */
/* The lastprivate code is added to DLIST, which is itself appended
   only after the reduction code.  */
4074 lower_omp_for_lastprivate (&fd, &dlist, ctx);
4075 lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
4076 append_to_statement_list (dlist, body_p);
4078 maybe_catch_exception (body_p);
4080 /* Region exit marker goes at the end of the loop body. */
4081 t = make_node (OMP_RETURN);
4082 OMP_RETURN_NOWAIT (t) = fd.have_nowait;
4083 append_to_statement_list (t, body_p);
4085 pop_gimplify_context (NULL_TREE);
4086 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
/* Both bodies now live in the new BIND_EXPR; detach them from the
   directive node.  */
4088 OMP_FOR_BODY (stmt) = NULL_TREE;
4089 OMP_FOR_PRE_BODY (stmt) = NULL_TREE;
4094 /* Lower the OpenMP parallel directive in *STMT_P. CTX holds context
4095 information for the directive.  The directive's body is rebuilt as a
   statement list (receiver setup, input-clause code, lowered body,
   output-clause code, OMP_RETURN), and the directive is placed in a
   new BIND_EXPR between the ILIST and OLIST statements produced by the
   send routines.  */
4098 lower_omp_parallel (tree *stmt_p, omp_context *ctx)
4100 tree clauses, par_bind, par_body, new_body, bind;
4101 tree olist, ilist, par_olist, par_ilist;
4102 tree stmt, child_fn, t;
4106 clauses = OMP_PARALLEL_CLAUSES (stmt);
4107 par_bind = OMP_PARALLEL_BODY (stmt);
4108 par_body = BIND_EXPR_BODY (par_bind);
4109 child_fn = ctx->cb.dst_fn;
4111 push_gimplify_context ();
/* PAR_ILIST and PAR_OLIST collect the code that goes before and after
   the body inside the parallel region.  */
4113 par_olist = NULL_TREE;
4114 par_ilist = NULL_TREE;
4115 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
4116 lower_omp (&par_body, ctx);
4117 lower_reduction_clauses (clauses, &par_olist, ctx);
4119 /* Declare all the variables created by mapping and the variables
4120 declared in the scope of the parallel body. */
4121 record_vars_into (ctx->block_vars, child_fn);
4122 record_vars_into (BIND_EXPR_VARS (par_bind), child_fn);
/* When the context has a record type, create the ".omp_data_o" object
   of that type and attach it to the directive as its data argument.  */
4124 if (ctx->record_type)
4126 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o");
4127 OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl;
/* ILIST and OLIST collect the code that goes before and after the
   directive in the enclosing function.  */
4132 lower_send_clauses (clauses, &ilist, &olist, ctx);
4133 lower_send_shared_vars (&ilist, &olist, ctx);
4135 /* Once all the expansions are done, sequence all the different
4136 fragments inside OMP_PARALLEL_BODY. */
4137 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4138 append_to_statement_list (ilist, &BIND_EXPR_BODY (bind));
4140 new_body = alloc_stmt_list ();
/* First statement of the new body: point receiver_decl at the sender
   object, converting the address to the receiver's type.  */
4142 if (ctx->record_type)
4144 t = build_fold_addr_expr (ctx->sender_decl);
4145 /* fixup_child_record_type might have changed receiver_decl's type. */
4146 t = fold_convert (TREE_TYPE (ctx->receiver_decl), t);
4147 t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
4148 append_to_statement_list (t, &new_body);
4151 append_to_statement_list (par_ilist, &new_body);
4152 append_to_statement_list (par_body, &new_body);
4153 append_to_statement_list (par_olist, &new_body);
4154 maybe_catch_exception (&new_body);
/* Region exit marker; no NOWAIT flag is set on it here.  */
4155 t = make_node (OMP_RETURN);
4156 append_to_statement_list (t, &new_body);
4157 OMP_PARALLEL_BODY (stmt) = new_body;
4159 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
4160 append_to_statement_list (olist, &BIND_EXPR_BODY (bind));
4164 pop_gimplify_context (NULL_TREE);
4168 /* Pass *TP back through the gimplifier within the context determined by WI.
4169 This handles replacement of DECL_VALUE_EXPR, as well as adjusting the
4170 flags on ADDR_EXPR.  Statements generated while gimplifying are
   collected in PRE and linked in before the statement WI is currently
   positioned on.  */
4173 lower_regimplify (tree *tp, struct walk_stmt_info *wi)
4175 enum gimplify_status gs;
/* Three forms are requested depending on WI: an lvalue, a gimple value
   when wi->val_only is set, or otherwise a formal temporary.
   NOTE(review): the condition guarding the lvalue form is not visible
   here -- confirm.  */
4179 gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue);
4180 else if (wi->val_only)
4181 gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue);
4183 gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue);
/* Anything short of complete gimplification is treated as an internal
   error.  */
4184 gcc_assert (gs == GS_ALL_DONE);
4187 tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT);
4191 /* Callback for walk_stmts. Lower the OpenMP directive pointed by TP.
   DATA is the walk_stmt_info for the walk; its INFO field supplies the
   current omp_context.  Expressions that may need rewriting are sent
   back through lower_regimplify.  */
4194 lower_omp_1 (tree *tp, int *walk_subtrees, void *data)
4196 struct walk_stmt_info *wi = data;
4197 omp_context *ctx = wi->info;
4200 /* If we have issued syntax errors, avoid doing any heavy lifting.
4201 Just replace the OpenMP directives with a NOP to avoid
4202 confusing RTL expansion. */
4203 if (errorcount && OMP_DIRECTIVE_P (*tp))
4205 *tp = build_empty_stmt ();
/* Dispatch on the tree code.  For each directive kind the context
   recorded for the statement is looked up and the statement is handed
   to the matching lowering routine.
   NOTE(review): the case labels and any statements between the lookup
   and the call are not visible here -- confirm.  */
4210 switch (TREE_CODE (*tp))
4213 ctx = maybe_lookup_ctx (t);
4214 lower_omp_parallel (tp, ctx);
4218 ctx = maybe_lookup_ctx (t);
4220 lower_omp_for (tp, ctx);
4224 ctx = maybe_lookup_ctx (t);
4226 lower_omp_sections (tp, ctx);
4230 ctx = maybe_lookup_ctx (t);
4232 lower_omp_single (tp, ctx);
4236 ctx = maybe_lookup_ctx (t);
4238 lower_omp_master (tp, ctx);
4242 ctx = maybe_lookup_ctx (t);
4244 lower_omp_ordered (tp, ctx);
4248 ctx = maybe_lookup_ctx (t);
4250 lower_omp_critical (tp, ctx);
/* A decl with a value expression is re-gimplified only when we are
   inside some context.  */
4254 if (ctx && DECL_HAS_VALUE_EXPR_P (t))
4255 lower_regimplify (tp, wi);
4260 lower_regimplify (tp, wi);
4264 case ARRAY_RANGE_REF:
4268 case VIEW_CONVERT_EXPR:
4270 lower_regimplify (tp, wi);
/* Here only operand 0 is re-gimplified, and as a value.  */
4277 wi->val_only = true;
4278 lower_regimplify (&TREE_OPERAND (t, 0), wi);
4283 if (!TYPE_P (t) && !DECL_P (t))
/* Walk the statements at *STMT_P with lower_omp_1 as the callback,
   using a freshly zeroed walk_stmt_info that requests location
   tracking.  CTX is the context the walk is performed in.  */
4292 lower_omp (tree *stmt_p, omp_context *ctx)
4294 struct walk_stmt_info wi;
4296 memset (&wi, 0, sizeof (wi));
4297 wi.callback = lower_omp_1;
4300 wi.want_locations = true;
4302 walk_stmts (&wi, stmt_p);
4305 /* Main entry point.  Scans the body of current_function_decl, lowers
   it when the scan left at least one entry in ALL_CONTEXTS, and then
   discards ALL_CONTEXTS.  */
4308 execute_lower_omp (void)
/* Contexts are keyed by pointer; delete_omp_context is registered as
   the value-deletion callback.  */
4310 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
4311 delete_omp_context);
4313 scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
4314 gcc_assert (parallel_nesting_level == 0);
/* An empty tree means there is nothing to lower, so the second walk is
   skipped.  */
4316 if (all_contexts->root)
4317 lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
4321 splay_tree_delete (all_contexts);
4322 all_contexts = NULL;
/* Gate for the pass: enabled exactly when flag_openmp is nonzero.  */
4328 gate_lower_omp (void)
4330 return flag_openmp != 0;
/* Pass descriptor for "omplower": gated by gate_lower_omp and run by
   execute_lower_omp.  It requires PROP_gimple_any, provides
   PROP_gimple_lomp, and requests a function dump when it finishes.  */
4333 struct tree_opt_pass pass_lower_omp =
4335 "omplower", /* name */
4336 gate_lower_omp, /* gate */
4337 execute_lower_omp, /* execute */
4340 0, /* static_pass_number */
4342 PROP_gimple_any, /* properties_required */
4343 PROP_gimple_lomp, /* properties_provided */
4344 0, /* properties_destroyed */
4345 0, /* todo_flags_start */
4346 TODO_dump_func, /* todo_flags_finish */
4350 /* The following is a utility to diagnose OpenMP structured block violations.
4351 It is not part of the "omplower" pass, as that's invoked too late. It
4352 should be invoked by the respective front ends after gimplification. */
/* Maps each label to the context in which its LABEL_EXPR was found.
   Filled by diagnose_sb_1, queried by diagnose_sb_2, and created and
   destroyed by diagnose_omp_structured_block_errors.  */
4354 static splay_tree all_labels;
4356 /* Check for mismatched contexts and generate an error if needed. Return
4357 true if an error is detected.  BRANCH_CTX is the context of the
   branch statement at *STMT_P and LABEL_CTX the context list recorded
   for its destination, or NULL.  On error the branch statement is
   replaced with an empty statement.  */
4360 diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx)
/* Branch and label are in the same context when the head of LABEL_CTX
   (or NULL for an empty list) equals BRANCH_CTX.  */
4364 if ((label_ctx ? TREE_VALUE (label_ctx) : NULL) == branch_ctx)
4367 /* Try to avoid confusing the user by producing an error message
4368 with correct "exit" or "enter" verbiage. We prefer "exit"
4369 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
4370 if (branch_ctx == NULL)
/* Walk outward through the contexts enclosing the label, looking for
   BRANCH_CTX.  */
4376 if (TREE_VALUE (label_ctx) == branch_ctx)
4381 label_ctx = TREE_CHAIN (label_ctx);
4386 error ("invalid exit from OpenMP structured block");
4388 error ("invalid entry to OpenMP structured block");
/* Drop the offending branch.  */
4390 *stmt_p = build_empty_stmt ();
4394 /* Pass 1: Create a minimal tree of OpenMP structured blocks, and record
4395 where in the tree each label is found.  Used as a walk callback; DATA
   is the walk_stmt_info, whose INFO field holds the current context
   list.  */
4398 diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data)
4400 struct walk_stmt_info *wi = data;
4401 tree context = (tree) wi->info;
4406 switch (TREE_CODE (t))
/* Clauses are walked with this same callback before a new context is
   pushed for the construct.  */
4411 walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL);
4417 /* The minimal context here is just a tree of statements. */
/* Push T onto the context list and walk the construct's body with the
   extended list installed in WI.  */
4418 inner_context = tree_cons (NULL, t, context);
4419 wi->info = inner_context;
4420 walk_stmts (wi, &OMP_BODY (t));
/* Loop construct: the clauses are walked in the outer context, while
   the header expressions, pre-body and body are walked with T pushed.  */
4425 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL);
4426 inner_context = tree_cons (NULL, t, context);
4427 wi->info = inner_context;
4428 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL);
4429 walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL);
4430 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL);
4431 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
4432 walk_stmts (wi, &OMP_FOR_BODY (t));
/* Record the context in which this label appears.  */
4437 splay_tree_insert (all_labels, (splay_tree_key) LABEL_EXPR_LABEL (t),
4438 (splay_tree_value) context);
4448 /* Pass 2: Check each branch and see if its context differs from that of
4449 the destination label's context.  Used as a walk callback; DATA is
   the walk_stmt_info, whose INFO field holds the context of the
   statement being visited.  Gotos, switch labels and returns are
   checked through diagnose_sb_0.  */
4452 diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data)
4454 struct walk_stmt_info *wi = data;
4455 tree context = (tree) wi->info;
4460 switch (TREE_CODE (t))
/* Clauses and bodies of OpenMP constructs are walked recursively with
   this same callback.  */
4465 walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL);
4472 walk_stmts (wi, &OMP_BODY (t));
4477 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL);
4479 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL);
4480 walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL);
4481 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL);
4482 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
4483 walk_stmts (wi, &OMP_FOR_BODY (t));
/* Goto: only destinations that are LABEL_DECLs are checked.  A label
   with no recorded entry is treated as having no context.  */
4489 tree lab = GOTO_DESTINATION (t);
4490 if (TREE_CODE (lab) != LABEL_DECL)
4493 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
4494 diagnose_sb_0 (tp, context, n ? (tree) n->value : NULL_TREE);
/* Switch: check every case label in the label vector.  */
4500 tree vec = SWITCH_LABELS (t);
4501 int i, len = TREE_VEC_LENGTH (vec);
4502 for (i = 0; i < len; ++i)
4504 tree lab = CASE_LABEL (TREE_VEC_ELT (vec, i));
4505 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
/* splay_tree_lookup returns NULL for a label that pass 1 did not
   record; skip such a label instead of dereferencing a null node.  */
4506 if (n && diagnose_sb_0 (tp, context, (tree) n->value))
/* A return statement is checked against the absence of any label
   context.  */
4513 diagnose_sb_0 (tp, context, NULL_TREE);
/* Diagnose branches into or out of OpenMP structured blocks in the
   body of FNDECL.  Two walks are made over DECL_SAVED_TREE (FNDECL):
   diagnose_sb_1 records the context of every label in ALL_LABELS, then
   diagnose_sb_2 checks each branch against that table.
   current_function_decl is set to FNDECL for the duration and restored
   afterwards.  */
4524 diagnose_omp_structured_block_errors (tree fndecl)
4526 tree save_current = current_function_decl;
4527 struct walk_stmt_info wi;
4529 current_function_decl = fndecl;
/* Labels are keyed by pointer; no deletion callbacks are registered.  */
4531 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
/* Pass 1.  */
4533 memset (&wi, 0, sizeof (wi));
4534 wi.callback = diagnose_sb_1;
4535 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
/* Pass 2: additionally asks the walker for locations and for return
   expressions.  */
4537 memset (&wi, 0, sizeof (wi));
4538 wi.callback = diagnose_sb_2;
4539 wi.want_locations = true;
4540 wi.want_return_expr = true;
4541 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
4543 splay_tree_delete (all_labels);
4546 current_function_decl = save_current;
4549 #include "gt-omp-low.h"