1 /* Lowering pass for OpenMP directives. Converts OpenMP directives
2 into explicit calls to the runtime library (libgomp) and data
3 marshalling to implement data sharing and copying clauses.
4 Contributed by Diego Novillo <dnovillo@redhat.com>
6 Copyright (C) 2005, 2006 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "coretypes.h"
31 #include "tree-gimple.h"
32 #include "tree-inline.h"
33 #include "langhooks.h"
34 #include "diagnostic.h"
35 #include "tree-flow.h"
41 #include "tree-pass.h"
46 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
47 phases. The first phase scans the function looking for OMP statements
48 and then for variables that must be replaced to satisfy data sharing
49 clauses. The second phase expands code for the constructs, as well as
50 re-gimplifying things when variables have been replaced with complex
53 Final code generation is done by pass_expand_omp. The flowgraph is
54 scanned for parallel regions which are then moved to a new
55 function, to be invoked by the thread library. */
57 /* Parallel region information. Every parallel and workshare
58 directive is enclosed between two markers, the OMP_* directive
59 and a corresponding OMP_RETURN_EXPR statement. */
/* NOTE(review): elided listing -- the embedded source line numbers jump
   (61, 63, 64, 66, ...), so the struct's opening brace and several members
   (the entry statement, the exit statement, and the ws_args list referenced
   by get_ws_args_for below) are missing from this view.  */
61 struct omp_region GTY(())
63 /* The enclosing region. */
64 struct omp_region *outer;
66 /* First child region. */
67 struct omp_region *inner;
69 /* Next peer region. */
70 struct omp_region *next;
72 /* Entry point to this region. */
75 /* Exit label from this region. */
81 /* True if this is a combined parallel+workshare region. */
82 bool is_combined_parallel;
84 /* If this is a combined parallel+workshare region, this is a list
85 of additional arguments needed by the combined parallel+workshare
91 /* Context structure. Used to store information about each parallel
92 directive in the code. */
/* NOTE(review): elided listing -- the copy_body_data first member, the
   stmt/record_type/sender_decl/receiver_decl fields used elsewhere in this
   file, and the closing of the typedef are missing from this view.  */
94 typedef struct omp_context
96 /* This field must be at the beginning, as we do "inheritance": Some
97 callback functions for tree-inline.c (e.g., omp_copy_decl)
98 receive a copy_body_data pointer that is up-casted to an
99 omp_context pointer. */
102 /* The tree of contexts corresponding to the encountered constructs. */
103 struct omp_context *outer;
106 /* Map variables to fields in a structure that allows communication
107 between sending and receiving threads. */
108 splay_tree field_map;
113 /* A chain of variables to add to the top-level block surrounding the
114 construct. In the case of a parallel, this is in the child function. */
117 /* What to do with variables with implicitly determined sharing
119 enum omp_clause_default_kind default_kind;
121 /* Nesting depth of this context. Used to beautify error messages re
122 invalid gotos. The outermost ctx is depth 1, with depth 0 being
123 reserved for the main body of the function. */
126 /* True if this parallel directive is nested within another. */
/* Decoded header of an OMP_FOR statement; filled in by
   extract_omp_for_data below.  NOTE(review): the struct keyword/name line
   is elided here -- presumably "struct omp_for_data"; confirm upstream.  */
131 /* A structure describing the main elements of a parallel loop. */
135 tree v, n1, n2, step, chunk_size, for_stmt;
136 enum tree_code cond_code;
138 bool have_nowait, have_ordered;
139 enum omp_clause_schedule_kind sched_kind;
/* Pass-level state: STMT -> omp_context map, current parallel nesting
   depth, STMT -> omp_region map, and the root of the region tree.  */
143 static splay_tree all_contexts;
144 static int parallel_nesting_level;
145 static splay_tree omp_regions;
146 static struct omp_region *root_omp_region;
/* Mutually recursive scanning/lowering entry points.  */
148 static void scan_omp (tree *, omp_context *);
149 static void lower_omp (tree *, omp_context *);
151 /* Find an OpenMP clause of type KIND within CLAUSES. */
/* NOTE(review): elided listing -- the return type line, braces, the
   "return clauses;" on match and the trailing "return NULL_TREE;" are
   missing from this view.  */
154 find_omp_clause (tree clauses, enum tree_code kind)
156 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
157 if (OMP_CLAUSE_CODE (clauses) == kind)
163 /* Return true if CTX is for an omp parallel. */
166 is_parallel_ctx (omp_context *ctx)
168 return TREE_CODE (ctx->stmt) == OMP_PARALLEL;
172 /* Return true if REGION is a combined parallel+workshare region. */
175 is_combined_parallel (struct omp_region *region)
177 return region->is_combined_parallel;
181 /* Extract the header elements of parallel loop FOR_STMT and store
/* Decodes OMP_FOR_INIT/COND/INCR plus the schedule-related clauses into
   *FD.  <= and >= conditions are canonicalized to < and > by bumping N2
   by one.  NOTE(review): elided listing -- the case labels of both
   switches, their break statements and the function braces are missing
   from this view; the visible bodies imply the usual LE_EXPR/GE_EXPR and
   PLUS_EXPR/MINUS_EXPR cases, but confirm upstream.  */
185 extract_omp_for_data (tree for_stmt, struct omp_for_data *fd)
189 fd->for_stmt = for_stmt;
192 t = OMP_FOR_INIT (for_stmt);
193 gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
194 fd->v = TREE_OPERAND (t, 0);
195 gcc_assert (DECL_P (fd->v));
196 gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE);
197 fd->n1 = TREE_OPERAND (t, 1);
199 t = OMP_FOR_COND (for_stmt);
200 fd->cond_code = TREE_CODE (t);
201 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
202 fd->n2 = TREE_OPERAND (t, 1);
203 switch (fd->cond_code)
/* i <= n2 becomes i < n2 + 1.  */
209 fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
210 build_int_cst (TREE_TYPE (fd->n2), 1));
211 fd->cond_code = LT_EXPR;
/* i >= n2 becomes i > n2 - 1.  */
214 fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
215 build_int_cst (TREE_TYPE (fd->n2), 1));
216 fd->cond_code = GT_EXPR;
222 t = OMP_FOR_INCR (fd->for_stmt);
223 gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
224 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
225 t = TREE_OPERAND (t, 1);
226 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
227 switch (TREE_CODE (t))
230 fd->step = TREE_OPERAND (t, 1);
/* Decrement: record the step negated so callers see a signed step.  */
233 fd->step = TREE_OPERAND (t, 1);
234 fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step);
240 fd->have_nowait = fd->have_ordered = false;
241 fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
242 fd->chunk_size = NULL_TREE;
244 for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
245 switch (OMP_CLAUSE_CODE (t))
247 case OMP_CLAUSE_NOWAIT:
248 fd->have_nowait = true;
250 case OMP_CLAUSE_ORDERED:
251 fd->have_ordered = true;
253 case OMP_CLAUSE_SCHEDULE:
254 fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
255 fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
/* schedule(runtime) never carries a chunk expression.  */
261 if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
262 gcc_assert (fd->chunk_size == NULL)
263 else if (fd->chunk_size == NULL)
265 /* We only need to compute a default chunk size for ordered
266 static loops and dynamic loops. */
267 if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered)
268 fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
269 ? integer_zero_node : integer_one_node;
274 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
275 is the immediate dominator of PAR_ENTRY_BB, return true if there
276 are no data dependencies that would prevent expanding the parallel
277 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
279 When expanding a combined parallel+workshare region, the call to
280 the child function may need additional arguments in the case of
281 OMP_FOR regions. In some cases, these arguments are computed out
282 of variables passed in from the parent to the child via 'struct
283 .omp_data_s'. For instance:
285 #pragma omp parallel for schedule (guided, i * 4)
290 # BLOCK 2 (PAR_ENTRY_BB)
292 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
294 # BLOCK 3 (WS_ENTRY_BB)
295 .omp_data_i = &.omp_data_o;
296 D.1667 = .omp_data_i->i;
298 #pragma omp for schedule (guided, D.1598)
300 When we outline the parallel region, the call to the child function
301 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
302 that value is computed *after* the call site. So, in principle we
303 cannot do the transformation.
305 To see whether the code in WS_ENTRY_BB blocks the combined
306 parallel+workshare call, we collect all the variables used in the
307 OMP_FOR header check whether they appear on the LHS of any
308 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
311 FIXME. If we had the SSA form built at this point, we could merely
312 hoist the code in block 3 into block 2 and be done with it. But at
313 this point we don't have dataflow information and though we could
314 hack something up here, it is really not worth the aggravation. */
/* NOTE(review): elided listing -- return type, braces, the "return true"
   for the OMP_SECTIONS early-out and the final returns are missing from
   this view.  PAR_STMT is unused in the visible lines.  */
317 workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb)
319 struct omp_for_data fd;
320 tree par_stmt, ws_stmt;
322 par_stmt = last_stmt (par_entry_bb);
323 ws_stmt = last_stmt (ws_entry_bb);
/* Sections have no loop header to analyze.  */
325 if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
328 gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR);
330 extract_omp_for_data (ws_stmt, &fd);
332 /* FIXME. We give up too easily here. If any of these arguments
333 are not constants, they will likely involve variables that have
334 been mapped into fields of .omp_data_s for sharing with the child
335 function. With appropriate data flow, it would be possible to
337 if (!is_gimple_min_invariant (fd.n1)
338 || !is_gimple_min_invariant (fd.n2)
339 || !is_gimple_min_invariant (fd.step)
340 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
347 /* Collect additional arguments needed to emit a combined
348 parallel+workshare call. WS_STMT is the workshare directive being
/* Builds the argument TREE_LIST for GOMP_parallel_loop_* /
   GOMP_parallel_sections_*-style calls; args are consed in reverse so
   the list reads n1, n2, step[, chunk_size].  NOTE(review): elided
   listing -- ws_args declaration, the chunk_size guard, returns and
   braces are missing from this view.  */
352 get_ws_args_for (tree ws_stmt)
356 if (TREE_CODE (ws_stmt) == OMP_FOR)
358 struct omp_for_data fd;
361 extract_omp_for_data (ws_stmt, &fd);
366 t = fold_convert (long_integer_type_node, fd.chunk_size);
367 ws_args = tree_cons (NULL, t, ws_args);
370 t = fold_convert (long_integer_type_node, fd.step);
371 ws_args = tree_cons (NULL, t, ws_args);
373 t = fold_convert (long_integer_type_node, fd.n2);
374 ws_args = tree_cons (NULL, t, ws_args);
376 t = fold_convert (long_integer_type_node, fd.n1);
377 ws_args = tree_cons (NULL, t, ws_args);
/* For sections, the only extra argument is the section count, taken
   from the number of successor edges of the OMP_SECTIONS block.  */
381 else if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
383 basic_block bb = bb_for_stmt (ws_stmt);
384 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs));
385 t = tree_cons (NULL, t, NULL);
393 /* Discover whether REGION is a combined parallel+workshare region. */
/* NOTE(review): elided listing -- braces, early "return"s after the
   guard conditions, and the else branch pairing are missing from this
   view.  */
396 determine_parallel_type (struct omp_region *region)
398 basic_block par_entry_bb, par_exit_bb;
399 basic_block ws_entry_bb, ws_exit_bb;
401 if (region == NULL || region->inner == NULL)
404 /* We only support parallel+for and parallel+sections. */
405 if (TREE_CODE (region->entry) != OMP_PARALLEL
406 || (TREE_CODE (region->inner->entry) != OMP_FOR
407 && TREE_CODE (region->inner->entry) != OMP_SECTIONS))
410 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
411 WS_EXIT_BB -> PAR_EXIT_BB. */
412 par_entry_bb = bb_for_stmt (region->entry);
413 par_exit_bb = bb_for_stmt (region->exit);
415 ws_entry_bb = bb_for_stmt (region->inner->entry);
416 ws_exit_bb = bb_for_stmt (region->inner->exit);
418 if (single_succ (par_entry_bb) == ws_entry_bb
419 && single_succ (ws_exit_bb) == par_exit_bb
420 && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb))
422 if (TREE_CODE (region->inner->entry) == OMP_FOR)
424 /* If this is a combined parallel loop, we need to determine
425 whether or not to use the combined library calls. There
426 are two cases where we do not apply the transformation:
427 static loops and any kind of ordered loop. In the first
428 case, we already open code the loop so there is no need
429 to do anything else. In the latter case, the combined
430 parallel loop call would still need extra synchronization
431 to implement ordered semantics, so there would not be any
432 gain in using the combined call. */
433 tree clauses = OMP_FOR_CLAUSES (region->inner->entry);
434 tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
436 || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC
437 || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
439 region->is_combined_parallel = false;
440 region->inner->is_combined_parallel = false;
/* Safe to combine: mark both regions and stash the extra call args.  */
445 region->is_combined_parallel = true;
446 region->inner->is_combined_parallel = true;
447 region->ws_args = get_ws_args_for (region->inner->entry);
452 /* Return true if EXPR is variable sized. */
455 is_variable_sized (tree expr)
457 return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
460 /* Return true if DECL is a reference type. */
463 is_reference (tree decl)
465 return lang_hooks.decls.omp_privatize_by_reference (decl);
468 /* Lookup variables in the decl or field splay trees. The "maybe" form
469 allows for the variable form to not have been entered, otherwise we
470 assert that the variable must have been entered. */
/* NOTE(review): elided listing -- return types, braces and (per the
   comment above) the gcc_assert on N in the non-"maybe" forms are
   missing from this view; lookup_decl/lookup_field dereference N
   unconditionally.  */
473 lookup_decl (tree var, omp_context *ctx)
476 n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
477 return (tree) n->value;
481 maybe_lookup_decl (tree var, omp_context *ctx)
484 n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
485 return n ? (tree) n->value : NULL_TREE;
489 lookup_field (tree var, omp_context *ctx)
492 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
493 return (tree) n->value;
497 maybe_lookup_field (tree var, omp_context *ctx)
500 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
501 return n ? (tree) n->value : NULL_TREE;
504 /* Return true if DECL should be copied by pointer. SHARED_P is true
505 if DECL is to be shared. */
/* NOTE(review): elided listing -- the "return true/false" lines after
   each test and the final return are missing from this view; only the
   conditions survive.  Note "varibles" typo below is in the original.  */
508 use_pointer_for_field (tree decl, bool shared_p)
/* Aggregates are always passed by reference.  */
510 if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
513 /* We can only use copy-in/copy-out semantics for shared varibles
514 when we know the value is not accessible from an outer scope. */
517 /* ??? Trivially accessible from anywhere. But why would we even
518 be passing an address in this case? Should we simply assert
519 this to be false, or should we have a cleanup pass that removes
520 these from the list of mappings? */
521 if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
524 /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
525 without analyzing the expression whether or not its location
526 is accessible to anyone else. In the case of nested parallel
527 regions it certainly may be. */
528 if (DECL_HAS_VALUE_EXPR_P (decl))
531 /* Do not use copy-in/copy-out for variables that have their
533 if (TREE_ADDRESSABLE (decl))
540 /* Construct a new automatic decl similar to VAR. */
/* Copies the flags relevant to gimplification/debug from VAR onto a
   fresh VAR_DECL and chains it on CTX->block_vars.  NOTE(review):
   elided listing -- braces and the "return copy;" are missing from
   this view.  */
543 omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
545 tree copy = build_decl (VAR_DECL, name, type);
547 TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
548 DECL_COMPLEX_GIMPLE_REG_P (copy) = DECL_COMPLEX_GIMPLE_REG_P (var);
549 DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
550 DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
551 TREE_USED (copy) = 1;
552 DECL_CONTEXT (copy) = current_function_decl;
553 DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;
555 TREE_CHAIN (copy) = ctx->block_vars;
556 ctx->block_vars = copy;
/* Convenience wrapper: copy VAR keeping its name and type.  */
562 omp_copy_decl_1 (tree var, omp_context *ctx)
564 return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
567 /* Build tree nodes to access the field for VAR on the receiver side. */
/* NOTE(review): elided listing -- the "if (x) field = x;" style use of
   the remapped field, the BY_REF guard around the final indirect_ref
   and the "return x;" are missing from this view.  */
570 build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
572 tree x, field = lookup_field (var, ctx);
574 /* If the receiver record type was remapped in the child function,
575 remap the field into the new record type. */
576 x = maybe_lookup_field (field, ctx);
/* Builds (*.omp_data_i).field, plus one more dereference for by-ref
   fields.  */
580 x = build_fold_indirect_ref (ctx->receiver_decl);
581 x = build3 (COMPONENT_REF, TREE_TYPE (field), x, field, NULL);
583 x = build_fold_indirect_ref (x);
588 /* Build tree nodes to access VAR in the scope outer to CTX. In the case
589 of a parallel, this is a component reference; for workshare constructs
590 this is some variable. */
/* NOTE(review): elided listing -- the "x = var;" global case body, the
   final else branch pairing and the "return x;" are missing from this
   view.  */
593 build_outer_var_ref (tree var, omp_context *ctx)
597 if (is_global_var (var))
/* Variable-sized decls are accessed through the pointer stored in
   their DECL_VALUE_EXPR; recurse on that pointer then dereference.  */
599 else if (is_variable_sized (var))
601 x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
602 x = build_outer_var_ref (x, ctx);
603 x = build_fold_indirect_ref (x);
605 else if (is_parallel_ctx (ctx))
607 bool by_ref = use_pointer_for_field (var, false);
608 x = build_receiver_ref (var, by_ref, ctx);
611 x = lookup_decl (var, ctx->outer);
615 if (is_reference (var))
616 x = build_fold_indirect_ref (x);
621 /* Build tree nodes to access the field for VAR on the sender side. */
/* Builds .omp_data_o.field (sender_decl is the local copy-out struct).  */
624 build_sender_ref (tree var, omp_context *ctx)
626 tree field = lookup_field (var, ctx);
627 return build3 (COMPONENT_REF, TREE_TYPE (field),
628 ctx->sender_decl, field, NULL);
631 /* Add a new field for VAR inside the structure CTX->SENDER_DECL. */
/* NOTE(review): elided listing -- the BY_REF guard before the
   build_pointer_type line and the function braces are missing from
   this view.  */
634 install_var_field (tree var, bool by_ref, omp_context *ctx)
638 gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
640 type = TREE_TYPE (var);
642 type = build_pointer_type (type);
644 field = build_decl (FIELD_DECL, DECL_NAME (var), type);
646 /* Remember what variable this field was created for. This does have a
647 side effect of making dwarf2out ignore this member, so for helpful
648 debugging we clear it later in delete_omp_context. */
649 DECL_ABSTRACT_ORIGIN (field) = var;
651 insert_field_into_struct (ctx->record_type, field);
653 splay_tree_insert (ctx->field_map, (splay_tree_key) var,
654 (splay_tree_value) field);
/* Create a context-local replacement decl for VAR and record the
   mapping in CTX's decl map.  */
658 install_var_local (tree var, omp_context *ctx)
660 tree new_var = omp_copy_decl_1 (var, ctx);
661 insert_decl_map (&ctx->cb, var, new_var);
665 /* Adjust the replacement for DECL in CTX for the new context. This means
666 copying the DECL_VALUE_EXPR, and fixing up the type. */
/* NOTE(review): elided listing -- the size/new_decl declarations and the
   function braces are missing from this view.  */
669 fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
673 new_decl = lookup_decl (decl, ctx);
675 TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);
677 if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
678 && DECL_HAS_VALUE_EXPR_P (decl))
680 tree ve = DECL_VALUE_EXPR (decl);
681 walk_tree (&ve, copy_body_r, &ctx->cb, NULL);
682 SET_DECL_VALUE_EXPR (new_decl, ve);
683 DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
/* For variably-sized decls also remap the size trees, falling back to
   the remapped type's size if remapping failed.  */
686 if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
688 size = remap_decl (DECL_SIZE (decl), &ctx->cb);
689 if (size == error_mark_node)
690 size = TYPE_SIZE (TREE_TYPE (new_decl));
691 DECL_SIZE (new_decl) = size;
693 size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
694 if (size == error_mark_node)
695 size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
696 DECL_SIZE_UNIT (new_decl) = size;
700 /* The callback for remap_decl. Search all containing contexts for a
701 mapping of the variable; this avoids having to duplicate the splay
702 tree ahead of time. We know a mapping doesn't already exist in the
703 given context. Create new mappings to implement default semantics. */
/* NOTE(review): elided listing -- the "return var;" for globals, the
   ctx->outer walk body, the found-mapping return and the braces are
   missing from this view; error_mark_node appears to be the
   no-mapping-found result, but confirm upstream.  */
706 omp_copy_decl (tree var, copy_body_data *cb)
/* CB is really the omp_context (copy_body_data is its first member).  */
708 omp_context *ctx = (omp_context *) cb;
711 if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
714 if (TREE_CODE (var) == LABEL_DECL)
716 new_var = create_artificial_label ();
717 DECL_CONTEXT (new_var) = current_function_decl;
718 insert_decl_map (&ctx->cb, var, new_var);
722 while (!is_parallel_ctx (ctx))
727 new_var = maybe_lookup_decl (var, ctx);
732 return error_mark_node;
736 /* Return the parallel region associated with STMT. */
738 static inline struct omp_region *
739 lookup_omp_region (tree stmt)
741 splay_tree_node n = splay_tree_lookup (omp_regions, (splay_tree_key) stmt);
742 return n ? (struct omp_region *) n->value : NULL;
746 /* Debugging dumps for parallel regions. */
/* Extern prototypes so the debug entry points are callable from gdb.  */
747 void dump_omp_region (FILE *, struct omp_region *, int);
748 void debug_omp_region (struct omp_region *);
749 void debug_all_omp_regions (void);
751 /* Dump the parallel region tree rooted at REGION. */
/* NOTE(review): elided listing -- the if-guards around the inner/exit/
   next dumps and the braces are missing from this view; recursion on
   ->inner (indented) and iteration on ->next (same indent) survive.  */
754 dump_omp_region (FILE *file, struct omp_region *region, int indent)
756 fprintf (file, "%*s", indent, "");
757 print_generic_stmt (file, region->entry, TDF_SLIM);
760 dump_omp_region (file, region->inner, indent + 4);
762 fprintf (file, "%*s", indent, "");
764 print_generic_stmt (file, region->exit, 0);
766 fprintf (file, "[no exit marker]\n");
770 fprintf (file, "\n");
771 dump_omp_region (file, region->next, indent);
/* gdb-callable: dump one region subtree to stderr.  */
776 debug_omp_region (struct omp_region *region)
778 dump_omp_region (stderr, region, 0);
/* gdb-callable: dump the whole region tree to stderr.  */
782 debug_all_omp_regions (void)
784 dump_omp_region (stderr, root_omp_region, 0);
788 /* Create a new parallel region starting at STMT inside region PARENT. */
/* NOTE(review): elided listing -- the "if (parent)" guard, the else
   branch pairing, braces and the "return region;" are missing from
   this view.  */
790 static struct omp_region *
791 new_omp_region (tree stmt, struct omp_region *parent)
793 struct omp_region *region = ggc_alloc_cleared (sizeof (*region));
796 region->outer = parent;
797 region->entry = stmt;
802 /* This is a nested region. Add it to the list of inner
803 regions in PARENT. */
804 region->next = parent->inner;
805 parent->inner = region;
807 else if (omp_regions)
809 /* This is a toplevel region. Add it to the list of toplevel
810 regions in ROOT_OMP_REGION. */
811 region->next = root_omp_region;
812 root_omp_region = region;
/* First region seen in this function: lazily create the splay tree.  */
816 /* Create a new root region with the first region we find. */
817 root_omp_region = region;
818 omp_regions = splay_tree_new (splay_tree_compare_pointers, 0, 0);
821 splay_tree_insert (omp_regions, (splay_tree_key) stmt,
822 (splay_tree_value) region);
828 /* Create a new context, with OUTER_CTX being the surrounding context. */
/* NOTE(review): elided listing -- ctx->stmt assignment, the
   "if (outer_ctx)" / else around the two initialization paths, braces
   and "return ctx;" are missing from this view.  Nested contexts
   inherit the copy_body_data of their parent; outermost contexts set
   it up from scratch.  */
831 new_omp_context (tree stmt, omp_context *outer_ctx)
833 omp_context *ctx = XCNEW (omp_context);
835 splay_tree_insert (all_contexts, (splay_tree_key) stmt,
836 (splay_tree_value) ctx);
841 ctx->outer = outer_ctx;
842 ctx->cb = outer_ctx->cb;
843 ctx->cb.block = NULL;
844 ctx->depth = outer_ctx->depth + 1;
848 ctx->cb.src_fn = current_function_decl;
849 ctx->cb.dst_fn = current_function_decl;
850 ctx->cb.src_node = cgraph_node (current_function_decl);
851 ctx->cb.dst_node = ctx->cb.src_node;
852 ctx->cb.src_cfun = cfun;
853 ctx->cb.copy_decl = omp_copy_decl;
854 ctx->cb.eh_region = -1;
855 ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
859 ctx->cb.decl_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
864 /* Destroy a omp_context data structures. Called through the splay tree
865 value delete callback. */
/* NOTE(review): elided listing -- the "if (ctx->field_map)" guard, the
   tree variable declaration, the XDELETE of ctx and braces are missing
   from this view.  */
868 delete_omp_context (splay_tree_value value)
870 omp_context *ctx = (omp_context *) value;
872 splay_tree_delete (ctx->cb.decl_map);
875 splay_tree_delete (ctx->field_map);
877 /* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before
878 it produces corrupt debug information. */
879 if (ctx->record_type)
882 for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
883 DECL_ABSTRACT_ORIGIN (t) = NULL;
889 /* Fix up RECEIVER_DECL with a type that has been remapped to the child
/* If any field of the .omp_data_s record is variably modified w.r.t.
   the parent function, rebuild the whole record with remapped field
   types so the child sees sizes in terms of its own decls.
   NOTE(review): elided listing -- the "break;" after the scan loop's
   hit, the "if (f != NULL)" guard around the rebuild, new-field
   chaining order and braces are missing from this view.  */
893 fixup_child_record_type (omp_context *ctx)
895 tree f, type = ctx->record_type;
897 /* ??? It isn't sufficient to just call remap_type here, because
898 variably_modified_type_p doesn't work the way we expect for
899 record types. Testing each field for whether it needs remapping
900 and creating a new record by hand works, however. */
901 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
902 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
906 tree name, new_fields = NULL;
908 type = lang_hooks.types.make_type (RECORD_TYPE);
909 name = DECL_NAME (TYPE_NAME (ctx->record_type));
910 name = build_decl (TYPE_DECL, name, type);
911 TYPE_NAME (type) = name;
913 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
915 tree new_f = copy_node (f);
916 DECL_CONTEXT (new_f) = type;
917 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
918 TREE_CHAIN (new_f) = new_fields;
921 /* Arrange to be able to look up the receiver field
922 given the sender field. */
923 splay_tree_insert (ctx->field_map, (splay_tree_key) f,
924 (splay_tree_value) new_f);
/* Fields were consed in reverse; restore declaration order.  */
926 TYPE_FIELDS (type) = nreverse (new_fields);
930 TREE_TYPE (ctx->receiver_decl) = build_pointer_type (type);
933 /* Instantiate decls as necessary in CTX to satisfy the data sharing
934 specified by CLAUSES. */
/* Two passes over the clause chain: pass 1 installs fields/locals for
   each data-sharing clause; pass 2 fixes up the remapped decls once all
   mappings exist.  A third pass scans reduction init/merge sequences
   that reference the placeholder.  NOTE(review): elided listing --
   "break;" statements between cases, the decl/by_ref declarations,
   default cases with gcc_unreachable, and the switch/function braces
   are missing from this view.  */
937 scan_sharing_clauses (tree clauses, omp_context *ctx)
940 bool scan_array_reductions = false;
942 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
946 switch (OMP_CLAUSE_CODE (c))
948 case OMP_CLAUSE_PRIVATE:
949 decl = OMP_CLAUSE_DECL (c);
950 if (!is_variable_sized (decl))
951 install_var_local (decl, ctx);
954 case OMP_CLAUSE_SHARED:
955 gcc_assert (is_parallel_ctx (ctx));
956 decl = OMP_CLAUSE_DECL (c);
957 gcc_assert (!is_variable_sized (decl));
958 by_ref = use_pointer_for_field (decl, true);
/* Read-only non-addressable scalars need no copy-back; demote the
   clause to firstprivate instead of installing a shared field.  */
959 if (! TREE_READONLY (decl)
960 || TREE_ADDRESSABLE (decl)
962 || is_reference (decl))
964 install_var_field (decl, by_ref, ctx);
965 install_var_local (decl, ctx);
968 /* We don't need to copy const scalar vars back. */
969 OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
972 case OMP_CLAUSE_LASTPRIVATE:
973 /* Let the corresponding firstprivate clause create
975 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
979 case OMP_CLAUSE_FIRSTPRIVATE:
980 case OMP_CLAUSE_REDUCTION:
981 decl = OMP_CLAUSE_DECL (c);
983 if (is_variable_sized (decl))
985 else if (is_parallel_ctx (ctx))
987 by_ref = use_pointer_for_field (decl, false);
988 install_var_field (decl, by_ref, ctx);
990 install_var_local (decl, ctx);
993 case OMP_CLAUSE_COPYPRIVATE:
/* The decl lives in the enclosing scope; scan it there.  */
995 scan_omp (&OMP_CLAUSE_DECL (c), ctx->outer);
998 case OMP_CLAUSE_COPYIN:
999 decl = OMP_CLAUSE_DECL (c);
1000 by_ref = use_pointer_for_field (decl, false);
1001 install_var_field (decl, by_ref, ctx);
1004 case OMP_CLAUSE_DEFAULT:
1005 ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
1009 case OMP_CLAUSE_NUM_THREADS:
1010 case OMP_CLAUSE_SCHEDULE:
/* Clause operands (e.g. num_threads expr) are evaluated outside
   the construct.  */
1012 scan_omp (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
1015 case OMP_CLAUSE_NOWAIT:
1016 case OMP_CLAUSE_ORDERED:
/* Second pass: fix up the replacement decls now that every clause has
   installed its mappings.  */
1024 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1026 switch (OMP_CLAUSE_CODE (c))
1028 case OMP_CLAUSE_LASTPRIVATE:
1029 /* Let the corresponding firstprivate clause create
1031 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1035 case OMP_CLAUSE_PRIVATE:
1036 case OMP_CLAUSE_FIRSTPRIVATE:
1037 case OMP_CLAUSE_REDUCTION:
1038 decl = OMP_CLAUSE_DECL (c);
1039 if (is_variable_sized (decl))
1040 install_var_local (decl, ctx);
1041 fixup_remapped_decl (decl, ctx,
1042 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
1043 && OMP_CLAUSE_PRIVATE_DEBUG (c));
1044 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1045 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1046 scan_array_reductions = true;
1049 case OMP_CLAUSE_SHARED:
1050 decl = OMP_CLAUSE_DECL (c);
1051 fixup_remapped_decl (decl, ctx, false);
1054 case OMP_CLAUSE_COPYPRIVATE:
1055 case OMP_CLAUSE_COPYIN:
1056 case OMP_CLAUSE_DEFAULT:
1058 case OMP_CLAUSE_NUM_THREADS:
1059 case OMP_CLAUSE_SCHEDULE:
1060 case OMP_CLAUSE_NOWAIT:
1061 case OMP_CLAUSE_ORDERED:
/* Third pass: array reductions carry init/merge statement sequences
   that must themselves be scanned in this context.  */
1069 if (scan_array_reductions)
1070 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1071 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1072 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1074 scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx);
1075 scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx);
1079 /* Create a new name for omp child function. Returns an identifier. */
1081 static GTY(()) unsigned int tmp_ompfn_id_num;
/* NOTE(review): elided listing -- the separator-character selection in
   the #ifdef ladder (dot vs dollar vs underscore clone numbering) and
   the #endif are missing from this view.  */
1084 create_omp_child_function_name (void)
1086 tree name = DECL_ASSEMBLER_NAME (current_function_decl);
1087 size_t len = IDENTIFIER_LENGTH (name);
1088 char *tmp_name, *prefix;
1090 prefix = alloca (len + sizeof ("_omp_fn"));
1091 memcpy (prefix, IDENTIFIER_POINTER (name), len);
1092 strcpy (prefix + len, "_omp_fn");
1093 #ifndef NO_DOT_IN_LABEL
1095 #elif !defined NO_DOLLAR_IN_LABEL
1098 ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, tmp_ompfn_id_num++);
1099 return get_identifier (tmp_name);
1102 /* Build a decl for the omp child function. It'll not contain a body
1103 yet, just the bare decl. */
/* Declares "void <fn>_omp_fn.N (void *.omp_data_i)" and points
   ctx->receiver_decl at the parameter.  NOTE(review): elided listing --
   intermediate lines (e.g. between 1136 and 1138, and the save of cfun
   before allocate_struct_function) are missing from this view.  */
1106 create_omp_child_function (omp_context *ctx)
1108 tree decl, type, name, t;
1110 name = create_omp_child_function_name ();
1111 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1113 decl = build_decl (FUNCTION_DECL, name, type);
1114 decl = lang_hooks.decls.pushdecl (decl);
1116 ctx->cb.dst_fn = decl;
1118 TREE_STATIC (decl) = 1;
1119 TREE_USED (decl) = 1;
1120 DECL_ARTIFICIAL (decl) = 1;
1121 DECL_IGNORED_P (decl) = 0;
1122 TREE_PUBLIC (decl) = 0;
1123 DECL_UNINLINABLE (decl) = 1;
1124 DECL_EXTERNAL (decl) = 0;
1125 DECL_CONTEXT (decl) = NULL_TREE;
1126 DECL_INITIAL (decl) = make_node (BLOCK);
1128 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1129 DECL_ARTIFICIAL (t) = 1;
1130 DECL_IGNORED_P (t) = 1;
1131 DECL_RESULT (decl) = t;
1133 t = build_decl (PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
1134 DECL_ARTIFICIAL (t) = 1;
1135 DECL_ARG_TYPE (t) = ptr_type_node;
1136 DECL_CONTEXT (t) = current_function_decl;
1138 DECL_ARGUMENTS (decl) = t;
1139 ctx->receiver_decl = t;
1141 /* Allocate memory for the function structure. The call to
1142 allocate_struct_function clobbers CFUN, so we need to restore
1144 allocate_struct_function (decl);
1145 DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt);
1146 cfun->function_end_locus = EXPR_LOCATION (ctx->stmt);
/* Restore the parent's cfun clobbered by allocate_struct_function.  */
1147 cfun = ctx->cb.src_cfun;
1151 /* Scan an OpenMP parallel directive. */
/* NOTE(review): elided listing -- the ctx/name declarations, the first
   condition of the empty-body test, the "return;" after replacing with
   an empty stmt, and the else around layout_type are missing from this
   view.  */
1154 scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx)
1159 /* Ignore parallel directives with empty bodies, unless there
1160 are copyin clauses. */
1162 && empty_body_p (OMP_PARALLEL_BODY (*stmt_p))
1163 && find_omp_clause (OMP_CLAUSES (*stmt_p), OMP_CLAUSE_COPYIN) == NULL
1165 *stmt_p = build_empty_stmt ();
1169 ctx = new_omp_context (*stmt_p, outer_ctx);
1170 if (parallel_nesting_level > 1)
1171 ctx->is_nested = true;
1172 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1173 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
1174 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1175 name = create_tmp_var_name (".omp_data_s");
1176 name = build_decl (TYPE_DECL, name, ctx->record_type);
1177 TYPE_NAME (ctx->record_type) = name;
1178 create_omp_child_function (ctx);
1179 OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn;
1181 scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx);
1182 scan_omp (&OMP_PARALLEL_BODY (*stmt_p), ctx);
/* No fields were needed: drop the record so no struct is passed.  */
1184 if (TYPE_FIELDS (ctx->record_type) == NULL)
1185 ctx->record_type = ctx->receiver_decl = NULL;
1188 layout_type (ctx->record_type);
1189 fixup_child_record_type (ctx);
1194 /* Scan an OpenMP loop directive. */
/* NOTE(review): elided listing across these three scanners -- local
   declarations (stmt/ctx/name), braces, and in scan_omp_single the
   else branch around layout_type are missing from this view.  */
1197 scan_omp_for (tree *stmt_p, omp_context *outer_ctx)
1203 ctx = new_omp_context (stmt, outer_ctx);
1205 scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx);
1207 scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
1208 scan_omp (&OMP_FOR_INIT (stmt), ctx);
1209 scan_omp (&OMP_FOR_COND (stmt), ctx);
1210 scan_omp (&OMP_FOR_INCR (stmt), ctx);
1211 scan_omp (&OMP_FOR_BODY (stmt), ctx);
1214 /* Scan an OpenMP sections directive. */
1217 scan_omp_sections (tree *stmt_p, omp_context *outer_ctx)
1223 ctx = new_omp_context (stmt, outer_ctx);
1224 scan_sharing_clauses (OMP_SECTIONS_CLAUSES (stmt), ctx);
1225 scan_omp (&OMP_SECTIONS_BODY (stmt), ctx);
1228 /* Scan an OpenMP single directive. */
1231 scan_omp_single (tree *stmt_p, omp_context *outer_ctx)
1233 tree stmt = *stmt_p;
1237 ctx = new_omp_context (stmt, outer_ctx);
1238 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1239 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1240 name = create_tmp_var_name (".omp_copy_s");
1241 name = build_decl (TYPE_DECL, name, ctx->record_type);
1242 TYPE_NAME (ctx->record_type) = name;
1244 scan_sharing_clauses (OMP_SINGLE_CLAUSES (stmt), ctx);
1245 scan_omp (&OMP_SINGLE_BODY (stmt), ctx);
/* Drop the copyprivate record when no clause needed a field.  */
1247 if (TYPE_FIELDS (ctx->record_type) == NULL)
1248 ctx->record_type = NULL;
1250 layout_type (ctx->record_type);
1254 /* Callback for walk_stmts used to scan for OpenMP directives at TP. */
/* NOTE(review): elided listing -- the tree t = *tp declaration, the
   switch case labels (OMP_PARALLEL, OMP_FOR, OMP_SECTIONS, OMP_SINGLE,
   the directives handled by the shared new_omp_context branch, BIND_EXPR,
   VAR_DECL/LABEL_DECL), break statements, the ctx NULL checks and the
   return are missing from this view.  */
1257 scan_omp_1 (tree *tp, int *walk_subtrees, void *data)
1259 struct walk_stmt_info *wi = data;
1260 omp_context *ctx = wi->info;
/* Track input_location so diagnostics point at the construct.  */
1263 if (EXPR_HAS_LOCATION (t))
1264 input_location = EXPR_LOCATION (t);
1267 switch (TREE_CODE (t))
1270 parallel_nesting_level++;
1271 scan_omp_parallel (tp, ctx);
1272 parallel_nesting_level--;
1276 scan_omp_for (tp, ctx);
1280 scan_omp_sections (tp, ctx);
1284 scan_omp_single (tp, ctx);
1291 ctx = new_omp_context (*tp, ctx);
1292 scan_omp (&OMP_BODY (*tp), ctx);
/* BIND_EXPR: identity-map the bound vars so remap_decl leaves them.  */
1300 for (var = BIND_EXPR_VARS (t); var ; var = TREE_CHAIN (var))
1301 insert_decl_map (&ctx->cb, var, var);
1309 *tp = remap_decl (t, &ctx->cb);
1313 if (ctx && TYPE_P (t))
1314 *tp = remap_type (t, &ctx->cb);
1315 else if (!DECL_P (t))
1324 /* Scan all the statements starting at STMT_P. CTX contains context
1325 information about the OpenMP directives and clauses found during
/* Drives walk_stmts with scan_omp_1 as the callback.  input_location
   is saved and restored so locations set during the walk (for
   diagnostics) do not leak out.  */
1329 scan_omp (tree *stmt_p, omp_context *ctx)
1331 location_t saved_location;
1332 struct walk_stmt_info wi;
1334 memset (&wi, 0, sizeof (wi));
1335 wi.callback = scan_omp_1;
/* Only dive into BIND_EXPRs when there is a context to record their
   variables in.  */
1337 wi.want_bind_expr = (ctx != NULL);
1338 wi.want_locations = true;
1340 saved_location = input_location;
1341 walk_stmts (&wi, stmt_p);
1342 input_location = saved_location;
1345 /* Re-gimplification and code generation routines. */
1347 /* Build a call to GOMP_barrier. */
/* The call is gimplified and appended directly onto STMT_LIST.  */
1350 build_omp_barrier (tree *stmt_list)
1354 t = built_in_decls[BUILT_IN_GOMP_BARRIER];
1355 t = build_function_call_expr (t, NULL);
1356 gimplify_and_add (t, stmt_list);
1359 /* If a context was created for STMT when it was scanned, return it. */
/* Returns NULL when STMT has no associated omp_context recorded in
   the ALL_CONTEXTS splay tree.  */
1361 static omp_context *
1362 maybe_lookup_ctx (tree stmt)
1365 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
1366 return n ? (omp_context *) n->value : NULL;
1370 /* Find the mapping for DECL in CTX or the immediately enclosing
1371 context that has a mapping for DECL.
1373 If CTX is a nested parallel directive, we may have to use the decl
1374 mappings created in CTX's parent context. Suppose that we have the
1375 following parallel nesting (variable UIDs showed for clarity):
1378 #omp parallel shared(iD.1562) -> outer parallel
1379 iD.1562 = iD.1562 + 1;
1381 #omp parallel shared (iD.1562) -> inner parallel
1382 iD.1562 = iD.1562 - 1;
1384 Each parallel structure will create a distinct .omp_data_s structure
1385 for copying iD.1562 in/out of the directive:
1387 outer parallel .omp_data_s.1.i -> iD.1562
1388 inner parallel .omp_data_s.2.i -> iD.1562
1390 A shared variable mapping will produce a copy-out operation before
1391 the parallel directive and a copy-in operation after it. So, in
1392 this case we would have:
1395 .omp_data_o.1.i = iD.1562;
1396 #omp parallel shared(iD.1562) -> outer parallel
1397 .omp_data_i.1 = &.omp_data_o.1
1398 .omp_data_i.1->i = .omp_data_i.1->i + 1;
1400 .omp_data_o.2.i = iD.1562; -> **
1401 #omp parallel shared(iD.1562) -> inner parallel
1402 .omp_data_i.2 = &.omp_data_o.2
1403 .omp_data_i.2->i = .omp_data_i.2->i - 1;
1406 ** This is a problem. The symbol iD.1562 cannot be referenced
1407 inside the body of the outer parallel region. But since we are
1408 emitting this copy operation while expanding the inner parallel
1409 directive, we need to access the CTX structure of the outer
1410 parallel directive to get the correct mapping:
1412 .omp_data_o.2.i = .omp_data_i.1->i
1414 Since there may be other workshare or parallel directives enclosing
1415 the parallel directive, it may be necessary to walk up the context
1416 parent chain. This is not a problem in general because nested
1417 parallelism happens only rarely. */
1420 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
/* Only meaningful for nested contexts; callers guard on
   ctx->is_nested before asking for an outer mapping.  */
1425 gcc_assert (ctx->is_nested);
/* Walk outward until some enclosing context supplies a mapping for
   DECL.  */
1427 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1428 t = maybe_lookup_decl (decl, up);
1436 /* Construct the initialization value for reduction CLAUSE. */
/* Returns the identity element of the reduction operator, converted
   to TYPE: 0 for the zero-identity operators (||, ^, and presumably
   +, -, | — some case labels are elided here), 1 for && (and
   presumably *), all-ones for bitwise AND, and an extreme value of
   TYPE for MIN/MAX.  */
1439 omp_reduction_init (tree clause, tree type)
1441 switch (OMP_CLAUSE_REDUCTION_CODE (clause))
1448 case TRUTH_ORIF_EXPR:
1449 case TRUTH_XOR_EXPR:
1451 return fold_convert (type, integer_zero_node);
1454 case TRUTH_AND_EXPR:
1455 case TRUTH_ANDIF_EXPR:
1457 return fold_convert (type, integer_one_node);
/* Bitwise AND: all-ones identity.  */
1460 return fold_convert (type, integer_minus_one_node);
/* This arm (presumably MAX_EXPR — label elided) initializes to the
   smallest representable value: -Inf for floats that honor
   infinities, otherwise the negated maximal finite value, otherwise
   TYPE_MIN_VALUE for integers.  */
1463 if (SCALAR_FLOAT_TYPE_P (type))
1465 REAL_VALUE_TYPE max, min;
1466 if (HONOR_INFINITIES (TYPE_MODE (type)))
1469 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
1472 real_maxval (&min, 1, TYPE_MODE (type));
1473 return build_real (type, min);
1477 gcc_assert (INTEGRAL_TYPE_P (type));
1478 return TYPE_MIN_VALUE (type);
/* This arm (presumably MIN_EXPR) initializes to the largest
   representable value.  */
1482 if (SCALAR_FLOAT_TYPE_P (type))
1484 REAL_VALUE_TYPE max;
1485 if (HONOR_INFINITIES (TYPE_MODE (type)))
1488 real_maxval (&max, 0, TYPE_MODE (type));
1489 return build_real (type, max);
1493 gcc_assert (INTEGRAL_TYPE_P (type));
1494 return TYPE_MAX_VALUE (type);
1502 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
1503 from the receiver (aka child) side and initializers for REFERENCE_TYPE
1504 private variables. Initialization statements go in ILIST, while calls
1505 to destructors go in DLIST. */
1508 lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist,
1511 tree_stmt_iterator diter;
1512 tree c, dtor, copyin_seq, x, args, ptr;
1513 bool copyin_by_ref = false;
/* Destructors are linked onto DLIST in source order via DITER.  */
1516 *dlist = alloc_stmt_list ();
1517 diter = tsi_start (*dlist);
1520 /* Do all the fixed sized types in the first pass, and the variable sized
1521 types in the second pass. This makes sure that the scalar arguments to
1522 the variable sized types are processed before we use them in the
1523 variable sized operations. */
1524 for (pass = 0; pass < 2; ++pass)
1526 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1528 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
1534 case OMP_CLAUSE_PRIVATE:
1535 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
1538 case OMP_CLAUSE_SHARED:
1539 case OMP_CLAUSE_FIRSTPRIVATE:
1540 case OMP_CLAUSE_LASTPRIVATE:
1541 case OMP_CLAUSE_COPYIN:
1542 case OMP_CLAUSE_REDUCTION:
/* NEW_VAR is the child-side replacement decl; COPYIN keeps the
   original since the master's copy is read through the record.  */
1548 new_var = var = OMP_CLAUSE_DECL (c);
1549 if (c_kind != OMP_CLAUSE_COPYIN)
1550 new_var = lookup_decl (var, ctx);
1552 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
1557 else if (is_variable_sized (var))
1559 /* For variable sized types, we need to allocate the
1560 actual storage here. Call alloca and store the
1561 result in the pointer decl that we created elsewhere. */
1565 ptr = DECL_VALUE_EXPR (new_var);
1566 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
1567 ptr = TREE_OPERAND (ptr, 0);
1568 gcc_assert (DECL_P (ptr));
1570 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
1571 args = tree_cons (NULL, x, NULL);
1572 x = built_in_decls[BUILT_IN_ALLOCA];
1573 x = build_function_call_expr (x, args);
1574 x = fold_convert (TREE_TYPE (ptr), x);
1575 x = build2 (MODIFY_EXPR, void_type_node, ptr, x);
1576 gimplify_and_add (x, ilist);
1578 else if (is_reference (var))
1580 /* For references that are being privatized for Fortran,
1581 allocate new backing storage for the new pointer
1582 variable. This allows us to avoid changing all the
1583 code that expects a pointer to something that expects
1584 a direct variable. Note that this doesn't apply to
1585 C++, since reference types are disallowed in data
1586 sharing clauses there. */
/* Constant-size referent: back it with a named temporary;
   otherwise fall back to alloca.  */
1590 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
1591 if (TREE_CONSTANT (x))
1593 const char *name = NULL;
1594 if (DECL_NAME (var))
1595 name = IDENTIFIER_POINTER (DECL_NAME (new_var));
1597 x = create_tmp_var (TREE_TYPE (TREE_TYPE (new_var)), name);
1598 x = build_fold_addr_expr_with_type (x, TREE_TYPE (new_var));
1602 args = tree_cons (NULL, x, NULL);
1603 x = built_in_decls[BUILT_IN_ALLOCA];
1604 x = build_function_call_expr (x, args);
1605 x = fold_convert (TREE_TYPE (new_var), x);
1608 x = build2 (MODIFY_EXPR, void_type_node, new_var, x);
1609 gimplify_and_add (x, ilist);
/* From here on, operate on the pointed-to object.  */
1611 new_var = build_fold_indirect_ref (new_var);
1613 else if (c_kind == OMP_CLAUSE_REDUCTION
1614 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1622 switch (OMP_CLAUSE_CODE (c))
1624 case OMP_CLAUSE_SHARED:
1625 /* Set up the DECL_VALUE_EXPR for shared variables now. This
1626 needs to be delayed until after fixup_child_record_type so
1627 that we get the correct type during the dereference. */
1628 by_ref = use_pointer_for_field (var, true);
1629 x = build_receiver_ref (var, by_ref, ctx);
1630 SET_DECL_VALUE_EXPR (new_var, x);
1631 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
1633 /* ??? If VAR is not passed by reference, and the variable
1634 hasn't been initialized yet, then we'll get a warning for
1635 the store into the omp_data_s structure. Ideally, we'd be
1636 able to notice this and not store anything at all, but
1637 we're generating code too early. Suppress the warning. */
1639 TREE_NO_WARNING (var) = 1;
1642 case OMP_CLAUSE_LASTPRIVATE:
/* Combined lastprivate firstprivate falls through to the
   private default-construction below.  */
1643 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1647 case OMP_CLAUSE_PRIVATE:
1648 x = lang_hooks.decls.omp_clause_default_ctor (c, new_var);
1650 gimplify_and_add (x, ilist);
/* Queue the language-specific destructor, if any, on DLIST.  */
1654 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
1658 gimplify_stmt (&dtor);
1659 tsi_link_before (&diter, dtor, TSI_SAME_STMT);
1663 case OMP_CLAUSE_FIRSTPRIVATE:
/* Copy-construct from the outer (sender-side) value.  */
1664 x = build_outer_var_ref (var, ctx);
1665 x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
1666 gimplify_and_add (x, ilist);
1670 case OMP_CLAUSE_COPYIN:
/* NOTE(review): "©in_seq" below is mis-encoded text; it should
   read "&copyin_seq" (the HTML entity &copy; leaked into the
   source) — fix the encoding.  */
1671 by_ref = use_pointer_for_field (var, false);
1672 x = build_receiver_ref (var, by_ref, ctx);
1673 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
1674 append_to_statement_list (x, ©in_seq);
1675 copyin_by_ref |= by_ref;
1678 case OMP_CLAUSE_REDUCTION:
/* Placeholder reductions carry user-written init code; plain
   reductions initialize to the operator's identity.  */
1679 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1681 gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist);
1682 OMP_CLAUSE_REDUCTION_INIT (c) = NULL;
1686 x = omp_reduction_init (c, TREE_TYPE (new_var));
1687 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
1688 x = build2 (MODIFY_EXPR, void_type_node, new_var, x);
1689 gimplify_and_add (x, ilist);
1699 /* The copyin sequence is not to be executed by the main thread, since
1700 that would result in self-copies. Perhaps not visible to scalars,
1701 but it certainly is to C++ operator=. */
/* Guard the copyin sequence with omp_get_thread_num () != 0.  */
1704 x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
1705 x = build_function_call_expr (x, NULL);
1706 x = build2 (NE_EXPR, boolean_type_node, x,
1707 build_int_cst (TREE_TYPE (x), 0));
1708 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
1709 gimplify_and_add (x, ilist);
1712 /* If any copyin variable is passed by reference, we must ensure the
1713 master thread doesn't modify it before it is copied over in all
1716 build_omp_barrier (ilist);
1720 /* Generate code to implement the LASTPRIVATE clauses. This is used for
1721 both parallel and workshare constructs. PREDICATE may be NULL if it's
/* ... (comment continues on an elided line).  When PREDICATE is
   non-NULL the copy-back is wrapped in a COND_EXPR so only the thread
   that executed the last iteration/section performs it.  Results are
   gimplified onto STMT_LIST.  */
1725 lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list,
1728 tree sub_list, x, c;
1730 /* Early exit if there are no lastprivate clauses. */
1731 clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
1732 if (clauses == NULL)
1734 /* If this was a workshare clause, see if it had been combined
1735 with its parallel. In that case, look for the clauses on the
1736 parallel statement itself. */
1737 if (is_parallel_ctx (ctx))
1741 if (ctx == NULL || !is_parallel_ctx (ctx))
1744 clauses = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt),
1745 OMP_CLAUSE_LASTPRIVATE);
1746 if (clauses == NULL)
1750 sub_list = alloc_stmt_list ();
1752 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1756 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE)
/* Copy the child-side private value back to the outer variable.  */
1759 var = OMP_CLAUSE_DECL (c);
1760 new_var = lookup_decl (var, ctx);
1762 x = build_outer_var_ref (var, ctx);
1763 if (is_reference (var))
1764 new_var = build_fold_indirect_ref (new_var);
1765 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
1766 append_to_statement_list (x, &sub_list);
1770 x = build3 (COND_EXPR, void_type_node, predicate, sub_list, NULL);
1774 gimplify_and_add (x, stmt_list);
1778 /* Generate code to implement the REDUCTION clauses. */
/* Merges each thread's partial result into the shared variable.  With
   exactly one suitable reduction clause an OMP_ATOMIC update is used;
   otherwise the whole merge sequence is bracketed by
   GOMP_atomic_start/GOMP_atomic_end.  Output goes to STMT_LIST.  */
1781 lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx)
1783 tree sub_list = NULL, x, c;
1786 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
1787 update in that case, otherwise use a lock. */
1788 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
1789 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
1791 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1793 /* Never use OMP_ATOMIC for array reductions. */
1803 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1805 tree var, ref, new_var;
1806 enum tree_code code;
1808 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
1811 var = OMP_CLAUSE_DECL (c);
1812 new_var = lookup_decl (var, ctx);
1813 if (is_reference (var))
1814 new_var = build_fold_indirect_ref (new_var);
1815 ref = build_outer_var_ref (var, ctx);
1816 code = OMP_CLAUSE_REDUCTION_CODE (c);
1818 /* reduction(-:var) sums up the partial results, so it acts
1819 identically to reduction(+:var). */
1820 if (code == MINUS_EXPR)
/* Single-clause case: emit "*addr = *addr OP new_var" as an
   OMP_ATOMIC on the shared location.  */
1825 tree addr = build_fold_addr_expr (ref);
1827 addr = save_expr (addr);
1828 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
1829 x = fold_build2 (code, TREE_TYPE (ref), ref, new_var);
1830 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
1831 gimplify_and_add (x, stmt_list);
/* Placeholder reductions: splice the user-supplied merge code,
   binding the placeholder to the shared reference via
   DECL_VALUE_EXPR.  */
1835 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1837 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
1839 if (is_reference (var))
1840 ref = build_fold_addr_expr (ref);
1841 SET_DECL_VALUE_EXPR (placeholder, ref);
1842 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
1843 gimplify_and_add (OMP_CLAUSE_REDUCTION_MERGE (c), &sub_list);
1844 OMP_CLAUSE_REDUCTION_MERGE (c) = NULL;
1845 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
/* Plain reductions: "shared = shared OP private".  */
1849 x = build2 (code, TREE_TYPE (ref), ref, new_var);
1850 ref = build_outer_var_ref (var, ctx);
1851 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
1852 append_to_statement_list (x, &sub_list);
/* Multi-clause/locked path: protect the merge sequence with the
   libgomp atomic lock.  */
1856 x = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
1857 x = build_function_call_expr (x, NULL);
1858 gimplify_and_add (x, stmt_list);
1860 gimplify_and_add (sub_list, stmt_list);
1862 x = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
1863 x = build_function_call_expr (x, NULL);
1864 gimplify_and_add (x, stmt_list);
1868 /* Generate code to implement the COPYPRIVATE clauses. */
/* SLIST receives the sender-side stores into the copy record;
   RLIST receives the receiver-side copy-out assignments.  */
1871 lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist,
1876 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1881 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
1884 var = OMP_CLAUSE_DECL (c);
1885 by_ref = use_pointer_for_field (var, false);
/* Sender side: store the value (or its address) into the record
   field.  In nested contexts the decl must be resolved through the
   enclosing context's mappings.  */
1887 ref = build_sender_ref (var, ctx);
1888 x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var;
1889 x = by_ref ? build_fold_addr_expr (x) : x;
1890 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
1891 gimplify_and_add (x, slist);
/* Receiver side: copy the broadcast value into the local variable,
   dereferencing both sides for reference-typed variables.  */
1893 ref = build_receiver_ref (var, by_ref, ctx);
1894 if (is_reference (var))
1896 ref = build_fold_indirect_ref (ref);
1897 var = build_fold_indirect_ref (var);
1899 x = lang_hooks.decls.omp_clause_assign_op (c, var, ref);
1900 gimplify_and_add (x, rlist);
1905 /* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE,
1906 and REDUCTION from the sender (aka parent) side. */
/* For each clause, decide whether the value flows into the child
   (store into the record before the region; goes on ILIST), out of
   it (load from the record after; goes on OLIST), or both.  */
1909 lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx)
1913 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1915 tree val, ref, x, var;
1916 bool by_ref, do_in = false, do_out = false;
1918 switch (OMP_CLAUSE_CODE (c))
1920 case OMP_CLAUSE_FIRSTPRIVATE:
1921 case OMP_CLAUSE_COPYIN:
1922 case OMP_CLAUSE_LASTPRIVATE:
1923 case OMP_CLAUSE_REDUCTION:
/* VAR is the decl as seen in this function; in a nested context
   it must be resolved through the enclosing mappings.  */
1929 var = val = OMP_CLAUSE_DECL (c);
1931 var = lookup_decl_in_outer_ctx (val, ctx);
/* Variable-sized values are handled elsewhere; skip them here.  */
1933 if (is_variable_sized (val))
1935 by_ref = use_pointer_for_field (val, false);
1937 switch (OMP_CLAUSE_CODE (c))
1939 case OMP_CLAUSE_FIRSTPRIVATE:
1940 case OMP_CLAUSE_COPYIN:
1944 case OMP_CLAUSE_LASTPRIVATE:
/* When passed by reference, the child writes through the
   pointer, so no explicit copy-out is needed.  */
1945 if (by_ref || is_reference (val))
1947 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1955 case OMP_CLAUSE_REDUCTION:
1957 do_out = !(by_ref || is_reference (val));
/* Copy-in: record_field = var (or &var).  */
1966 ref = build_sender_ref (val, ctx);
1967 x = by_ref ? build_fold_addr_expr (var) : var;
1968 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
1969 gimplify_and_add (x, ilist);
/* Copy-out: var = record_field, after the region.  */
1974 ref = build_sender_ref (val, ctx);
1975 x = build2 (MODIFY_EXPR, void_type_node, var, ref);
1976 gimplify_and_add (x, olist);
1981 /* Generate code to implement SHARED from the sender (aka parent) side.
1982 This is trickier, since OMP_PARALLEL_CLAUSES doesn't list things that
1983 got automatically shared. */
/* Walks the fields of the context's .omp_data_s record instead of the
   clause list.  For each shared field, stores the variable (or its
   address) into the record on ILIST, and copies by-value fields back
   out on OLIST.  */
1986 lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx)
1988 tree var, ovar, nvar, f, x;
1990 if (ctx->record_type == NULL)
1993 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
/* DECL_ABSTRACT_ORIGIN of the field points back at the original
   user variable; skip fields whose child-side decl never got a
   DECL_VALUE_EXPR (i.e. isn't actually used as shared).  */
1995 ovar = DECL_ABSTRACT_ORIGIN (f);
1996 nvar = maybe_lookup_decl (ovar, ctx);
1997 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
2002 /* If CTX is a nested parallel directive. Find the immediately
2003 enclosing parallel or workshare construct that contains a
2004 mapping for OVAR. */
2006 var = lookup_decl_in_outer_ctx (ovar, ctx)
2008 if (use_pointer_for_field (ovar, true))
/* Pass by reference: a single address store suffices; the child
   writes through the pointer.  */
2010 x = build_sender_ref (ovar, ctx);
2011 var = build_fold_addr_expr (var);
2012 x = build2 (MODIFY_EXPR, void_type_node, x, var);
2013 gimplify_and_add (x, ilist);
/* Pass by value: copy in before and copy out after the region.  */
2017 x = build_sender_ref (ovar, ctx);
2018 x = build2 (MODIFY_EXPR, void_type_node, x, var);
2019 gimplify_and_add (x, ilist);
2021 x = build_sender_ref (ovar, ctx);
2022 x = build2 (MODIFY_EXPR, void_type_node, var, x);
2023 gimplify_and_add (x, olist);
2028 /* Build the function calls to GOMP_parallel_start etc to actually
2029 generate the parallel operation. REGION is the parallel region
2030 being expanded. BB is the block where to insert the code. WS_ARGS
2031 will be set if this is a call to a combined parallel+workshare
2032 construct, it contains the list of additional arguments needed by
2033 the workshare construct. */
2036 expand_parallel_call (struct omp_region *region, basic_block bb, tree ws_args)
2038 tree t, args, val, cond, c, list, clauses;
2039 block_stmt_iterator si;
2042 clauses = OMP_PARALLEL_CLAUSES (region->entry);
2043 push_gimplify_context ();
2045 /* Determine what flavor of GOMP_parallel_start we will be
2047 start_ix = BUILT_IN_GOMP_PARALLEL_START;
2048 if (is_combined_parallel (region))
2050 tree stmt = region->inner->entry;
/* Combined parallel+loop selects the GOMP_parallel_loop_*_start
   builtin matching the schedule kind (relies on the builtins being
   declared contiguously in schedule order); combined sections uses
   the sections variant.  */
2052 if (TREE_CODE (stmt) == OMP_FOR)
2054 struct omp_for_data fd;
2055 extract_omp_for_data (stmt, &fd);
2056 start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START + fd.sched_kind;
2058 else if (TREE_CODE (stmt) == OMP_SECTIONS)
2059 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
2064 /* By default, the value of NUM_THREADS is zero (selected at run time)
2065 and there is no conditional. */
2067 val = build_int_cst (unsigned_type_node, 0);
2069 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
2071 cond = OMP_CLAUSE_IF_EXPR (c);
2073 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
2075 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
2077 /* Ensure 'val' is of the correct type. */
2078 val = fold_convert (unsigned_type_node, val);
2080 /* If we found the clause 'if (cond)', build either
2081 (cond != 0) or (cond ? val : 1u). */
2084 block_stmt_iterator si;
2086 cond = gimple_boolify (cond);
/* VAL == 0 means "runtime decides", so (cond == 0) folds the whole
   thing into 0-or-1 threads directly.  */
2088 if (integer_zerop (val))
2089 val = build2 (EQ_EXPR, unsigned_type_node, cond,
2090 build_int_cst (TREE_TYPE (cond), 0));
2093 basic_block cond_bb, then_bb, else_bb;
2095 tree t, then_lab, else_lab, tmp;
/* Otherwise materialize (cond ? val : 1u) as explicit control flow:
   split BB, add a condition block and two assignment blocks that
   both feed TMP.  */
2097 tmp = create_tmp_var (TREE_TYPE (val), NULL);
2098 e = split_block (bb, NULL);
2103 then_bb = create_empty_bb (cond_bb);
2104 else_bb = create_empty_bb (then_bb);
2105 then_lab = create_artificial_label ();
2106 else_lab = create_artificial_label ();
2108 t = build3 (COND_EXPR, void_type_node,
2110 build_and_jump (&then_lab),
2111 build_and_jump (&else_lab));
2113 si = bsi_start (cond_bb);
2114 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2116 si = bsi_start (then_bb);
2117 t = build1 (LABEL_EXPR, void_type_node, then_lab);
2118 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2119 t = build2 (MODIFY_EXPR, void_type_node, tmp, val);
2120 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2122 si = bsi_start (else_bb);
2123 t = build1 (LABEL_EXPR, void_type_node, else_lab);
2124 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2125 t = build2 (MODIFY_EXPR, void_type_node, tmp,
2126 build_int_cst (unsigned_type_node, 1));
2127 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2129 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
2130 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
2131 make_edge (then_bb, bb, EDGE_FALLTHRU);
2132 make_edge (else_bb, bb, EDGE_FALLTHRU);
2138 val = get_formal_tmp_var (val, &list);
2139 si = bsi_start (bb);
2140 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
/* Build the GOMP_parallel_start argument list (consed in reverse):
   child_fn, &.omp_data_o (or NULL when no data record), num_threads,
   plus any combined-workshare arguments.  */
2144 args = tree_cons (NULL, val, NULL);
2145 t = OMP_PARALLEL_DATA_ARG (region->entry);
2147 t = null_pointer_node;
2149 t = build_fold_addr_expr (t);
2150 args = tree_cons (NULL, t, args);
2151 t = build_fold_addr_expr (OMP_PARALLEL_FN (region->entry));
2152 args = tree_cons (NULL, t, args);
2155 args = chainon (args, ws_args);
2157 t = built_in_decls[start_ix];
2158 t = build_function_call_expr (t, args);
2159 gimplify_and_add (t, &list);
/* The spawning thread participates too: call the child function
   directly with the same data argument...  */
2161 t = OMP_PARALLEL_DATA_ARG (region->entry);
2163 t = null_pointer_node;
2165 t = build_fold_addr_expr (t);
2166 args = tree_cons (NULL, t, NULL);
2167 t = build_function_call_expr (OMP_PARALLEL_FN (region->entry), args);
2168 gimplify_and_add (t, &list);
/* ... then join the team at GOMP_parallel_end.  */
2170 t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END];
2171 t = build_function_call_expr (t, NULL);
2172 gimplify_and_add (t, &list);
2175 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2177 pop_gimplify_context (NULL_TREE);
2181 /* If exceptions are enabled, wrap *STMT_P in a MUST_NOT_THROW catch
2182 handler. This prevents programs from violating the structured
2183 block semantics with throws. */
/* The failure action is the frontend's cleanup hook when available
   (e.g. C++ terminate), otherwise a call to __builtin_trap.  */
2186 maybe_catch_exception (tree *stmt_p)
2190 if (!flag_exceptions)
2193 if (lang_protect_cleanup_actions)
2194 t = lang_protect_cleanup_actions ();
2197 t = built_in_decls[BUILT_IN_TRAP];
2198 t = build_function_call_expr (t, NULL);
/* Build TRY_CATCH_EXPR (body, EH_FILTER must-not-throw) and replace
   the original statement list with it.  */
2200 f = build2 (EH_FILTER_EXPR, void_type_node, NULL, NULL);
2201 EH_FILTER_MUST_NOT_THROW (f) = 1;
2202 gimplify_and_add (t, &EH_FILTER_FAILURE (f));
2204 t = build2 (TRY_CATCH_EXPR, void_type_node, *stmt_p, NULL);
2205 append_to_statement_list (f, &TREE_OPERAND (t, 1));
2208 append_to_statement_list (t, stmt_p);
2211 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
/* LIST is a TREE_LIST of decls; links each decl to the next list
   element's decl (the last one gets NULL — the selecting conditional
   is on an elided line) and returns the head decl, or NULL_TREE for
   an empty list.  */
2214 list2chain (tree list)
2218 for (t = list; t; t = TREE_CHAIN (t))
2220 tree var = TREE_VALUE (t);
2222 TREE_CHAIN (var) = TREE_VALUE (TREE_CHAIN (t));
2224 TREE_CHAIN (var) = NULL_TREE;
2227 return list ? TREE_VALUE (list) : NULL_TREE;
2231 /* Remove barriers in REGION->EXIT's block. Note that this is only
2232 valid for OMP_PARALLEL regions. Since the end of a parallel region
2233 is an implicit barrier, any workshare inside the OMP_PARALLEL that
2234 left a barrier at the end of the OMP_PARALLEL region can now be
/* ... (comment continues on an elided line: the barrier is
   redundant and can be removed).  */
2238 remove_exit_barrier (struct omp_region *region)
2240 block_stmt_iterator si;
2241 basic_block exit_bb;
2244 gcc_assert (TREE_CODE (region->entry) == OMP_PARALLEL);
2246 exit_bb = bb_for_stmt (region->exit);
2248 /* The barrier should be immediately before OMP_RETURN_EXPR.
2249 Otherwise, we cannot remove it. */
2250 si = bsi_last (exit_bb);
2252 gcc_assert (TREE_CODE (t) == OMP_RETURN_EXPR)
/* Only delete the statement if it really is a GOMP_barrier call.  */
2258 if (TREE_CODE (t) == CALL_EXPR
2259 && get_callee_fndecl (t) == built_in_decls[BUILT_IN_GOMP_BARRIER])
2260 bsi_remove (&si, true);
2264 /* Expand the OpenMP parallel directive starting at REGION. */
/* Outlines the parallel body into its own function (CHILD_FN),
   rewires the CFG, registers the new function with the callgraph and
   finally emits the GOMP_parallel_start/end calls in the parent.  */
2267 expand_omp_parallel (struct omp_region *region)
2269 basic_block entry_bb, exit_bb, new_bb;
2270 struct function *child_cfun, *saved_cfun;
2271 tree child_fn, block, t, ws_args;
2272 block_stmt_iterator si;
2275 child_fn = OMP_PARALLEL_FN (region->entry);
2276 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
2279 entry_bb = bb_for_stmt (region->entry);
2280 exit_bb = bb_for_stmt (region->exit);
2282 if (is_combined_parallel (region))
2284 ws_args = region->ws_args;
2286 /* For combined parallel+workshare calls, barriers at the end of
2287 the function are not necessary and can be removed. Since the
2288 caller will have a barrier of its own, the workshare barrier is
2290 remove_exit_barrier (region);
2293 ws_args = NULL_TREE;
2295 if (DECL_STRUCT_FUNCTION (OMP_PARALLEL_FN (region->entry))->cfg)
2297 /* Due to inlining, it may happen that we have already outlined
2298 the region, in which case all we need to do is make the
2299 sub-graph unreachable and emit the parallel call. */
2300 edge entry_succ_e, exit_succ_e;
2301 block_stmt_iterator si;
2303 entry_succ_e = single_succ_edge (entry_bb);
2304 exit_succ_e = single_succ_edge (exit_bb);
2306 si = bsi_last (entry_bb);
2307 gcc_assert (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
2308 bsi_remove (&si, true);
2311 remove_edge (entry_succ_e);
2312 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
2316 /* If the parallel region needs data sent from the parent
2317 function, then the very first statement of the parallel body
2318 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
2319 &.OMP_DATA_O is passed as an argument to the child function,
2320 we need to replace it with the argument as seen by the child
2323 In most cases, this will end up being the identity assignment
2324 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
2325 a function call that has been inlined, the original PARM_DECL
2326 .OMP_DATA_I may have been converted into a different local
2327 variable. In which case, we need to keep the assignment. */
2328 if (OMP_PARALLEL_DATA_ARG (region->entry))
2330 basic_block entry_succ_bb = single_succ (entry_bb);
2331 block_stmt_iterator si = bsi_start (entry_succ_bb);
2334 gcc_assert (!bsi_end_p (si));
2336 stmt = bsi_stmt (si);
2337 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR
2338 && TREE_CODE (TREE_OPERAND (stmt, 1)) == ADDR_EXPR
2339 && TREE_OPERAND (TREE_OPERAND (stmt, 1), 0)
2340 == OMP_PARALLEL_DATA_ARG (region->entry));
/* Identity assignment can simply be dropped; otherwise rewrite the
   RHS to use the child's PARM_DECL.  */
2342 if (TREE_OPERAND (stmt, 0) == DECL_ARGUMENTS (child_fn))
2343 bsi_remove (&si, true);
2345 TREE_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn);
2348 /* Declare local variables needed in CHILD_CFUN. */
2349 block = DECL_INITIAL (child_fn);
2350 BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list);
2351 DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list;
2353 /* Reset DECL_CONTEXT on locals and function arguments. */
2354 for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t))
2355 DECL_CONTEXT (t) = child_fn;
2357 for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
2358 DECL_CONTEXT (t) = child_fn;
2360 /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the
2362 si = bsi_last (entry_bb);
2364 gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
2365 bsi_remove (&si, true);
2366 e = split_block (entry_bb, t);
2368 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
2370 /* Move the parallel region into CHILD_CFUN. We need to reset
2371 dominance information because the expansion of the inner
2372 regions has invalidated it. */
2373 free_dominance_info (CDI_DOMINATORS);
2374 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
2375 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
2376 cgraph_add_new_function (child_fn);
2378 /* Convert OMP_RETURN into a RETURN_EXPR. */
2379 si = bsi_last (exit_bb);
2380 gcc_assert (!bsi_end_p (si)
2381 && TREE_CODE (bsi_stmt (si)) == OMP_RETURN_EXPR);
2382 t = build1 (RETURN_EXPR, void_type_node, NULL);
/* NOTE(review): TSI_SAME_STMT looks like a typo for BSI_SAME_STMT —
   this is a block_stmt_iterator insertion; verify the enum value.  */
2383 bsi_insert_after (&si, t, TSI_SAME_STMT);
2384 bsi_remove (&si, true);
2387 /* Emit a library call to launch the children threads. */
2388 expand_parallel_call (region, new_bb, ws_args);
2392 /* A subroutine of expand_omp_for. Generate code for a parallel
2393 loop with any schedule. Given parameters:
2395 for (V = N1; V cond N2; V += STEP) BODY;
2397 where COND is "<" or ">", we generate pseudocode
2399 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2400 if (more) goto L0; else goto L3;
2407 if (V cond iend) goto L1; else goto L2;
2409 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2412 If this is a combined omp parallel loop, instead of the call to
2413 GOMP_loop_foo_start, we emit 'goto L3'. */
/* START_FN/NEXT_FN select the libgomp entry points for the schedule
   in use.  Returns the basic block following the loop (the L3
   successor).  */
2416 expand_omp_for_generic (struct omp_region *region,
2417 struct omp_for_data *fd,
2418 enum built_in_function start_fn,
2419 enum built_in_function next_fn)
2421 tree l0, l1, l2, l3;
2422 tree type, istart0, iend0, iend;
2424 basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb;
2426 block_stmt_iterator si;
2427 bool in_combined_parallel = is_combined_parallel (region);
2429 type = TREE_TYPE (fd->v);
/* istart0/iend0 are passed by address to the runtime, hence
   TREE_ADDRESSABLE.  The libgomp interface uses longs.  */
2431 istart0 = create_tmp_var (long_integer_type_node, ".istart0");
2432 iend0 = create_tmp_var (long_integer_type_node, ".iend0");
2433 TREE_ADDRESSABLE (istart0) = 1;
2434 TREE_ADDRESSABLE (iend0) = 1;
2436 l0 = create_artificial_label ();
2437 l1 = create_artificial_label ();
2438 l2 = create_artificial_label ();
2439 l3 = create_artificial_label ();
2440 iend = create_tmp_var (type, NULL);
2442 entry_bb = bb_for_stmt (region->entry);
2443 l1_bb = single_succ (entry_bb);
2444 exit_bb = bb_for_stmt (region->exit);
/* Drop the OMP_FOR statement itself; it is replaced by the explicit
   control flow built below.  */
2446 si = bsi_last (entry_bb);
2447 gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2448 bsi_remove (&si, true);
2449 list = alloc_stmt_list ();
2451 if (!in_combined_parallel)
2453 /* If this is not a combined parallel loop, emit a call to
2454 GOMP_loop_foo_start in ENTRY_BB. */
/* Arguments are consed in reverse: N1, N2, STEP, CHUNK, &istart0,
   &iend0 — all converted to long per the libgomp ABI.  */
2455 t = build_fold_addr_expr (iend0);
2456 args = tree_cons (NULL, t, NULL);
2457 t = build_fold_addr_expr (istart0);
2458 args = tree_cons (NULL, t, args);
2461 t = fold_convert (long_integer_type_node, fd->chunk_size);
2462 args = tree_cons (NULL, t, args);
2464 t = fold_convert (long_integer_type_node, fd->step);
2465 args = tree_cons (NULL, t, args);
2466 t = fold_convert (long_integer_type_node, fd->n2);
2467 args = tree_cons (NULL, t, args);
2468 t = fold_convert (long_integer_type_node, fd->n1);
2469 args = tree_cons (NULL, t, args);
2470 t = build_function_call_expr (built_in_decls[start_fn], args);
2471 t = get_formal_tmp_var (t, &list);
2472 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2473 build_and_jump (&l3));
2474 append_to_statement_list (t, &list);
2475 si = bsi_last (entry_bb);
2476 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2479 /* Iteration setup for sequential loop goes in L0_BB. */
2480 list = alloc_stmt_list ();
2481 t = build1 (LABEL_EXPR, void_type_node, l0);
2482 gimplify_and_add (t, &list);
/* V = istart0; iend = iend0 (converted back to the loop type).  */
2484 t = fold_convert (type, istart0);
2485 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2486 gimplify_and_add (t, &list);
2488 t = fold_convert (type, iend0);
2489 t = build2 (MODIFY_EXPR, void_type_node, iend, t);
2490 gimplify_and_add (t, &list);
2492 l0_bb = create_empty_bb (entry_bb);
2493 si = bsi_start (l0_bb);
2494 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2496 /* Loop body goes in L1_BB. */
2497 list = alloc_stmt_list ();
2498 si = bsi_start (l1_bb);
2499 bsi_insert_before (&si, build1 (LABEL_EXPR, void_type_node, l1),
2500 BSI_CONTINUE_LINKING);
2502 /* Code to control the increment and predicate for the sequential
2503 loop goes in the first half of EXIT_BB (we split EXIT_BB so
2504 that we can inherit all the edges going out of the loop
2506 list = alloc_stmt_list ();
/* V += STEP; if (V cond iend) goto L1; else goto L2;  */
2508 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2509 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2510 gimplify_and_add (t, &list);
2512 t = build2 (fd->cond_code, boolean_type_node, fd->v, iend);
2513 t = get_formal_tmp_var (t, &list);
2514 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2515 build_and_jump (&l2));
2516 append_to_statement_list (t, &list);
2518 si = bsi_last (exit_bb);
2520 gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
2521 bsi_remove (&si, true);
2522 exit_edge = split_block (exit_bb, t);
2523 exit_edge->flags = EDGE_FALSE_VALUE;
2525 si = bsi_last (exit_bb);
2526 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2528 /* Emit code to get the next parallel iteration in L2_BB. */
2529 list = alloc_stmt_list ();
2530 t = build1 (LABEL_EXPR, void_type_node, l2);
2531 gimplify_and_add (t, &list);
/* if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else L3;  */
2533 t = build_fold_addr_expr (iend0);
2534 args = tree_cons (NULL, t, NULL);
2535 t = build_fold_addr_expr (istart0);
2536 args = tree_cons (NULL, t, args);
2537 t = build_function_call_expr (built_in_decls[next_fn], args);
2538 t = get_formal_tmp_var (t, &list);
2539 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2540 build_and_jump (&l3));
2541 append_to_statement_list (t, &list);
2543 l2_bb = exit_edge->dest;
2544 si = bsi_start (l2_bb);
2545 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2547 /* Insert exit label on EXIT_EDGE. */
2548 exit_edge = single_succ_edge (l2_bb);
2549 t = build1 (LABEL_EXPR, void_type_node, l3);
2550 bsi_insert_on_edge_immediate (exit_edge, t);
2551 exit_edge->flags = EDGE_FALSE_VALUE;
2553 /* Connect the new blocks. */
/* Combined parallel loops skip the start call and jump straight to
   the "next" logic in L2.  */
2554 remove_edge (single_succ_edge (entry_bb));
2555 if (in_combined_parallel)
2556 make_edge (entry_bb, l2_bb, EDGE_FALLTHRU);
2559 make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE);
2560 make_edge (entry_bb, exit_edge->dest, EDGE_FALSE_VALUE);
2563 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2564 make_edge (exit_bb, l1_bb, EDGE_TRUE_VALUE);
2565 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
2567 return exit_edge->dest;
2571 /* A subroutine of expand_omp_for. Generate code for a parallel
2572 loop with static schedule and no specified chunk size. Given
2575 for (V = N1; V cond N2; V += STEP) BODY;
2577 where COND is "<" or ">", we generate pseudocode
2583 n = (adj + N2 - N1) / STEP;
2585 q += (q * nthreads != n);
2587 e0 = min(s0 + q, n);
2588 if (s0 >= e0) goto L2; else goto L0;
2595 if (V cond e) goto L1;
2600 expand_omp_for_static_nochunk (struct omp_region *region,
2601 struct omp_for_data *fd)
2603 tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid;
2604 tree type, utype, list;
2605 basic_block entry_bb, exit_bb, seq_start_bb, body_bb, new_exit_bb;
2606 block_stmt_iterator si;
2609 l0 = create_artificial_label ();
2610 l1 = create_artificial_label ();
2611 l2 = create_artificial_label ();
2613 type = TREE_TYPE (fd->v);
2614 utype = lang_hooks.types.unsigned_type (type);
2616 entry_bb = bb_for_stmt (region->entry);
2617 body_bb = single_succ (entry_bb);
2618 exit_bb = bb_for_stmt (region->exit);
2620 /* Iteration space partitioning goes in ENTRY_BB. */
2621 list = alloc_stmt_list ();
2622 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2623 t = build_function_call_expr (t, NULL);
2624 t = fold_convert (utype, t);
2625 nthreads = get_formal_tmp_var (t, &list);
2627 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2628 t = build_function_call_expr (t, NULL);
2629 t = fold_convert (utype, t);
2630 threadid = get_formal_tmp_var (t, &list);
2632 fd->n1 = fold_convert (type, fd->n1);
2633 if (!is_gimple_val (fd->n1))
2634 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2636 fd->n2 = fold_convert (type, fd->n2);
2637 if (!is_gimple_val (fd->n2))
2638 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2640 fd->step = fold_convert (type, fd->step);
2641 if (!is_gimple_val (fd->step))
2642 fd->step = get_formal_tmp_var (fd->step, &list);
2644 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2645 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
2646 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
2647 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
2648 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
2649 t = fold_convert (utype, t);
2650 if (is_gimple_val (t))
2653 n = get_formal_tmp_var (t, &list);
2655 t = build2 (TRUNC_DIV_EXPR, utype, n, nthreads);
2656 q = get_formal_tmp_var (t, &list);
2658 t = build2 (MULT_EXPR, utype, q, nthreads);
2659 t = build2 (NE_EXPR, utype, t, n);
2660 t = build2 (PLUS_EXPR, utype, q, t);
2661 q = get_formal_tmp_var (t, &list);
2663 t = build2 (MULT_EXPR, utype, q, threadid);
2664 s0 = get_formal_tmp_var (t, &list);
2666 t = build2 (PLUS_EXPR, utype, s0, q);
2667 t = build2 (MIN_EXPR, utype, t, n);
2668 e0 = get_formal_tmp_var (t, &list);
2670 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
2671 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2),
2672 build_and_jump (&l0));
2673 append_to_statement_list (t, &list);
2675 si = bsi_last (entry_bb);
2676 gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2677 bsi_remove (&si, true);
2678 si = bsi_last (entry_bb);
2679 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2681 /* Setup code for sequential iteration goes in SEQ_START_BB. */
2682 list = alloc_stmt_list ();
2684 t = build1 (LABEL_EXPR, void_type_node, l0);
2685 gimplify_and_add (t, &list);
2687 t = fold_convert (type, s0);
2688 t = build2 (MULT_EXPR, type, t, fd->step);
2689 t = build2 (PLUS_EXPR, type, t, fd->n1);
2690 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2691 gimplify_and_add (t, &list);
2693 t = fold_convert (type, e0);
2694 t = build2 (MULT_EXPR, type, t, fd->step);
2695 t = build2 (PLUS_EXPR, type, t, fd->n1);
2696 e = get_formal_tmp_var (t, &list);
2698 seq_start_bb = create_empty_bb (entry_bb);
2699 si = bsi_start (seq_start_bb);
2700 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2702 /* Original body goes in BODY_BB. */
2703 si = bsi_start (body_bb);
2704 t = build1 (LABEL_EXPR, void_type_node, l1);
2705 bsi_insert_before (&si, t, BSI_CONTINUE_LINKING);
2707 /* Split EXIT_BB at the OMP_RETURN. The code controlling the
2708 sequential loop goes in the original EXIT_BB. The exit out of
2709 the parallel loop goes in the new block (NEW_EXIT_BB). */
2710 si = bsi_last (exit_bb);
2712 bsi_remove (&si, true);
2713 gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
2714 exit_edge = split_block (exit_bb, t);
2715 new_exit_bb = exit_edge->dest;
2716 list = alloc_stmt_list ();
2718 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2719 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2720 gimplify_and_add (t, &list);
2722 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
2723 t = get_formal_tmp_var (t, &list);
2724 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2725 build_and_jump (&l2));
2726 append_to_statement_list (t, &list);
2728 si = bsi_last (exit_bb);
2729 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2731 /* Add the exit label to NEW_EXIT_BB. */
2732 si = bsi_start (new_exit_bb);
2733 t = build1 (LABEL_EXPR, void_type_node, l2);
2734 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2735 single_succ_edge (new_exit_bb)->flags = EDGE_FALLTHRU;
2737 /* Connect all the blocks. */
2738 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
2740 remove_edge (single_succ_edge (entry_bb));
2741 make_edge (entry_bb, new_exit_bb, EDGE_TRUE_VALUE);
2742 make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE);
2744 make_edge (exit_bb, body_bb, EDGE_TRUE_VALUE);
2745 find_edge (exit_bb, new_exit_bb)->flags = EDGE_FALSE_VALUE;
2751 /* A subroutine of expand_omp_for. Generate code for a parallel
2752 loop with static schedule and a specified chunk size. Given
2755 for (V = N1; V cond N2; V += STEP) BODY;
2757 where COND is "<" or ">", we generate pseudocode
2763 n = (adj + N2 - N1) / STEP;
2766 s0 = (trip * nthreads + threadid) * CHUNK;
2767 e0 = min(s0 + CHUNK, n);
2768 if (s0 < n) goto L1; else goto L4;
2775 if (V cond e) goto L2; else goto L3;
2783 expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
2785 tree l0, l1, l2, l3, l4, n, s0, e0, e, t;
2786 tree trip, nthreads, threadid;
2788 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
2789 basic_block trip_update_bb, new_exit_bb;
2792 block_stmt_iterator si;
2794 l0 = create_artificial_label ();
2795 l1 = create_artificial_label ();
2796 l2 = create_artificial_label ();
2797 l3 = create_artificial_label ();
2798 l4 = create_artificial_label ();
2800 type = TREE_TYPE (fd->v);
2801 utype = lang_hooks.types.unsigned_type (type);
2803 entry_bb = bb_for_stmt (region->entry);
2804 body_bb = single_succ (entry_bb);
2806 exit_bb = bb_for_stmt (region->exit);
2808 /* Trip and adjustment setup goes in ENTRY_BB. */
2809 list = alloc_stmt_list ();
2811 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2812 t = build_function_call_expr (t, NULL);
2813 t = fold_convert (utype, t);
2814 nthreads = get_formal_tmp_var (t, &list);
2816 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2817 t = build_function_call_expr (t, NULL);
2818 t = fold_convert (utype, t);
2819 threadid = get_formal_tmp_var (t, &list);
2821 fd->n1 = fold_convert (type, fd->n1);
2822 if (!is_gimple_val (fd->n1))
2823 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2825 fd->n2 = fold_convert (type, fd->n2);
2826 if (!is_gimple_val (fd->n2))
2827 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2829 fd->step = fold_convert (type, fd->step);
2830 if (!is_gimple_val (fd->step))
2831 fd->step = get_formal_tmp_var (fd->step, &list);
2833 fd->chunk_size = fold_convert (utype, fd->chunk_size);
2834 if (!is_gimple_val (fd->chunk_size))
2835 fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list);
2837 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2838 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
2839 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
2840 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
2841 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
2842 t = fold_convert (utype, t);
2843 if (is_gimple_val (t))
2846 n = get_formal_tmp_var (t, &list);
2848 t = build_int_cst (utype, 0);
2849 trip = get_initialized_tmp_var (t, &list, NULL);
2851 si = bsi_last (entry_bb);
2852 gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2853 bsi_remove (&si, true);
2854 si = bsi_last (entry_bb);
2855 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2857 /* Iteration space partitioning goes in ITER_PART_BB. */
2858 list = alloc_stmt_list ();
2860 t = build1 (LABEL_EXPR, void_type_node, l0);
2861 gimplify_and_add (t, &list);
2863 t = build2 (MULT_EXPR, utype, trip, nthreads);
2864 t = build2 (PLUS_EXPR, utype, t, threadid);
2865 t = build2 (MULT_EXPR, utype, t, fd->chunk_size);
2866 s0 = get_formal_tmp_var (t, &list);
2868 t = build2 (PLUS_EXPR, utype, s0, fd->chunk_size);
2869 t = build2 (MIN_EXPR, utype, t, n);
2870 e0 = get_formal_tmp_var (t, &list);
2872 t = build2 (LT_EXPR, boolean_type_node, s0, n);
2873 t = build3 (COND_EXPR, void_type_node, t,
2874 build_and_jump (&l1), build_and_jump (&l4));
2875 append_to_statement_list (t, &list);
2877 iter_part_bb = create_empty_bb (entry_bb);
2878 si = bsi_start (iter_part_bb);
2879 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2881 /* Setup code for sequential iteration goes in SEQ_START_BB. */
2882 list = alloc_stmt_list ();
2884 t = build1 (LABEL_EXPR, void_type_node, l1);
2885 gimplify_and_add (t, &list);
2887 t = fold_convert (type, s0);
2888 t = build2 (MULT_EXPR, type, t, fd->step);
2889 t = build2 (PLUS_EXPR, type, t, fd->n1);
2890 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2891 gimplify_and_add (t, &list);
2893 t = fold_convert (type, e0);
2894 t = build2 (MULT_EXPR, type, t, fd->step);
2895 t = build2 (PLUS_EXPR, type, t, fd->n1);
2896 e = get_formal_tmp_var (t, &list);
2898 seq_start_bb = create_empty_bb (iter_part_bb);
2899 si = bsi_start (seq_start_bb);
2900 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2902 /* Main loop body goes in BODY_BB. */
2903 si = bsi_start (body_bb);
2904 t = build1 (LABEL_EXPR, void_type_node, l2);
2905 bsi_insert_before (&si, t, BSI_CONTINUE_LINKING);
2907 /* Split EXIT_BB. The code controlling the sequential loop goes in
2908 the first half. The trip update code goes into the second half
2909 (TRIP_UPDATE_BB). */
2910 list = alloc_stmt_list ();
2912 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2913 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2914 gimplify_and_add (t, &list);
2916 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
2917 t = get_formal_tmp_var (t, &list);
2918 t = build3 (COND_EXPR, void_type_node, t,
2919 build_and_jump (&l2), build_and_jump (&l3));
2920 append_to_statement_list (t, &list);
2922 si = bsi_last (exit_bb);
2924 gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
2925 bsi_remove (&si, true);
2926 exit_edge = split_block (exit_bb, t);
2927 si = bsi_last (exit_bb);
2928 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2930 /* Trip update code goes into TRIP_UPDATE_BB. */
2931 trip_update_bb = exit_edge->dest;
2932 list = alloc_stmt_list ();
2934 t = build1 (LABEL_EXPR, void_type_node, l3);
2935 gimplify_and_add (t, &list);
2937 t = build_int_cst (utype, 1);
2938 t = build2 (PLUS_EXPR, utype, trip, t);
2939 t = build2 (MODIFY_EXPR, void_type_node, trip, t);
2940 gimplify_and_add (t, &list);
2942 si = bsi_start (trip_update_bb);
2943 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2944 exit_edge = single_succ_edge (trip_update_bb);
2945 exit_edge->flags = EDGE_FALLTHRU;
2946 new_exit_bb = exit_edge->dest;
2948 /* Insert exit label on EXIT_EDGE. */
2949 t = build1 (LABEL_EXPR, void_type_node, l4);
2950 bsi_insert_on_edge_immediate (exit_edge, t);
2952 /* Connect the new blocks. */
2953 remove_edge (single_succ_edge (entry_bb));
2954 make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU);
2956 make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE);
2957 make_edge (iter_part_bb, new_exit_bb, EDGE_FALSE_VALUE);
2958 remove_edge (exit_edge);
2960 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
2962 make_edge (exit_bb, body_bb, EDGE_TRUE_VALUE);
2963 find_edge (exit_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE;
2965 make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU);
2971 /* Expand the OpenMP loop defined by REGION. */
2974 expand_omp_for (struct omp_region *region)
2976 struct omp_for_data fd;
2977 basic_block last_bb = NULL;
2979 push_gimplify_context ();
2981 extract_omp_for_data (region->entry, &fd);
2983 if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !fd.have_ordered)
2985 if (fd.chunk_size == NULL)
2986 last_bb = expand_omp_for_static_nochunk (region, &fd);
2988 last_bb = expand_omp_for_static_chunk (region, &fd);
2992 int fn_index = fd.sched_kind + fd.have_ordered * 4;
2993 int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
2994 int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
2995 last_bb = expand_omp_for_generic (region, &fd, start_ix, next_ix);
2998 pop_gimplify_context (NULL);
3002 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
3004 v = GOMP_sections_start (n);
3021 v = GOMP_sections_next ();
3026 If this is a combined parallel sections, replace the call to
3027 GOMP_sections_start with 'goto L1'. */
/* Expand REGION, an OMP_SECTIONS region, into the switch-based
   dispatch shown in the pseudocode above: V receives the next section
   number from the runtime and a switch() jumps to that section.

   NOTE(review): this revision appears to be missing several lines
   relative to the repository copy -- the declarations of I, LEN and
   EI, and the `t = bsi_stmt (si);` fetches before the gcc_asserts
   below (as written those asserts test a stale T).  Confirm against
   the full source before modifying.  */
expand_omp_sections (struct omp_region *region)
tree label_vec, l0, l1, l2, t, u, v;
basic_block entry_bb, exit_bb, l0_bb, l1_bb, default_bb;
edge e, entry_edge, exit_edge;
block_stmt_iterator si;

entry_bb = bb_for_stmt (region->entry);
exit_bb = bb_for_stmt (region->exit);

l0 = create_artificial_label ();
l1 = create_artificial_label ();
l2 = create_artificial_label ();

/* V holds the section number to execute next.  */
v = create_tmp_var (unsigned_type_node, ".section");

/* We will build a switch() with enough cases for all the
   OMP_SECTION regions, a '0' case to handle the end of more work
   and a default case to abort if something goes wrong.  */
len = EDGE_COUNT (entry_bb->succs);
label_vec = make_tree_vec (len + 2);

/* Split ENTRY_BB.  The call to GOMP_sections_start goes in the
   first half.  The second half contains the switch().  */
si = bsi_last (entry_bb);
gcc_assert (t && TREE_CODE (t) == OMP_SECTIONS);
bsi_remove (&si, true);
entry_edge = split_block (entry_bb, t);
l0_bb = entry_edge->dest;

if (!is_combined_parallel (region))
/* If we are not inside a combined parallel+sections region,
   call GOMP_sections_start.  */
t = build_int_cst (unsigned_type_node, len);
t = tree_cons (NULL, t, NULL);
u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
t = build_function_call_expr (u, t);
/* v = GOMP_sections_start (len);  */
t = build2 (MODIFY_EXPR, void_type_node, v, t);
si = bsi_last (entry_bb);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

/* The switch() statement replacing OMP_SECTIONS goes in L0_BB.  */
si = bsi_last (l0_bb);

t = build1 (LABEL_EXPR, void_type_node, l0);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

/* Case 0 means "no more work": jump to the exit label L2.  */
t = build3 (CASE_LABEL_EXPR, void_type_node,
	    build_int_cst (unsigned_type_node, 0), NULL, l2);
TREE_VEC_ELT (label_vec, 0) = t;

/* Convert each OMP_SECTION into a CASE_LABEL_EXPR.  */
FOR_EACH_EDGE (e, ei, l0_bb->succs)
basic_block s_entry_bb, s_exit_bb;

s_entry_bb = e->dest;
si = bsi_last (s_entry_bb);
gcc_assert (t && TREE_CODE (t) == OMP_SECTION);
s_exit_bb = bb_for_stmt (lookup_omp_region (t)->exit);
bsi_remove (&si, true);

/* Replace the OMP_SECTION marker with a fresh case label.  */
t = create_artificial_label ();
u = build_int_cst (unsigned_type_node, i);
u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
TREE_VEC_ELT (label_vec, i) = u;
t = build1 (LABEL_EXPR, void_type_node, t);
si = bsi_last (s_entry_bb);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

/* Remove the section's OMP_RETURN marker.  */
si = bsi_last (s_exit_bb);
gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
bsi_remove (&si, true);
single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;

/* Error handling code goes in DEFAULT_BB: an out-of-range section
   number traps.  */
default_bb = create_empty_bb (entry_bb);
si = bsi_start (default_bb);
t = create_artificial_label ();
u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
TREE_VEC_ELT (label_vec, len + 1) = u;
t = build1 (LABEL_EXPR, void_type_node, t);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

t = built_in_decls[BUILT_IN_TRAP];
t = build_function_call_expr (t, NULL);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

make_edge (l0_bb, default_bb, 0);

/* Code to get the next section goes in L1_BB.  */
si = bsi_last (exit_bb);
gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
bsi_remove (&si, true);
exit_edge = split_block (exit_bb, t);
l1_bb = exit_edge->src;
exit_bb = exit_edge->dest;
si = bsi_start (l1_bb);
t = build1 (LABEL_EXPR, void_type_node, l1);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

/* v = GOMP_sections_next ();  */
t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
t = build_function_call_expr (t, NULL);
t = build2 (MODIFY_EXPR, void_type_node, v, t);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

remove_edge (single_succ_edge (l1_bb));
make_edge (l1_bb, l0_bb, EDGE_FALLTHRU);

/* Exit label in EXIT_BB.  */
si = bsi_last (exit_bb);
t = build1 (LABEL_EXPR, void_type_node, l2);
bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);

make_edge (l0_bb, exit_bb, 0);
single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;

if (is_combined_parallel (region))
/* If this was a combined parallel+sections region, we did not
   emit a GOMP_sections_start in the entry block, so we just
   need to jump to L1_BB to get the next section.  */
remove_edge (single_succ_edge (entry_bb));
make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
3174 /* Generic expansion for OpenMP synchronization directives: single,
3175 master, ordered and critical. All we need to do here is remove the
3176 entry and exit markers for REGION. */
/* Expand REGION, one of the synchronization directives (single,
   master, ordered, critical): simply delete the entry directive and
   the OMP_RETURN marker, leaving the already-lowered body in place.

   NOTE(review): the statement fetches `t = bsi_stmt (si);` and the
   opening of the first gcc_assert appear to have been lost from this
   revision -- the first assert below starts mid-expression.  Confirm
   against the repository copy.  */
expand_omp_synch (struct omp_region *region)
basic_block entry_bb, exit_bb;
block_stmt_iterator si;

entry_bb = bb_for_stmt (region->entry);
exit_bb = bb_for_stmt (region->exit);

/* Remove the directive statement at the end of ENTRY_BB.  */
si = bsi_last (entry_bb);
&& (TREE_CODE (t) == OMP_SINGLE
    || TREE_CODE (t) == OMP_MASTER
    || TREE_CODE (t) == OMP_ORDERED
    || TREE_CODE (t) == OMP_CRITICAL));
bsi_remove (&si, true);
single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

/* Remove the OMP_RETURN marker at the end of EXIT_BB.  */
si = bsi_last (exit_bb);
gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR);
bsi_remove (&si, true);
single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3206 /* Expand the parallel region tree rooted at REGION. Expansion
3207 proceeds in depth-first order. Innermost regions are expanded
3208 first. This way, parallel regions that require a new function to
3209 be created (e.g., OMP_PARALLEL) can be expanded without having any
3210 internal dependencies in their body. */
/* Expand the parallel region tree rooted at REGION (see comment
   above): recurse into inner regions first, then dispatch on the
   directive code, and clean up the CFG after each expansion.

   NOTE(review): the loop/switch scaffolding (case labels, braces)
   is not present in this revision; confirm against the repository
   copy before modifying.  */
expand_omp (struct omp_region *region)
enum tree_code code = TREE_CODE (region->entry);

/* Innermost regions first, so OMP_PARALLEL outlining sees no
   unexpanded directives in its body.  */
expand_omp (region->inner);

expand_omp_parallel (region);

expand_omp_for (region);

expand_omp_sections (region);

/* Individual omp sections are handled together with their
   parent OMP_SECTIONS region.  */

expand_omp_synch (region);

/* Expansion adds and removes basic block, edges, creates
   and exposes unreachable regions that need to be cleaned up
   before proceeding.  */
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);
cleanup_tree_cfg ();

/* Move on to the next sibling region.  */
region = region->next;
3264 /* Helper for build_omp_regions. Scan the dominator tree starting at
3265 block BB. PARENT is the region that contains BB. */
/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  An OMP directive
   opens a new region; an OMP_RETURN_EXPR closes the current one.

   NOTE(review): this revision appears to be missing a few lines --
   the initialization of SI, the declarations of STMT and SON, and in
   the OMP_RETURN branch an assignment setting REGION (it is read
   before any visible store).  Confirm against the repository copy.  */
build_omp_regions_1 (basic_block bb, struct omp_region *parent)
block_stmt_iterator si;

/* Only the statement at the end of the block can be a directive.  */
if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si)))
struct omp_region *region;

stmt = bsi_stmt (si);

if (TREE_CODE (stmt) == OMP_RETURN_EXPR)
/* STMT is the return point out of region PARENT.  Mark it
   as the exit point and make PARENT the immediately
   enclosing region.  */
gcc_assert (parent);
region->exit = stmt;
parent = parent->outer;

/* If REGION is a parallel region, determine whether it is
   a combined parallel+workshare region.  */
if (TREE_CODE (region->entry) == OMP_PARALLEL)
  determine_parallel_type (region);

/* Otherwise, this directive becomes the parent for a new
   region.  */
region = new_omp_region (stmt, parent);

/* Register the region in the global splay tree, creating it (and
   recording the root region) on first use.  */
gcc_assert (region);
if (omp_regions == NULL)
omp_regions = splay_tree_new (splay_tree_compare_pointers, 0, 0);
root_omp_region = region;

splay_tree_insert (omp_regions, (splay_tree_key) stmt,
		   (splay_tree_value) region);

/* Recurse into all dominated blocks.  */
for (son = first_dom_son (CDI_DOMINATORS, bb);
     son = next_dom_son (CDI_DOMINATORS, son))
  build_omp_regions_1 (son, parent);
3322 /* Scan the CFG and build a tree of OMP regions. Return the root of
3323 the OMP region tree. */
build_omp_regions (void)
/* Must not be called twice without tearing down the previous tree.  */
gcc_assert (omp_regions == NULL && root_omp_region == NULL);
/* The scan walks the dominator tree, so dominators must be valid.  */
calculate_dominance_info (CDI_DOMINATORS);
build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
3334 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
/* Main entry point for expanding OMP-GIMPLE into runtime calls:
   build the region tree, optionally dump it, expand it, then tear
   down the bookkeeping.

   NOTE(review): the fprintf calls below are not visibly guarded by
   `if (dump_file)` in this revision -- presumably that guard (and
   the surrounding braces) were lost; confirm before modifying.  */
execute_expand_omp (void)
build_omp_regions ();

/* Nothing to do when the function contains no OMP directives.  */
if (root_omp_region)
fprintf (dump_file, "\nOMP region tree\n\n");
dump_omp_region (dump_file, root_omp_region, 0);
fprintf (dump_file, "\n");

expand_omp (root_omp_region);
/* Release the splay tree and reset the root for the next function.  */
splay_tree_delete (omp_regions);
root_omp_region = NULL;
/* Gate for pass_expand_omp: run only when -fopenmp is in effect and
   no errors have been reported so far.  */
gate_expand_omp (void)
return flag_openmp != 0 && errorcount == 0;
/* Pass descriptor for the OMP expansion pass.  Requires any GIMPLE
   form and provides PROP_gimple_lomp (OMP directives fully lowered).
   NOTE(review): some descriptor fields are absent from this revision;
   confirm against the repository copy.  */
struct tree_opt_pass pass_expand_omp =
"ompexp",				/* name */
gate_expand_omp,			/* gate */
execute_expand_omp,			/* execute */
0,					/* static_pass_number */
PROP_gimple_any,			/* properties_required */
PROP_gimple_lomp,			/* properties_provided */
0,					/* properties_destroyed */
0,					/* todo_flags_start */
TODO_dump_func,				/* todo_flags_finish */
3381 /* Routines to lower OpenMP directives into OMP-GIMPLE. */
3383 /* Lower the OpenMP sections directive in *STMT_P. */
/* Lower the OpenMP sections directive in *STMT_P within context CTX:
   lower each OMP_SECTION body, wrap the sections in OMP_RETURN_EXPR
   markers recorded in OMP_SECTIONS_SECTIONS, and surround the whole
   construct with the data-sharing setup/teardown code.

   NOTE(review): this revision appears to be missing the
   initializations of STMT (from *STMT_P), ILIST/DLIST/OLIST, and the
   declarations of I, LEN and SCTX; the lower_lastprivate_clauses
   call below is also visibly missing its trailing arguments.
   Confirm against the repository copy.  */
lower_omp_sections (tree *stmt_p, omp_context *ctx)
tree new_stmt, stmt, body, bind, block, ilist, olist, new_body;
tree dlist, region_exit;
tree_stmt_iterator tsi;

/* The sections vector must not have been filled in yet.  */
gcc_assert (OMP_SECTIONS_SECTIONS (stmt) == NULL_TREE);

push_gimplify_context ();

/* Emit the receive/initialize code for data-sharing clauses.  */
lower_rec_input_clauses (OMP_SECTIONS_CLAUSES (stmt), &ilist, &dlist, ctx);

/* Count the sections.  */
tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi))

/* There are two markers per section and one end marker for the
   whole SECTIONS region.  */
OMP_SECTIONS_SECTIONS (stmt) = make_tree_vec (2 * len + 1);

tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
body = alloc_stmt_list ();
for (i = 0; i < len; i++, tsi_next (&tsi))
tree sec_start, sec_end, sec_body;

/* Lower this section's body in its own context.  */
sec_start = tsi_stmt (tsi);
sec_body = alloc_stmt_list ();
sctx = maybe_lookup_ctx (sec_start);

lower_omp (&OMP_SECTION_BODY (sec_start), sctx);
append_to_statement_list (OMP_SECTION_BODY (sec_start), &sec_body);

/* Lastprivate copies happen at the end of the last section.  */
tree l = alloc_stmt_list ();
lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL,
append_to_statement_list (l, &sec_body);

/* Close the section with an OMP_RETURN marker.  */
sec_end = make_node (OMP_RETURN_EXPR);

OMP_SECTION_BODY (sec_start) = sec_body;
append_to_statement_list (sec_start, &body);
append_to_statement_list (sec_end, &body);

/* Record the pair of markers for this section.  */
TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), i * 2) = sec_start;
TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), i * 2 + 1) = sec_end;

block = make_node (BLOCK);
bind = build3 (BIND_EXPR, void_type_node, NULL, body, block);
maybe_catch_exception (&BIND_EXPR_BODY (bind));

/* Reductions combine after all sections have run.  */
lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx);

/* Unless there's a nowait clause, add a barrier afterward.  */
if (!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt), OMP_CLAUSE_NOWAIT))
build_omp_barrier (&olist);

pop_gimplify_context (NULL_TREE);
record_vars_into (ctx->block_vars, ctx->cb.dst_fn);

/* Assemble the final BIND_EXPR around the whole construct.  */
new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
TREE_SIDE_EFFECTS (new_stmt) = 1;
OMP_SECTIONS_BODY (stmt) = body;

region_exit = make_node (OMP_RETURN_EXPR);

new_body = alloc_stmt_list ();
append_to_statement_list (ilist, &new_body);
append_to_statement_list (stmt, &new_body);
/* ??? The OMP_RETURN doesn't logically belong here, but in
   expand_omp_sections we expect this marker to be where the
   individual sections join after completing the loop.  */
append_to_statement_list (region_exit, &new_body);
append_to_statement_list (olist, &new_body);
append_to_statement_list (dlist, &new_body);
BIND_EXPR_BODY (new_stmt) = new_body;

/* The last slot of the vector is the region-wide exit marker.  */
TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), 2 * len) = region_exit;
3481 /* A subroutine of lower_omp_single. Expand the simple form of
3482 an OMP_SINGLE, without a copyprivate clause:
3484 if (GOMP_single_start ())
3486 [ GOMP_barrier (); ] -> unless 'nowait' is present.
3488 FIXME. It may be better to delay expanding the logic of this until
3489 pass_expand_omp. The expanded logic may make the job more difficult
3490 to a synchronization analysis pass. */
/* A subroutine of lower_omp_single (see comment above).  Emit into
   *PRE_P:  if (GOMP_single_start ()) <body>;  followed by a barrier
   unless the single has a nowait clause.  */
lower_omp_single_simple (tree single_stmt, tree *pre_p)

/* Only the thread that wins GOMP_single_start runs the body.  */
t = built_in_decls[BUILT_IN_GOMP_SINGLE_START];
t = build_function_call_expr (t, NULL);
t = build3 (COND_EXPR, void_type_node, t,
	    OMP_SINGLE_BODY (single_stmt), NULL);
gimplify_and_add (t, pre_p);

/* All threads wait here unless 'nowait' was given.  */
if (!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt), OMP_CLAUSE_NOWAIT))
build_omp_barrier (pre_p);
3508 /* A subroutine of lower_omp_single. Expand the simple form of
3509 an OMP_SINGLE, with a copyprivate clause:
3511 #pragma omp single copyprivate (a, b, c)
3513 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
3516 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
3522 GOMP_single_copy_end (&copyout);
3533 FIXME. It may be better to delay expanding the logic of this until
3534 pass_expand_omp. The expanded logic may make the job more difficult
3535 to a synchronization analysis pass. */
/* A subroutine of lower_omp_single (see comment above).  Lower an
   OMP_SINGLE with a copyprivate clause: the executing thread fills
   CTX->sender_decl and calls GOMP_single_copy_end; the others read
   the broadcast values through CTX->receiver_decl via COPYIN_SEQ.

   NOTE(review): COPYIN_SEQ has no visible initialization in this
   revision, and the lower_copyprivate_clauses call below is missing
   its trailing arguments (presumably &copyin_seq and ctx).  Confirm
   against the repository copy.  */
lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx)
tree ptr_type, t, args, l0, l1, l2, copyin_seq;

/* The out-going copy buffer and the incoming pointer to the
   executing thread's buffer.  */
ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");

ptr_type = build_pointer_type (ctx->record_type);
ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");

l0 = create_artificial_label ();
l1 = create_artificial_label ();
l2 = create_artificial_label ();

/* receiver = GOMP_single_copy_start (); NULL means "I execute".  */
t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START];
t = build_function_call_expr (t, NULL);
t = fold_convert (ptr_type, t);
t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
gimplify_and_add (t, pre_p);

t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
	    build_int_cst (ptr_type, 0));
t = build3 (COND_EXPR, void_type_node, t,
	    build_and_jump (&l0), build_and_jump (&l1));
gimplify_and_add (t, pre_p);

/* L0: the executing thread runs the body...  */
t = build1 (LABEL_EXPR, void_type_node, l0);
gimplify_and_add (t, pre_p);

append_to_statement_list (OMP_SINGLE_BODY (single_stmt), pre_p);

/* ... fills the copy-out buffer ...  */
lower_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p,

/* ... and publishes it via GOMP_single_copy_end.  */
t = build_fold_addr_expr (ctx->sender_decl);
args = tree_cons (NULL, t, NULL);
t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END];
t = build_function_call_expr (t, args);
gimplify_and_add (t, pre_p);

t = build_and_jump (&l2);
gimplify_and_add (t, pre_p);

/* L1: the other threads copy the broadcast values in.  */
t = build1 (LABEL_EXPR, void_type_node, l1);
gimplify_and_add (t, pre_p);

append_to_statement_list (copyin_seq, pre_p);

/* L2: everyone meets at the barrier.  */
t = build1 (LABEL_EXPR, void_type_node, l2);
gimplify_and_add (t, pre_p);

build_omp_barrier (pre_p);
3593 /* Expand code for an OpenMP single directive. */
/* Lower the OpenMP single directive in *STMT_P within context CTX.
   Chooses the copyprivate expansion when CTX has a record type to
   broadcast, otherwise the simple GOMP_single_start form.  */
lower_omp_single (tree *stmt_p, omp_context *ctx)
tree t, bind, block, single_stmt = *stmt_p, dlist;

push_gimplify_context ();

block = make_node (BLOCK);
bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
TREE_SIDE_EFFECTS (bind) = 1;

/* Receive/initialize code for data-sharing clauses first.  */
lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt),
			 &BIND_EXPR_BODY (bind), &dlist, ctx);
lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx);

/* A record type is only created when copyprivate needs it.  */
if (ctx->record_type)
lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx);

lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind));

append_to_statement_list (dlist, &BIND_EXPR_BODY (bind));
maybe_catch_exception (&BIND_EXPR_BODY (bind));
/* Close the region with an OMP_RETURN marker for pass_expand_omp.  */
t = make_node (OMP_RETURN_EXPR);
append_to_statement_list (t, &BIND_EXPR_BODY (bind));
pop_gimplify_context (bind);

BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
BLOCK_VARS (block) = BIND_EXPR_VARS (bind);

OMP_SINGLE_BODY (single_stmt) = alloc_stmt_list ();
append_to_statement_list (bind, &OMP_SINGLE_BODY (single_stmt));
3629 /* Expand code for an OpenMP master directive. */
/* Lower the OpenMP master directive in *STMT_P within context CTX:
   guard the body with `if (omp_get_thread_num () == 0)`, expressed
   as a conditional jump around the body to label LAB.  */
lower_omp_master (tree *stmt_p, omp_context *ctx)
tree bind, block, stmt = *stmt_p, lab = NULL, x;

push_gimplify_context ();

block = make_node (BLOCK);
bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
TREE_SIDE_EFFECTS (bind) = 1;

/* if (omp_get_thread_num () != 0) goto LAB;  (LAB is created by
   build_and_jump on first use.)  */
x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
x = build_function_call_expr (x, NULL);
x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
gimplify_and_add (x, &BIND_EXPR_BODY (bind));

lower_omp (&OMP_MASTER_BODY (stmt), ctx);
append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind));

/* LAB: join point for the non-master threads.  */
x = build1 (LABEL_EXPR, void_type_node, lab);
gimplify_and_add (x, &BIND_EXPR_BODY (bind));
maybe_catch_exception (&BIND_EXPR_BODY (bind));
/* Close the region with an OMP_RETURN marker for pass_expand_omp.  */
x = make_node (OMP_RETURN_EXPR);
append_to_statement_list (x, &BIND_EXPR_BODY (bind));
pop_gimplify_context (bind);

BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
BLOCK_VARS (block) = BIND_EXPR_VARS (bind);

OMP_MASTER_BODY (stmt) = alloc_stmt_list ();
append_to_statement_list (bind, &OMP_MASTER_BODY (stmt));
3666 /* Expand code for an OpenMP ordered directive. */
/* Lower the OpenMP ordered directive in *STMT_P within context CTX:
   bracket the body with GOMP_ordered_start/GOMP_ordered_end runtime
   calls.  */
lower_omp_ordered (tree *stmt_p, omp_context *ctx)
tree bind, block, stmt = *stmt_p, x;

push_gimplify_context ();

block = make_node (BLOCK);
bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
TREE_SIDE_EFFECTS (bind) = 1;

/* GOMP_ordered_start ();  */
x = built_in_decls[BUILT_IN_GOMP_ORDERED_START];
x = build_function_call_expr (x, NULL);
gimplify_and_add (x, &BIND_EXPR_BODY (bind));

lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));

/* GOMP_ordered_end ();  */
x = built_in_decls[BUILT_IN_GOMP_ORDERED_END];
x = build_function_call_expr (x, NULL);
gimplify_and_add (x, &BIND_EXPR_BODY (bind));
maybe_catch_exception (&BIND_EXPR_BODY (bind));
/* Close the region with an OMP_RETURN marker for pass_expand_omp.  */
x = make_node (OMP_RETURN_EXPR);
append_to_statement_list (x, &BIND_EXPR_BODY (bind));
pop_gimplify_context (bind);

BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
BLOCK_VARS (block) = BIND_EXPR_VARS (bind);

OMP_ORDERED_BODY (stmt) = alloc_stmt_list ();
append_to_statement_list (bind, &OMP_ORDERED_BODY (stmt));
3702 /* Gimplify an OMP_CRITICAL statement. This is a relatively simple
3703 substitution of a couple of function calls. But in the NAMED case,
3704 requires that languages coordinate a symbol name. It is therefore
3705 best put here in common code. */

/* Cache mapping critical-section name (an IDENTIFIER_NODE) to the
   global mutex VAR_DECL created for it, so every use of the same
   name shares one lock.  GC-registered via GTY.  */
3707 static GTY((param1_is (tree), param2_is (tree)))
3708 splay_tree critical_name_mutexes;

/* Lower OMP_CRITICAL at *STMT_P within context CTX.  Named critical
   sections take the address of a shared, common-linkage mutex named
   ".gomp_critical_user_<name>" and call GOMP_critical_name_start/end;
   unnamed sections call GOMP_critical_start/end with no argument.  */
3711 lower_omp_critical (tree *stmt_p, omp_context *ctx)
3713 tree bind, block, stmt = *stmt_p;
3714 tree t, lock, unlock, name;
3716 name = OMP_CRITICAL_NAME (stmt);
/* Named case: look up (or create and cache) the per-name mutex.  */
3722 if (!critical_name_mutexes)
3723 critical_name_mutexes
3724 = splay_tree_new_ggc (splay_tree_compare_pointers);
3726 n = splay_tree_lookup (critical_name_mutexes, (splay_tree_key) name);
/* Not seen before: build the mutex decl.  TREE_PUBLIC + DECL_COMMON
   let every translation unit using this name share one object.  */
3731 decl = create_tmp_var_raw (ptr_type_node, NULL);
3733 new_str = ACONCAT ((".gomp_critical_user_",
3734 IDENTIFIER_POINTER (name), NULL));
3735 DECL_NAME (decl) = get_identifier (new_str);
3736 TREE_PUBLIC (decl) = 1;
3737 TREE_STATIC (decl) = 1;
3738 DECL_COMMON (decl) = 1;
3739 DECL_ARTIFICIAL (decl) = 1;
3740 DECL_IGNORED_P (decl) = 1;
3741 cgraph_varpool_finalize_decl (decl);
3743 splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
3744 (splay_tree_value) decl);
/* Cache hit: reuse the previously created mutex decl.  */
3747 decl = (tree) n->value;
/* Build the paired lock/unlock calls, passing the mutex address.  */
3749 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3750 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
3751 lock = build_function_call_expr (lock, args);
3753 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3754 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
3755 unlock = build_function_call_expr (unlock, args);
/* Unnamed case: use the single global critical section.  */
3759 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
3760 lock = build_function_call_expr (lock, NULL);
3762 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
3763 unlock = build_function_call_expr (unlock, NULL);
3766 push_gimplify_context ();
3768 block = make_node (BLOCK);
3769 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3770 TREE_SIDE_EFFECTS (bind) = 1;
3772 gimplify_and_add (lock, &BIND_EXPR_BODY (bind));
/* Lower the body, keeping it EH-protected between lock and unlock.  */
3774 lower_omp (&OMP_CRITICAL_BODY (stmt), ctx);
3775 maybe_catch_exception (&OMP_CRITICAL_BODY (stmt));
3776 append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind));
3778 gimplify_and_add (unlock, &BIND_EXPR_BODY (bind));
/* Mark the region's end for the expansion pass.  */
3779 t = make_node (OMP_RETURN_EXPR);
3780 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3782 pop_gimplify_context (bind);
/* Make temporaries created while lowering visible in the new scope.  */
3783 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3784 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
/* Replace the original body with the freshly built BIND_EXPR.  */
3786 OMP_CRITICAL_BODY (stmt) = alloc_stmt_list ();
3787 append_to_statement_list (bind, &OMP_CRITICAL_BODY (stmt));
3791 /* A subroutine of lower_omp_for. Generate code to emit the predicate
3792 for a lastprivate clause. Given a loop control predicate of (V
3793 cond N2), we gate the clause on (!(V cond N2)). The lowered form
3794 is appended to *BODY_P. */

/* FD describes the extracted loop bounds; BODY_P receives the gated
   lastprivate copy-out code; CTX is the enclosing OMP context.  */
3797 lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
3798 struct omp_context *ctx)
3801 enum tree_code cond_code;
/* Negate the loop condition: LT -> GE, otherwise (GT) -> LE.  */
3803 cond_code = fd->cond_code;
3804 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
3806 /* When possible, use a strict equality expression. This can let VRP
3807 type optimizations deduce the value and remove a copy. */
3808 if (host_integerp (fd->step, 0))
3810 HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step);
/* With a unit step, V ends exactly at N2, so "==" is equivalent
   to the negated relational test.  */
3811 if (step == 1 || step == -1)
3812 cond_code = EQ_EXPR;
3815 cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);
/* Emit the copy-out assignments guarded by COND.  */
3817 clauses = OMP_FOR_CLAUSES (fd->for_stmt);
3818 lower_lastprivate_clauses (clauses, cond, body_p, ctx);
3822 /* Lower code for an OpenMP loop directive. */

/* Lower OMP_FOR at *STMT_P within context CTX.  Builds a new
   BIND_EXPR containing, in order: the pre-body and input-clause
   setup, the (still-marked) OMP_FOR statement, then the
   lastprivate/reduction exit code and an optional barrier.  */
3825 lower_omp_for (tree *stmt_p, omp_context *ctx)
3827 tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p;
3828 struct omp_for_data fd;
3832 push_gimplify_context ();
/* Recursively lower nested directives first.  */
3834 lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
3835 lower_omp (&OMP_FOR_BODY (stmt), ctx);
3837 /* Move declaration of temporaries in the loop body before we make
3839 if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR)
3840 record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn);
3842 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
3843 TREE_SIDE_EFFECTS (new_stmt) = 1;
3844 body_p = &BIND_EXPR_BODY (new_stmt);
3846 /* The pre-body and input clauses go before the lowered OMP_FOR. */
3849 append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
3850 lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx);
3852 /* Lower the header expressions. At this point, we can assume that
3853 the header is of the form:
3855 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
3857 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
3858 using the .omp_data_s mapping, if needed. */
3859 rhs_p = &TREE_OPERAND (OMP_FOR_INIT (stmt), 1);
3860 if (!is_gimple_min_invariant (*rhs_p))
3861 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
3863 rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1);
3864 if (!is_gimple_min_invariant (*rhs_p))
3865 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
3867 rhs_p = &TREE_OPERAND (TREE_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
3868 if (!is_gimple_min_invariant (*rhs_p))
3869 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
3871 /* Once lowered, extract the bounds and clauses. */
3872 extract_omp_for_data (stmt, &fd);
3874 /* Region exit marker goes at the end of the loop body. */
3875 t = make_node (OMP_RETURN_EXPR);
3876 append_to_statement_list (t, &OMP_FOR_BODY (stmt));
3877 maybe_catch_exception (&OMP_FOR_BODY (stmt));
3878 append_to_statement_list (stmt, body_p);
3880 /* After the loop, add exit clauses. */
/* Lastprivate copy-outs are gated on the loop-exit predicate and
   accumulated into DLIST together with the destructor calls.  */
3881 lower_omp_for_lastprivate (&fd, &dlist, ctx);
3882 lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
3883 append_to_statement_list (dlist, body_p);
3885 /* Add a barrier unless the user specified NOWAIT. Note that if
3886 this is a combined parallel+loop construct, the barrier will be
3887 optimized away during expansion (see expand_omp_for). */
3888 if (!fd.have_nowait)
3890 tree stmt = alloc_stmt_list ();
3891 build_omp_barrier (&stmt);
3892 append_to_statement_list (stmt, body_p);
3895 pop_gimplify_context (NULL_TREE);
/* Declare lowering temporaries in the destination function.  */
3896 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
/* The pre-body has been moved into the new BIND_EXPR; clear it.  */
3898 OMP_FOR_PRE_BODY (stmt) = NULL_TREE;
3903 /* Lower the OpenMP parallel directive in *STMT_P. CTX holds context
3904 information for the directive. */

/* The lowered form is a BIND_EXPR sequencing: sender-side input
   setup (ILIST), the OMP_PARALLEL statement whose body has been
   rebuilt (receiver setup, private/firstprivate setup, lowered body,
   reductions, OMP_RETURN_EXPR), then sender-side copy-back (OLIST).
   The parallel body will later be outlined into CHILD_FN by
   pass_expand_omp.  */
3907 lower_omp_parallel (tree *stmt_p, omp_context *ctx)
3909 tree clauses, par_bind, par_body, new_body, bind;
3910 tree olist, ilist, par_olist, par_ilist;
3911 tree stmt, child_fn, t;
3915 clauses = OMP_PARALLEL_CLAUSES (stmt);
3916 par_bind = OMP_PARALLEL_BODY (stmt);
3917 par_body = BIND_EXPR_BODY (par_bind);
3918 child_fn = ctx->cb.dst_fn;
3920 push_gimplify_context ();
/* Receiver-side (child function) clause code.  */
3922 par_olist = NULL_TREE;
3923 par_ilist = NULL_TREE;
3924 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
3925 lower_omp (&par_body, ctx);
3926 maybe_catch_exception (&par_body);
3927 lower_reduction_clauses (clauses, &par_olist, ctx);
3929 /* Declare all the variables created by mapping and the variables
3930 declared in the scope of the parallel body. */
3931 record_vars_into (ctx->block_vars, child_fn);
3932 record_vars_into (BIND_EXPR_VARS (par_bind), child_fn);
/* If any data is shared through .omp_data_s, create the sender-side
   object and record it as the directive's data argument.  */
3934 if (ctx->record_type)
3936 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o");
3937 OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl;
/* Sender-side (parent function) marshalling code.  */
3942 lower_send_clauses (clauses, &ilist, &olist, ctx);
3943 lower_send_shared_vars (&ilist, &olist, ctx);
3945 /* Once all the expansions are done, sequence all the different
3946 fragments inside OMP_PARALLEL_BODY. */
3947 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
3948 append_to_statement_list (ilist, &BIND_EXPR_BODY (bind));
3950 new_body = alloc_stmt_list ();
/* In the child, point the receiver at the sender's record.  After
   outlining this becomes the incoming data pointer.  */
3952 if (ctx->record_type)
3954 t = build_fold_addr_expr (ctx->sender_decl);
3955 t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
3956 append_to_statement_list (t, &new_body);
3959 append_to_statement_list (par_ilist, &new_body);
3960 append_to_statement_list (par_body, &new_body);
3961 append_to_statement_list (par_olist, &new_body);
/* Region exit marker for the expansion pass.  */
3962 t = make_node (OMP_RETURN_EXPR);
3963 append_to_statement_list (t, &new_body);
3964 OMP_PARALLEL_BODY (stmt) = new_body;
/* Sequence: sender setup, the directive itself, sender copy-back.  */
3966 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3967 append_to_statement_list (olist, &BIND_EXPR_BODY (bind));
3971 pop_gimplify_context (NULL_TREE);
3975 /* Pass *TP back through the gimplifier within the context determined by WI.
3976 This handles replacement of DECL_VALUE_EXPR, as well as adjusting the
3977 flags on ADDR_EXPR. */

3980 lower_regimplify (tree *tp, struct walk_stmt_info *wi)
3982 enum gimplify_status gs;
/* Choose the gimplification predicate from the walker's demands:
   lvalue, plain rvalue, or formal temporary.  */
3986 gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue);
3987 else if (wi->val_only)
3988 gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue);
3990 gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue);
/* Re-gimplification of an already-gimple expression must succeed.  */
3991 gcc_assert (gs == GS_ALL_DONE);
/* Emit any statements the gimplifier produced ahead of this one.  */
3994 tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT);
3998 /* Callback for walk_stmts. Lower the OpenMP directive pointed by TP. */

/* DATA is the walk_stmt_info; its INFO field carries the current
   omp_context.  Dispatches each OMP_* directive to its lowering
   routine, and re-gimplifies references that may have been remapped
   (DECL_VALUE_EXPR replacement, address-taking).  */
4001 lower_omp_1 (tree *tp, int *walk_subtrees, void *data)
4003 struct walk_stmt_info *wi = data;
4004 omp_context *ctx = wi->info;
4007 /* If we have issued syntax errors, avoid doing any heavy lifting.
4008 Just replace the OpenMP directives with a NOP to avoid
4009 confusing RTL expansion. */
4010 if (errorcount && OMP_DIRECTIVE_P (*tp))
4012 *tp = build_empty_stmt ();
/* Dispatch on the statement code; each directive looks up the
   context recorded for it during the scan phase.  */
4017 switch (TREE_CODE (*tp))
4020 ctx = maybe_lookup_ctx (t);
4021 lower_omp_parallel (tp, ctx);
4025 ctx = maybe_lookup_ctx (t);
4027 lower_omp_for (tp, ctx);
4031 ctx = maybe_lookup_ctx (t);
4033 lower_omp_sections (tp, ctx);
4037 ctx = maybe_lookup_ctx (t);
4039 lower_omp_single (tp, ctx);
4043 ctx = maybe_lookup_ctx (t);
4045 lower_omp_master (tp, ctx);
4049 ctx = maybe_lookup_ctx (t);
4051 lower_omp_ordered (tp, ctx);
4055 ctx = maybe_lookup_ctx (t);
4057 lower_omp_critical (tp, ctx);
/* A decl whose DECL_VALUE_EXPR was installed by the scan phase must
   be re-gimplified so the replacement expression becomes gimple.  */
4061 if (ctx && DECL_HAS_VALUE_EXPR_P (t))
4062 lower_regimplify (tp, wi);
4067 lower_regimplify (tp, wi);
4071 case ARRAY_RANGE_REF:
4075 case VIEW_CONVERT_EXPR:
4077 lower_regimplify (tp, wi);
/* For the operand of an address-like node, only a value is needed.  */
4084 wi->val_only = true;
4085 lower_regimplify (&TREE_OPERAND (t, 0), wi);
/* Default: keep walking into expressions, but not types or decls.  */
4090 if (!TYPE_P (t) && !DECL_P (t))
/* Walk the statement list at *STMT_P, lowering every OpenMP directive
   found, with CTX as the enclosing context (NULL at the top level).  */
4099 lower_omp (tree *stmt_p, omp_context *ctx)
4101 struct walk_stmt_info wi;
4103 memset (&wi, 0, sizeof (wi));
4104 wi.callback = lower_omp_1;
/* Locations are needed so re-gimplified code keeps its source info.  */
4107 wi.want_locations = true;
4109 walk_stmts (&wi, stmt_p);
4112 /* Main entry point. */

/* Run the two phases described at the top of the file: scan the
   function body building omp_contexts, then lower the directives if
   any were found.  */
4115 execute_lower_omp (void)
4117 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
4118 delete_omp_context);
/* Phase 1: scan for directives and data-sharing clauses.  */
4120 scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
4121 gcc_assert (parallel_nesting_level == 0);
/* Phase 2: lower, but only if the scan found any contexts.  */
4123 if (all_contexts->root)
4124 lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
4128 splay_tree_delete (all_contexts);
4129 all_contexts = NULL;
/* Gate: run this pass only when -fopenmp is in effect.  */
4135 gate_lower_omp (void)
4137 return flag_openmp != 0;
/* Pass descriptor for the OpenMP lowering pass ("omplower").  Takes
   gimplified input (PROP_gimple_any) and provides PROP_gimple_lomp.  */
4140 struct tree_opt_pass pass_lower_omp =
4142 "omplower", /* name */
4143 gate_lower_omp, /* gate */
4144 execute_lower_omp, /* execute */
4147 0, /* static_pass_number */
4149 PROP_gimple_any, /* properties_required */
4150 PROP_gimple_lomp, /* properties_provided */
4151 0, /* properties_destroyed */
4152 0, /* todo_flags_start */
4153 TODO_dump_func, /* todo_flags_finish */
4157 /* The following is a utility to diagnose OpenMP structured block violations.
4158 It's part of the "omplower" pass, as that's invoked too late. It should
4159 be invoked by the respective front ends after gimplification. */

/* Map from LABEL_DECL to the chain (a TREE_LIST) of OpenMP directives
   enclosing the point where the label is defined.  Built by pass 1
   (diagnose_sb_1), consulted by pass 2 (diagnose_sb_2).  */
4161 static splay_tree all_labels;
4163 /* Check for mismatched contexts and generate an error if needed. Return
4164 true if an error is detected. */

/* STMT_P is the offending branch statement; BRANCH_CTX is the OpenMP
   context chain at the branch; LABEL_CTX is the chain recorded for
   the destination label (NULL if the label is outside any directive).
   On error, the statement is replaced with an empty statement so
   later passes do not see the invalid control flow.  */
4167 diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx)
/* Identical contexts: the branch stays within one structured block.  */
4171 if ((label_ctx ? TREE_VALUE (label_ctx) : NULL) == branch_ctx)
4174 /* Try to avoid confusing the user by producing an error message
4175 with correct "exit" or "enter" verbiage. We prefer "exit"
4176 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
4177 if (branch_ctx == NULL)
/* Walk outward from the label's context looking for BRANCH_CTX;
   finding it proves the branch enters a nested block.  */
4183 if (TREE_VALUE (label_ctx) == branch_ctx)
4188 label_ctx = TREE_CHAIN (label_ctx);
4193 error ("invalid exit from OpenMP structured block");
4195 error ("invalid entry to OpenMP structured block");
/* Neutralize the invalid branch.  */
4197 *stmt_p = build_empty_stmt ();
4201 /* Pass 1: Create a minimal tree of OpenMP structured blocks, and record
4202 where in the tree each label is found. */

/* walk_stmts callback.  WI->info holds the TREE_LIST chain of
   directives enclosing the current statement; each label seen is
   recorded in ALL_LABELS together with that chain.  */
4205 diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data)
4207 struct walk_stmt_info *wi = data;
4208 tree context = (tree) wi->info;
4213 switch (TREE_CODE (t))
/* Clause expressions may themselves contain labels/branches.  */
4218 walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL);
4224 /* The minimal context here is just a tree of statements. */
/* Push this directive onto the context chain and walk its body.  */
4225 inner_context = tree_cons (NULL, t, context);
4226 wi->info = inner_context;
4227 walk_stmts (wi, &OMP_BODY (t));
/* OMP_FOR: the loop header expressions and pre-body are part of the
   directive's context too.  */
4232 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL);
4233 inner_context = tree_cons (NULL, t, context);
4234 wi->info = inner_context;
4235 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL);
4236 walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL);
4237 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL);
4238 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
4239 walk_stmts (wi, &OMP_FOR_BODY (t));
/* Record the label together with its enclosing directive chain.  */
4244 splay_tree_insert (all_labels, (splay_tree_key) LABEL_EXPR_LABEL (t),
4245 (splay_tree_value) context);
4255 /* Pass 2: Check each branch and see if its context differs from that of
4256 the destination label's context. */

/* walk_stmts callback.  Mirrors diagnose_sb_1's traversal, but at
   each GOTO/SWITCH/RETURN compares the current directive chain with
   the one recorded for the destination label in ALL_LABELS, calling
   diagnose_sb_0 to report mismatches.  */
4259 diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data)
4261 struct walk_stmt_info *wi = data;
4262 tree context = (tree) wi->info;
4267 switch (TREE_CODE (t))
4272 walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL);
/* Directive: walk its body with the directive pushed on the chain.  */
4279 walk_stmts (wi, &OMP_BODY (t));
4284 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL);
4286 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL);
4287 walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL);
4288 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL);
4289 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
4290 walk_stmts (wi, &OMP_FOR_BODY (t));
/* GOTO: computed gotos (non-LABEL_DECL destinations) are skipped.  */
4296 tree lab = GOTO_DESTINATION (t);
4297 if (TREE_CODE (lab) != LABEL_DECL)
4300 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
4301 diagnose_sb_0 (tp, context, n ? (tree) n->value : NULL_TREE);
/* SWITCH: check every case label's destination context; stop at the
   first error reported.  */
4307 tree vec = SWITCH_LABELS (t);
4308 int i, len = TREE_VEC_LENGTH (vec);
4309 for (i = 0; i < len; ++i)
4311 tree lab = CASE_LABEL (TREE_VEC_ELT (vec, i));
4312 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
4313 if (diagnose_sb_0 (tp, context, (tree) n->value))
/* RETURN from inside a directive is an invalid exit; NULL_TREE
   stands for "outside all OpenMP blocks".  */
4320 diagnose_sb_0 (tp, context, NULL_TREE);
/* Entry point for the structured-block diagnostic utility: run both
   walk passes over FNDECL's saved tree.  Intended to be called by
   front ends after gimplification (see comment above ALL_LABELS).
   Temporarily switches current_function_decl to FNDECL.  */
4331 diagnose_omp_structured_block_errors (tree fndecl)
4333 tree save_current = current_function_decl;
4334 struct walk_stmt_info wi;
4336 current_function_decl = fndecl;
4338 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
/* Pass 1: record each label's enclosing directive chain.  */
4340 memset (&wi, 0, sizeof (wi));
4341 wi.callback = diagnose_sb_1;
4342 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
/* Pass 2: check branches (including RETURN_EXPRs) against pass 1's
   records; locations are wanted for accurate error messages.  */
4344 memset (&wi, 0, sizeof (wi));
4345 wi.callback = diagnose_sb_2;
4346 wi.want_locations = true;
4347 wi.want_return_expr = true;
4348 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
4350 splay_tree_delete (all_labels);
4353 current_function_decl = save_current;
4356 #include "gt-omp-low.h"