#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "except.h"
#include "splay-tree.h"
#include "optabs.h"
#include "cfgloop.h"
#include "target.h"
#include "omp-low.h"
#include "gimple-low.h"
#include "tree-cfgcleanup.h"
#include "gt-omp-low.h"

Include dependency graph for omp-low.c:

Data Structures
struct	omp_region
struct	omp_context
struct	omp_for_data_loop
struct	omp_for_data
struct	omp_taskcopy_context

Macros
#define	WALK_SUBSTMTS

Typedefs
typedef struct omp_context	omp_context

Functions
static void	scan_omp (gimple_seq , omp_context )
static tree	scan_omp_1_op (tree , int , void *)
static tree	scan_omp_op ()
static void	lower_omp (gimple_seq , omp_context )
static tree	lookup_decl_in_outer_ctx (tree, omp_context *)
static tree	maybe_lookup_decl_in_outer_ctx (tree, omp_context *)
tree	find_omp_clause ()
static bool	is_parallel_ctx ()
static bool	is_task_ctx ()
static bool	is_taskreg_ctx ()
static bool	is_combined_parallel ()
static void	extract_omp_for_data (gimple for_stmt, struct omp_for_data fd, struct omp_for_data_loop loops)
static bool	workshare_safe_to_combine_p ()
static vec< tree, va_gc > *	get_ws_args_for ()
static void	determine_parallel_type ()
static bool	is_variable_sized ()
static bool	is_reference ()
static tree	lookup_decl ()
static tree	maybe_lookup_decl ()
static tree	lookup_field ()
static tree	lookup_sfield ()
static tree	maybe_lookup_field ()
static bool	use_pointer_for_field ()
static tree	omp_copy_decl_2 ()
static tree	omp_copy_decl_1 ()
static tree	omp_build_component_ref ()
static tree	build_receiver_ref ()
static tree	build_outer_var_ref ()
static tree	build_sender_ref ()
static void	install_var_field ()
static tree	install_var_local ()
static void	fixup_remapped_decl ()
static tree	omp_copy_decl ()
void	dump_omp_region (FILE , struct omp_region , int)
void	debug_omp_region (struct omp_region *)
void	debug_all_omp_regions (void)
void	dump_omp_region ()
DEBUG_FUNCTION void	debug_omp_region ()
static struct omp_region *	new_omp_region (basic_block bb, enum gimple_code type, struct omp_region *parent)
static void	free_omp_region_1 ()
void	free_omp_regions ()
static omp_context *	new_omp_context ()
static gimple_seq	maybe_catch_exception (gimple_seq)
static void	finalize_task_copyfn ()
static void	delete_omp_context ()
static void	fixup_child_record_type ()
static void	scan_sharing_clauses ()
static tree	create_omp_child_function_name ()
static void	create_omp_child_function ()
static tree	find_combined_for (gimple_stmt_iterator gsi_p, bool handled_ops_p, struct walk_stmt_info *wi)
static void	scan_omp_parallel ()
static void	scan_omp_task ()
static void	scan_omp_for ()
static void	scan_omp_sections ()
static void	scan_omp_single ()
static void	scan_omp_target ()
static void	scan_omp_teams ()
static bool	check_omp_nesting_restrictions ()
static tree	scan_omp_1_op ()
static tree	scan_omp_1_stmt (gimple_stmt_iterator gsi, bool handled_ops_p, struct walk_stmt_info *wi)
static void	scan_omp ()
static gimple	build_omp_barrier ()
static omp_context *	maybe_lookup_ctx ()
static tree	lookup_decl_in_outer_ctx ()
static tree	maybe_lookup_decl_in_outer_ctx ()
tree	omp_reduction_init ()
static tree	omp_clause_aligned_alignment ()
static int	omp_max_vf ()
static bool	lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf, tree &idx, tree &lane, tree &ivar, tree &lvar)
static void	lower_rec_input_clauses (tree clauses, gimple_seq ilist, gimple_seq dlist, omp_context ctx, struct omp_for_data fd)
static void	lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq stmt_list, omp_context ctx)
static void	lower_reduction_clauses ()
static void	lower_copyprivate_clauses (tree clauses, gimple_seq slist, gimple_seq rlist, omp_context *ctx)
static void	lower_send_clauses (tree clauses, gimple_seq ilist, gimple_seq olist, omp_context *ctx)
static void	lower_send_shared_vars ()
static gimple	gimple_build_cond_empty ()
static void	expand_parallel_call (struct omp_region region, basic_block bb, gimple entry_stmt, vec< tree, va_gc > ws_args)
static void	expand_task_call ()
static gimple_seq	maybe_catch_exception ()
static tree	vec2chain ()
static void	remove_exit_barrier ()
static void	remove_exit_barriers ()
static void	optimize_omp_library_calls ()
static tree	expand_omp_regimplify_p ()
static void	expand_omp_build_assign ()
static void	expand_omp_taskreg ()
static void	expand_omp_for_init_counts (struct omp_for_data fd, gimple_stmt_iterator gsi, basic_block &entry_bb, tree *counts, basic_block &zero_iter_bb, int &first_zero_iter, basic_block &l2_dom_bb)
static void	expand_omp_for_init_vars (struct omp_for_data fd, gimple_stmt_iterator gsi, tree *counts, gimple inner_stmt, tree startvar)
static basic_block	extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, basic_block body_bb)
static void	expand_omp_for_generic (struct omp_region region, struct omp_for_data fd, enum built_in_function start_fn, enum built_in_function next_fn, gimple inner_stmt)
static void	expand_omp_for_static_nochunk (struct omp_region region, struct omp_for_data fd, gimple inner_stmt)
static void	expand_omp_for_static_chunk (struct omp_region region, struct omp_for_data fd, gimple inner_stmt)
static void	expand_omp_simd ()
static void	expand_omp_for ()
static void	expand_omp_sections ()
static void	expand_omp_single ()
static void	expand_omp_synch ()
static bool	expand_omp_atomic_load (basic_block load_bb, tree addr, tree loaded_val, int index)
static bool	expand_omp_atomic_store (basic_block load_bb, tree addr, tree loaded_val, tree stored_val, int index)
static bool	expand_omp_atomic_fetch_op (basic_block load_bb, tree addr, tree loaded_val, tree stored_val, int index)
static bool	expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, tree addr, tree loaded_val, tree stored_val, int index)
static bool	expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, tree addr, tree loaded_val, tree stored_val)
static void	expand_omp_atomic ()
static void	expand_omp_target ()
static void	expand_omp ()
static void	build_omp_regions_1 (basic_block bb, struct omp_region *parent, bool single_tree)
static void	build_omp_regions_root ()
void	omp_expand_local ()
static void	build_omp_regions ()
static unsigned int	execute_expand_omp ()
static bool	gate_expand_omp ()
gimple_opt_pass *	make_pass_expand_omp ()
static void	maybe_add_implicit_barrier_cancel ()
static void	lower_omp_sections ()
static void	lower_omp_single_simple ()
static void	lower_omp_single_copy ()
static void	lower_omp_single ()
static void	lower_omp_master ()
static void	lower_omp_taskgroup ()
static void	lower_omp_ordered ()
static void	lower_omp_critical ()
static void	lower_omp_for_lastprivate (struct omp_for_data fd, gimple_seq body_p, gimple_seq dlist, struct omp_context ctx)
static void	lower_omp_for ()
static tree	check_combined_parallel (gimple_stmt_iterator gsi_p, bool handled_ops_p, struct walk_stmt_info *wi)
static tree	task_copyfn_copy_decl ()
static tree	task_copyfn_remap_type ()
static void	create_task_copyfn ()
static void	lower_depend_clauses ()
static void	lower_omp_taskreg ()
static void	lower_omp_target ()
static void	lower_omp_teams ()
static tree	lower_omp_regimplify_p (tree tp, int walk_subtrees, void *data)
static void	lower_omp_1 ()
static void	lower_omp ()
static unsigned int	execute_lower_omp ()
gimple_opt_pass *	make_pass_lower_omp ()
static bool	diagnose_sb_0 (gimple_stmt_iterator *gsi_p, gimple branch_ctx, gimple label_ctx)
static tree	diagnose_sb_1 (gimple_stmt_iterator gsi_p, bool handled_ops_p, struct walk_stmt_info *wi)
static tree	diagnose_sb_2 (gimple_stmt_iterator gsi_p, bool handled_ops_p, struct walk_stmt_info *wi)
bool	make_gimple_omp_edges ()
static unsigned int	diagnose_omp_structured_block_errors ()
static bool	gate_diagnose_omp_blocks ()
gimple_opt_pass *	make_pass_diagnose_omp_blocks ()

Variables
static splay_tree	all_contexts
static int	taskreg_nesting_level
static int	target_nesting_level
static struct omp_region *	root_omp_region
static bitmap	task_shared_vars
static unsigned int	tmp_ompfn_id_num
static splay_tree	critical_name_mutexes
static splay_tree	all_labels

Macro Definition Documentation

#define WALK_SUBSTMTS

Value:

case GIMPLE_BIND: \
    case GIMPLE_TRY: \
    case GIMPLE_CATCH: \
    case GIMPLE_EH_FILTER: \
    case GIMPLE_TRANSACTION: \
      *handled_ops_p = false; \
      break;

Referenced by create_omp_child_function().

Typedef Documentation

typedef struct omp_context omp_context

Context structure. Used to store information about each parallel directive in the code.

Function Documentation

static gimple build_omp_barrier ( )

static

Re-gimplification and code generation routines. Build a call to GOMP_barrier.

static void build_omp_regions ( )

static

Scan the CFG and build a tree of OMP regions. Return the root of the OMP region tree.

static void build_omp_regions_1	(	basic_block	bb,
		struct omp_region *	parent,
		bool	single_tree
	)

static

Helper for build_omp_regions. Scan the dominator tree starting at block BB. PARENT is the region that contains BB. If SINGLE_TREE is true, the function ends once a single tree is built (otherwise, a whole forest of OMP constructs may be built).

         STMT is the return point out of region PARENT.  Mark it
         as the exit point and make PARENT the immediately
         enclosing region.

         GIMPLE_OMP_ATOMIC_STORE is analogous to
         GIMPLE_OMP_RETURN, but matches with
         GIMPLE_OMP_ATOMIC_LOAD.

         GIMPLE_OMP_SECTIONS_SWITCH is part of
         GIMPLE_OMP_SECTIONS, and we do nothing for it.

         Otherwise, this directive becomes the parent for a new
         region.

static void build_omp_regions_root ( )

static

Builds the tree of OMP regions rooted at ROOT, storing it to root_omp_region.

static tree build_outer_var_ref ( )

static

Build tree nodes to access VAR in the scope outer to CTX. In the case of a parallel, this is a component reference; for workshare constructs this is some variable.

#pragma omp simd isn't a worksharing construct, and can reference even private vars in its linear etc. clauses.

   This can happen with orphaned constructs.  If var is reference, it is
   possible it is shared and as such valid.

Referenced by omp_copy_decl_2().

static tree build_receiver_ref ( )

static

Build tree nodes to access the field for VAR on the receiver side.

If the receiver record type was remapped in the child function, remap the field into the new record type.

static tree build_sender_ref ( )

static

Build tree nodes to access the field for VAR on the sender side.

References build_decl, DECL_ABSTRACT_ORIGIN, DECL_NAME, DECL_SOURCE_LOCATION, lang_hooks_for_types::make_type, omp_context::record_type, omp_context::sfield_map, omp_context::srecord_type, TREE_CHAIN, TREE_TYPE, TYPE_FIELDS, and lang_hooks::types.

Referenced by lower_reduction_clauses().

static tree check_combined_parallel	(	gimple_stmt_iterator *	gsi_p,
		bool *	handled_ops_p,
		struct walk_stmt_info *	wi
	)

static

Callback for walk_stmts. Check if the current statement only contains GIMPLE_OMP_FOR or GIMPLE_OMP_PARALLEL.

static bool check_omp_nesting_restrictions ( )

static

Check OpenMP nesting restrictions.

FALLTHRU

     FALLTHRU

static void create_omp_child_function ( )

static

Build a decl for the omp child function. It'll not contain a body yet, just the bare decl.

Allocate memory for the function structure. The call to allocate_struct_function clobbers CFUN, so we need to restore it afterward.

References GF_OMP_FOR_KIND_FOR, gimple_omp_for_combined_into_p(), gimple_omp_for_kind(), gsi_stmt(), walk_stmt_info::info, integer_zero_node, NULL, and WALK_SUBSTMTS.

static tree create_omp_child_function_name ( )

static

static void create_task_copyfn ( )

static

Create task copyfn.

 Reset DECL_CONTEXT on function arguments.

 Populate the function.

 Remap src and dst argument types if needed.

 First pass: initialize temporaries used in record_type and srecord_type
 sizes and field offsets.

 Second pass: copy shared var pointers and copy construct non-VLA
 firstprivate vars.

 Last pass: handle VLA firstprivates.

DEBUG_FUNCTION void debug_all_omp_regions ( void )

References all_contexts.

void debug_omp_region ( struct omp_region * )

DEBUG_FUNCTION void debug_omp_region ( )

static void delete_omp_context ( )

static

Destroy an omp_context data structure. Called through the splay tree value delete callback.

We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before it produces corrupt debug information.

static void determine_parallel_type ( )

static

Discover whether REGION is a combined parallel+workshare region.

 We only support parallel+for and parallel+sections.

 Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
 WS_EXIT_BB -> PAR_EXIT_BB.

         If this is a combined parallel loop, we need to determine
         whether or not to use the combined library calls.  There
         are two cases where we do not apply the transformation:
         static loops and any kind of ordered loop.  In the first
         case, we already open code the loop so there is no need
         to do anything else.  In the latter case, the combined
         parallel loop call would still need extra synchronization
         to implement ordered semantics, so there would not be any
         gain in using the combined call.

static unsigned int diagnose_omp_structured_block_errors ( )

static

static bool diagnose_sb_0	(	gimple_stmt_iterator *	gsi_p,
		gimple	branch_ctx,
		gimple	label_ctx
	)

static

Check for mismatched contexts and generate an error if needed. Return true if an error is detected.

If it's obvious we have an invalid entry, be specific about the error.

   Otherwise, be vague and lazy, but efficient.

static tree diagnose_sb_1	(	gimple_stmt_iterator *	gsi_p,
		bool *	handled_ops_p,
		struct walk_stmt_info *	wi
	)

static

Pass 1: Create a minimal tree of OpenMP structured blocks, and record where each label is found.

The minimal context here is just the current OMP construct.

     gimple_omp_for_{index,initial,final} are all DECLs; no need to
     walk them.

static tree diagnose_sb_2	(	gimple_stmt_iterator *	gsi_p,
		bool *	handled_ops_p,
		struct walk_stmt_info *	wi
	)

static

Pass 2: Check each branch and see if its context differs from that of the destination label's context.

gimple_omp_for_{index,initial,final} are all DECLs; no need to walk them.

void dump_omp_region	(	FILE *	,
		struct omp_region *	,
		int
	)

Return the parallel region associated with STMT. Debugging dumps for parallel regions.

Referenced by omp_copy_decl().

void dump_omp_region ( )

Dump the parallel region tree rooted at REGION.

References free_omp_region_1(), omp_region::inner, and omp_region::next.

static unsigned int execute_expand_omp ( )

static

Main entry point for expanding OMP-GIMPLE into runtime calls.

static unsigned int execute_lower_omp ( )

static

Main entry point.

This pass always runs, to provide PROP_gimple_lomp. But there is nothing to do unless -fopenmp is given.

static void expand_omp ( )

static

Expand the parallel region tree rooted at REGION. Expansion proceeds in depth-first order. Innermost regions are expanded first. This way, parallel regions that require a new function to be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any internal dependencies in their body.

First, determine whether this is a combined parallel+workshare region.

         Individual omp sections are handled together with their
         parent GIMPLE_OMP_SECTIONS region.

static void expand_omp_atomic ( )

static

Expand a GIMPLE_OMP_ATOMIC statement. We try to expand using expand_omp_atomic_fetch_op. If that fails, we try to call expand_omp_atomic_pipeline, and if it fails too, the ultimate fallback is wrapping the operation in a mutex (expand_omp_atomic_mutex). REGION is the atomic region built by build_omp_regions_1().

 Make sure the type is one of the supported sizes.

     __sync builtins require strict data alignment.

         Atomic load.

         Atomic store.

         When possible, use specialized atomic update functions.

         If we don't have specialized __sync builtins, try and implement
         as a compare and swap loop.

 The ultimate fallback is wrapping the operation in a mutex.

static bool expand_omp_atomic_fetch_op	(	basic_block	load_bb,
		tree	addr,
		tree	loaded_val,
		tree	stored_val,
		int	index
	)

static

A subroutine of expand_omp_atomic. Attempt to implement the atomic operation as a __atomic_fetch_op builtin. INDEX is log2 of the size of the data type, and thus usable to find the index of the builtin decl. Returns false if the expression is not of the proper form.

 We expect to find the following sequences:

load_bb: GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

store_bb: val = tmp OP something; (or: something OP tmp) GIMPLE_OMP_STORE (val)

???FIXME: Allow a more flexible sequence. Perhaps use data flow to pick the statements.

 Check for one of the supported fetch-op operations.

 Make sure the expression is of the proper form.

 We could test all of the various optabs involved, but the fact of the
 matter is that (with the exception of i486 vs i586 and xadd) all targets
 that support any atomic operation optab also implement compare-and-swap.
 Let optabs.c take care of expanding any compare-and-swap loop.

 OpenMP does not imply any barrier-like semantics on its atomic ops.
 It only requires that the operation happen atomically.  Thus we can
 use the RELAXED memory model.

static bool expand_omp_atomic_load	(	basic_block	load_bb,
		tree	addr,
		tree	loaded_val,
		int	index
	)

static

A subroutine of expand_omp_atomic. Attempt to implement the atomic operation as a normal volatile load.

??? If the target does not implement atomic_load_optab[mode], and mode is smaller than word size, then expand_atomic_load assumes that the load is atomic. We could avoid the builtin entirely in this case.

References build_pointer_type_for_mode(), builtin_decl_explicit(), can_compare_and_swap_p(), create_tmp_reg(), create_tmp_var, fold_convert, force_gimple_operand_gsi(), gcc_assert, gimple_build_assign, gsi_insert_before(), gsi_last_bb(), GSI_SAME_STMT, gsi_stmt(), INTEGRAL_TYPE_P, NULL, NULL_TREE, POINTER_TYPE_P, ptr_mode, si, single_succ(), TREE_TYPE, type(), TYPE_MAIN_VARIANT, and TYPE_MODE.

static bool expand_omp_atomic_mutex	(	basic_block	load_bb,
		basic_block	store_bb,
		tree	addr,
		tree	loaded_val,
		tree	stored_val
	)

static

A subroutine of expand_omp_atomic. Implement the atomic operation as:

                           GOMP_atomic_start ();
                           *addr = rhs;
                           GOMP_atomic_end ();

The result is not globally atomic, but works so long as all parallel references are within #pragma omp atomic directives. According to responses received from omp@openmp.org, this appears to be within spec. Which makes sense, since that's how several other compilers handle this situation as well. LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're expanding. STORED_VAL is the operand of the matching GIMPLE_OMP_ATOMIC_STORE.

We replace GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with loaded_val = *addr;

and replace GIMPLE_OMP_ATOMIC_STORE (stored_val) with *addr = stored_val;

References OMP_CLAUSE_DEVICE_ID, and OMP_CLAUSE_LOCATION.

static bool expand_omp_atomic_pipeline	(	basic_block	load_bb,
		basic_block	store_bb,
		tree	addr,
		tree	loaded_val,
		tree	stored_val,
		int	index
	)

static

A subroutine of expand_omp_atomic. Implement the atomic operation as:

  oldval = *addr;
repeat:
  newval = rhs;    // with oldval replacing *addr in rhs
  oldval = __sync_val_compare_and_swap (addr, oldval, newval);
  if (oldval != newval)
    goto repeat;

INDEX is log2 of the size of the data type, and thus usable to find the index of the builtin decl.

 ??? We need a non-pointer interface to __atomic_compare_exchange in
 order to use the RELAXED memory model effectively.

 Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.

 For floating-point values, we'll need to view-convert them to integers
 so that we can perform the atomic compare and swap.  Simplify the
 following code by always setting up the "i"ntegral variables.

 Move the value to the LOADEDI temporary.

 Build the compare&swap statement.

 Note that we always perform the comparison as an integer, even for
 floating point.  This allows the atomic operation to properly
 succeed even with NaNs and -0.0.

 Update cfg.

 Copy the new value to loadedi (we already did that before the condition
 if we are not in SSA).

 Remove GIMPLE_OMP_ATOMIC_STORE.

static bool expand_omp_atomic_store	(	basic_block	load_bb,
		tree	addr,
		tree	loaded_val,
		tree	stored_val,
		int	index
	)

static

A subroutine of expand_omp_atomic. Attempt to implement the atomic operation as a normal volatile store.

 If the load value is needed, then this isn't a store but an exchange.

 ??? If the target does not implement atomic_store_optab[mode], and mode
 is smaller than word size, then expand_atomic_store assumes that the store
 is atomic.  We could avoid the builtin entirely in this case.

 Dig out the type of the function's second argument.

 Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.

static void expand_omp_build_assign ( )

static

Prepend TO = FROM assignment before *GSI_P.

static void expand_omp_for ( )

static

Expand the OpenMP loop defined by REGION.

If there isn't a continue then this is a degenerate case where the introduction of abnormal edges during lowering will prevent original loops from being detected. Fix that up.

static void expand_omp_for_generic	(	struct omp_region *	region,
		struct omp_for_data *	fd,
		enum built_in_function	start_fn,
		enum built_in_function	next_fn,
		gimple	inner_stmt
	)

static

A subroutine of expand_omp_for. Generate code for a parallel loop with any schedule. Given parameters:

 for (V = N1; V cond N2; V += STEP) BODY;

where COND is "<" or ">", we generate pseudocode

 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
 if (more) goto L0; else goto L3;

L0:
  V = istart0;
  iend = iend0;
L1:
  BODY;
  V += STEP;
  if (V cond iend) goto L1; else goto L2;
L2:
  if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
L3:

If this is a combined omp parallel loop, instead of the call to GOMP_loop_foo_start, we call GOMP_loop_foo_next. If this is gimple_omp_for_combined_p loop, then instead of assigning V and iend in L0 we assign the first two looptemp clause decls of the inner GIMPLE_OMP_FOR and V += STEP; and if (V cond iend) goto L1; else goto L2; are removed.

For collapsed loops, given parameters:

  collapse(3)
  for (V1 = N11; V1 cond1 N12; V1 += STEP1)
    for (V2 = N21; V2 cond2 N22; V2 += STEP2)
      for (V3 = N31; V3 cond3 N32; V3 += STEP3)
        BODY;

we generate pseudocode

if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
if (cond3 is <)
  adj = STEP3 - 1;
else
  adj = STEP3 + 1;
count3 = (adj + N32 - N31) / STEP3;
if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
if (cond2 is <)
  adj = STEP2 - 1;
else
  adj = STEP2 + 1;
count2 = (adj + N22 - N21) / STEP2;
if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
if (cond1 is <)
  adj = STEP1 - 1;
else
  adj = STEP1 + 1;
count1 = (adj + N12 - N11) / STEP1;
count = count1 * count2 * count3;
goto Z1;

Z0:
  count = 0;
Z1:
  more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
  if (more) goto L0; else goto L3;
L0:
  V = istart0;
  T = V;
  V3 = N31 + (T % count3) * STEP3;
  T = T / count3;
  V2 = N21 + (T % count2) * STEP2;
  T = T / count2;
  V1 = N11 + T * STEP1;
  iend = iend0;
L1:
  BODY;
  V += 1;
  if (V < iend) goto L10; else goto L2;
L10:
  V3 += STEP3;
  if (V3 cond3 N32) goto L1; else goto L11;
L11:
  V3 = N31;
  V2 += STEP2;
  if (V2 cond2 N22) goto L1; else goto L12;
L12:
  V2 = N21;
  V1 += STEP1;
  goto L1;
L2:
  if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
L3:

 See if we need to bias by LLONG_MIN.

         Some counts[i] vars might be uninitialized if
         some loop has zero iterations.  But the body shouldn't
         be executed in that case, so just avoid uninit warnings.

     In a combined parallel loop, emit a call to
     GOMP_loop_foo_next.

     If this is not a combined parallel loop, emit a call to
     GOMP_loop_foo_start in ENTRY_BB.

         Avoid casting pointers to integer of a different size.

         The GOMP_loop_ull_*start functions have additional boolean
         argument, true for < loops and false for > loops.
         In Fortran, the C bool type can be different from
         boolean_type_node.

 Remove the GIMPLE_OMP_FOR statement.

 Iteration setup for sequential loop goes in L0_BB.

     Code to control the increment and predicate for the sequential
     loop goes in the CONT_BB.

     Remove GIMPLE_OMP_CONTINUE.

     Emit code to get the next parallel iteration in L2_BB.

 Add the loop cleanup function.

 Connect the new blocks.

         The loop may have multiple latches.

static void expand_omp_for_init_counts	(	struct omp_for_data *	fd,
		gimple_stmt_iterator *	gsi,
		basic_block &	entry_bb,
		tree *	counts,
		basic_block &	zero_iter_bb,
		int &	first_zero_iter,
		basic_block &	l2_dom_bb
	)

static

Helper function for expand_omp_{for_*,simd}. If this is the outermost of the combined collapse > 1 loop constructs, generate code like:

  if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
  if (cond3 is <)
    adj = STEP3 - 1;
  else
    adj = STEP3 + 1;
  count3 = (adj + N32 - N31) / STEP3;
  if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
  if (cond2 is <)
    adj = STEP2 - 1;
  else
    adj = STEP2 + 1;
  count2 = (adj + N22 - N21) / STEP2;
  if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
  if (cond1 is <)
    adj = STEP1 - 1;
  else
    adj = STEP1 + 1;
  count1 = (adj + N12 - N11) / STEP1;
  count = count1 * count2 * count3;

Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:

  count = 0;

and set ZERO_ITER_BB to that bb. If this isn't the outermost of the combined loop constructs, just initialize COUNTS array from the looptemp clauses.

NOTE: It could be better to moosh all of the BBs together, creating one larger BB with all the computation and the unexpected jump at the end. I.e.

bool zero3, zero2, zero1, zero;

zero3 = N32 c3 N31;
count3 = (N32 - N31) /[cl] STEP3;
zero2 = N22 c2 N21;
count2 = (N22 - N21) /[cl] STEP2;
zero1 = N12 c1 N11;
count1 = (N12 - N11) /[cl] STEP1;
zero = zero3 || zero2 || zero1;
count = count1 * count2 * count3;
if (__builtin_expect(zero, false)) goto zero_iter_bb;

After all, we expect the zero=false, and thus we expect to have to evaluate all of the comparison expressions, so short-circuiting oughtn't be a win. Since the condition isn't protecting a denominator, we're not concerned about divide-by-zero, so we can fully evaluate count even if a numerator turned out to be wrong.

It seems like putting this all together would create much better scheduling opportunities, and less pressure on the chip's branch predictor.

 Collapsed loops need work for expansion into SSA form.

     First two _looptemp_ clauses are for istart/iend, counts[0]
     isn't supposed to be handled, as the inner loop doesn't
     use it.

     ?? We could probably use CEIL_DIV_EXPR instead of
     TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
     generate the same code in the end because generically we
     don't know that the values involved must be negative for
     GT??

static void expand_omp_for_init_vars	(	struct omp_for_data *	fd,
		gimple_stmt_iterator *	gsi,
		tree *	counts,
		gimple	inner_stmt,
		tree	startvar
	)

static

Helper function for expand_omp_{for_*,simd}. Generate code like:

  T = V;
  V3 = N31 + (T % count3) * STEP3;
  T = T / count3;
  V2 = N21 + (T % count2) * STEP2;
  T = T / count2;
  V1 = N11 + T * STEP1;

if this loop doesn't have an inner loop construct combined with it. If it does have an inner loop construct combined with it and the iteration count isn't known constant, store values from counts array into its looptemp temporaries instead.

If fd->loop.n2 is constant, then no propagation of the counts is needed, they are constant.

     First two _looptemp_ clauses are for istart/iend, counts[0]
     isn't supposed to be handled, as the inner loop doesn't
     use it.

References omp_for_data_loop::cond_code, omp_region::cont, create_tmp_var, fold_build2, gcc_assert, is_combined_parallel(), omp_for_data::iter_type, long_integer_type_node, long_long_unsigned_type_node, omp_for_data::loop, omp_for_data_loop::n1, omp_for_data_loop::n2, NULL, NULL_TREE, omp_for_data_loop::step, TREE_ADDRESSABLE, TREE_CODE, TREE_TYPE, type(), TYPE_UNSIGNED, and omp_for_data_loop::v.

static void expand_omp_for_static_chunk	(	struct omp_region *	region,
		struct omp_for_data *	fd,
		gimple	inner_stmt
	)

static

A subroutine of expand_omp_for. Generate code for a parallel loop with static schedule and a specified chunk size. Given parameters:

 for (V = N1; V cond N2; V += STEP) BODY;

where COND is "<" or ">", we generate pseudocode

 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
 if (cond is <)
   adj = STEP - 1;
 else
   adj = STEP + 1;
 if ((__typeof (V)) -1 > 0 && cond is >)
   n = -(adj + N2 - N1) / -STEP;
 else
   n = (adj + N2 - N1) / STEP;
 trip = 0;
 V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
                                       here so that V is defined
                                       if the loop is not entered

L0:
  s0 = (trip * nthreads + threadid) * CHUNK;
  e0 = min(s0 + CHUNK, n);
  if (s0 < n) goto L1; else goto L4;
L1:
  V = s0 * STEP + N1;
  e = e0 * STEP + N1;
L2:
  BODY;
  V += STEP;
  if (V cond e) goto L2; else goto L3;
L3:
  trip += 1;
  goto L0;
L4:

 Trip and adjustment setup goes in ENTRY_BB.

 Remove the GIMPLE_OMP_FOR.

 Iteration space partitioning goes in ITER_PART_BB.

 Setup code for sequential iteration goes in SEQ_START_BB.

     The code controlling the sequential loop goes in CONT_BB,
     replacing the GIMPLE_OMP_CONTINUE.

     Remove GIMPLE_OMP_CONTINUE.

     Trip update code goes into TRIP_UPDATE_BB.

 Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.

 Connect the new blocks.

     When we redirect the edge from trip_update_bb to iter_part_bb, we
     remove arguments of the phi nodes in fin_bb.  We need to create
     appropriate phi nodes in iter_part_bb instead.

         A special case -- fd->loop.v is not yet computed in
         iter_part_bb, we need to use v_extra instead.

     Make phi node for trip.

static void expand_omp_for_static_nochunk	(	struct omp_region *	region,
		struct omp_for_data *	fd,
		gimple	inner_stmt
	)

static

A subroutine of expand_omp_for. Generate code for a parallel loop with static schedule and no specified chunk size. Given parameters:

 for (V = N1; V cond N2; V += STEP) BODY;

where COND is "<" or ">", we generate pseudocode

 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
 if (cond is <)
   adj = STEP - 1;
 else
   adj = STEP + 1;
 if ((__typeof (V)) -1 > 0 && cond is >)
   n = -(adj + N2 - N1) / -STEP;
 else
   n = (adj + N2 - N1) / STEP;
 q = n / nthreads;
 tt = n % nthreads;
 if (threadid < tt) goto L3; else goto L4;

L3:
  tt = 0;
  q = q + 1;
L4:
  s0 = q * threadid + tt;
  e0 = s0 + q;
  V = s0 * STEP + N1;
  if (s0 >= e0) goto L2; else goto L0;
L0:
  e = e0 * STEP + N1;
L1:
  BODY;
  V += STEP;
  if (V cond e) goto L1;
L2:

 Iteration space partitioning goes in ENTRY_BB.

 Remove the GIMPLE_OMP_FOR statement.

 Setup code for sequential iteration goes in SEQ_START_BB.

     The code controlling the sequential loop replaces the
     GIMPLE_OMP_CONTINUE.

     Remove the GIMPLE_OMP_CONTINUE statement.

 Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.

 Connect all the blocks.

static tree expand_omp_regimplify_p ( )

static

Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be regimplified.

Any variable with DECL_VALUE_EXPR needs to be regimplified.

static void expand_omp_sections ( )

static

Expand code for an OpenMP sections directive. In pseudo code, we generate

 v = GOMP_sections_start (n);

L0:
  switch (v)
    {
    case 0:
      goto L2;
    case 1:
      section 1;
      goto L1;
    case 2:
      ...
    case n:
      ...
    default:
      abort ();
    }
L1:
  v = GOMP_sections_next ();
  goto L0;
L2:
  reduction;

If this is a combined parallel sections, replace the call to GOMP_sections_start with call to GOMP_sections_next.

This can happen if there are reductions.

 We will build a switch() with enough cases for all the
 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
 and a default case to abort if something goes wrong.   


 Use vec::quick_push on label_vec throughout, since we know the size
 in advance.

 The call to GOMP_sections_start goes in ENTRY_BB, replacing the
 GIMPLE_OMP_SECTIONS statement.

     If we are not inside a combined parallel+sections region,
     call GOMP_sections_start.

     Otherwise, call GOMP_sections_next.

 The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
 L0_BB.

 Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.

     Skip optional reduction region.

 Error handling code goes in DEFAULT_BB.

     Code to get the next section goes in L1_BB.

 Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.

References build2_loc, build_call_expr_loc(), build_int_cst(), builtin_decl_explicit(), cfun, fold_build1_loc, force_gimple_operand_gsi(), gcc_assert, gimple_in_ssa_p(), gimple_location(), gimple_omp_atomic_seq_cst_p(), gsi_last_bb(), gsi_remove(), GSI_SAME_STMT, gsi_stmt(), MEMMODEL_RELAXED, MEMMODEL_SEQ_CST, NULL, NULL_TREE, single_succ(), TODO_update_ssa_no_phi, TREE_TYPE, type(), update_ssa(), useless_type_conversion_p(), and void_type_node.

static void expand_omp_simd ( )

static

A subroutine of expand_omp_for. Generate code for a simd non-worksharing loop. Given parameters:

 for (V = N1; V cond N2; V += STEP) BODY;

where COND is "<" or ">", we generate pseudocode

 V = N1;
 goto L1;

L0:
  BODY;
  V += STEP;
L1:
  if (V cond N2) goto L0; else goto L2;
L2:

For collapsed loops, given parameters:

 collapse(3)
 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
   for (V2 = N21; V2 cond2 N22; V2 += STEP2)
     for (V3 = N31; V3 cond3 N32; V3 += STEP3)
       BODY;

we generate pseudocode

if (cond3 is <)
  adj = STEP3 - 1;
else
  adj = STEP3 + 1;
count3 = (adj + N32 - N31) / STEP3;
if (cond2 is <)
  adj = STEP2 - 1;
else
  adj = STEP2 + 1;
count2 = (adj + N22 - N21) / STEP2;
if (cond1 is <)
  adj = STEP1 - 1;
else
  adj = STEP1 + 1;
count1 = (adj + N12 - N11) / STEP1;
count = count1 * count2 * count3;
V = 0;
V1 = N11;
V2 = N21;
V3 = N31;
goto L1;

L0:
  BODY;
  V += 1;
  V3 += STEP3;
  V2 += (V3 cond3 N32) ? 0 : STEP2;
  V3 = (V3 cond3 N32) ? V3 : N31;
  V1 += (V2 cond2 N22) ? 0 : STEP1;
  V2 = (V2 cond2 N22) ? V2 : N21;
L1:
  if (V < count) goto L0; else goto L2;
L2:

 Not needed in SSA form right now.

 Remove the GIMPLE_OMP_FOR statement.

     Code to control the increment goes in the CONT_BB.

     Remove GIMPLE_OMP_CONTINUE.

 Emit the condition in L1_BB.

 Remove GIMPLE_OMP_RETURN.

 Connect the new blocks.

     If not -fno-tree-loop-vectorize, hint that we want to vectorize
     the loop.

static void expand_omp_single ( )

static

Expand code for an OpenMP single directive. We've already expanded much of the code, here we simply place the GOMP_barrier call.

static void expand_omp_synch ( )

static

Generic expansion for OpenMP synchronization directives: master, ordered and critical. All we need to do here is remove the entry and exit markers for REGION.

static void expand_omp_target ( )

static

Expand the OpenMP target{, data, update} directive starting at REGION.

     If the target region needs data sent from the parent
     function, then the very first statement (except possible
     tree profile counter updates) of the parallel body
     is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
     &.OMP_DATA_O is passed as an argument to the child function,
     we need to replace it with the argument as seen by the child
     function.

     In most cases, this will end up being the identity assignment
     .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
     a function call that has been inlined, the original PARM_DECL
     .OMP_DATA_I may have been converted into a different local
     variable.  In which case, we need to keep the assignment.

                 We're ignoring the subcode because we're
                 effectively doing a STRIP_NOPS.

     Declare local variables needed in CHILD_CFUN.

     The gimplifier could record temporaries in target block
     rather than in containing function's local_decls chain,
     which would mean cgraph missed finalizing them.  Do it now.

     We'll create a CFG for child_fn, so no gimple body is needed.

     Reset DECL_CONTEXT on function arguments.

     Split ENTRY_BB at GIMPLE_OMP_TARGET,
     so that it can be moved to the child function.

     Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.

     Move the target region into CHILD_CFUN.

     When the OMP expansion process cannot guarantee an up-to-date
     loop tree, arrange for the child function to fix up loops.

     Remove non-local VAR_DECLs from child_cfun->local_decls list.

     Inform the callgraph about the new function.

     Fix the callgraph edges for child_cfun.  Those for cfun will be
     fixed in a following pass.

     Some EH regions might become dead, see PR34608.  If
     pass_cleanup_cfg isn't the first pass to happen with the
     new child, these dead EH edges might cause problems.
     Clean them up now.

 Emit a library call to launch the target region, or do data
 transfers.

 By default, the value of DEVICE is -1 (let runtime library choose)
 and there is no conditional.

 Ensure 'device' is of the correct type.

 If we found the clause 'if (cond)', build
 (cond ? device : -2).

 FIXME: This will be address of
 extern char __OPENMP_TARGET__[] __attribute__((visibility ("hidden")))
 symbol, as soon as the linker plugin is able to create it for us.

static void expand_omp_taskreg ( )

static

Expand the OpenMP parallel or task directive starting at REGION.

     Due to inlining, it may happen that we have already outlined
     the region, in which case all we need to do is make the
     sub-graph unreachable and emit the parallel call.

     If the parallel region needs data sent from the parent
     function, then the very first statement (except possible
     tree profile counter updates) of the parallel body
     is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
     &.OMP_DATA_O is passed as an argument to the child function,
     we need to replace it with the argument as seen by the child
     function.

     In most cases, this will end up being the identity assignment
     .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
     a function call that has been inlined, the original PARM_DECL
     .OMP_DATA_I may have been converted into a different local
     variable.  In which case, we need to keep the assignment.

                 We're ignoring the subcode because we're
                 effectively doing a STRIP_NOPS.

                 ?? Is setting the subcode really necessary ??

             If we are in ssa form, we must load the value from the default
             definition of the argument.  That should not be defined now,
             since the argument is not used uninitialized.

             ?? Is setting the subcode really necessary ??

     Declare local variables needed in CHILD_CFUN.

     The gimplifier could record temporaries in parallel/task block
     rather than in containing function's local_decls chain,
     which would mean cgraph missed finalizing them.  Do it now.

     We'll create a CFG for child_fn, so no gimple body is needed.

     Reset DECL_CONTEXT on function arguments.

     Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
     so that it can be moved to the child function.

     Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.

     Move the parallel region into CHILD_CFUN.

     When the OMP expansion process cannot guarantee an up-to-date
     loop tree, arrange for the child function to fix up loops.

     Remove non-local VAR_DECLs from child_cfun->local_decls list.

     Inform the callgraph about the new function.

     Fix the callgraph edges for child_cfun.  Those for cfun will be
     fixed in a following pass.

     Some EH regions might become dead, see PR34608.  If
     pass_cleanup_cfg isn't the first pass to happen with the
     new child, these dead EH edges might cause problems.
     Clean them up now.

 Emit a library call to launch the children threads.

static void expand_parallel_call	(	struct omp_region *	region,
		basic_block	bb,
		gimple	entry_stmt,
		vec< tree, va_gc > *	ws_args
	)

static

Build the function calls to GOMP_parallel_start etc to actually generate the parallel operation. REGION is the parallel region being expanded. BB is the block where to insert the code. WS_ARGS will be set if this is a call to a combined parallel+workshare construct, it contains the list of additional arguments needed by the workshare construct.

 Determine what flavor of GOMP_parallel we will be
 emitting.

 By default, the value of NUM_THREADS is zero (selected at run time)
 and there is no conditional.

 Ensure 'val' is of the correct type.

 If we found the clause 'if (cond)', build either
 (cond != 0) or (cond ? val : 1u).

static void expand_task_call ( )

static

Build the function call to GOMP_task to actually generate the task operation. BB is the block where to insert the code.

References BLOCK_VARS, DECL_CHAIN, gimple_block(), and TREE_ADDRESSABLE.

static void extract_omp_for_data	(	gimple	for_stmt,
		struct omp_for_data *	fd,
		struct omp_for_data_loop *	loops
	)

static

Extract the header elements of parallel loop FOR_STMT and store them into *FD.

FIXME: for now map schedule(auto) to schedule(static). There should be analysis to determine whether all iterations are approximately the same amount of work (then schedule(static) is best) or if it varies (then schedule(dynamic,N) is better).

     We only need to compute a default chunk size for ordered
     static loops and dynamic loops.

Referenced by workshare_safe_to_combine_p().

static basic_block extract_omp_for_update_vars	(	struct omp_for_data *	fd,
		basic_block	cont_bb,
		basic_block	body_bb
	)

static

Helper function for expand_omp_for_*. Generate code like:

L10:
  V3 += STEP3;
  if (V3 cond3 N32) goto BODY_BB; else goto L11;
L11:
  V3 = N31;
  V2 += STEP2;
  if (V2 cond2 N22) goto BODY_BB; else goto L12;
L12:
  V2 = N21;
  V1 += STEP1;
  goto BODY_BB;

static void finalize_task_copyfn ( )

static

Finalize task copyfn.

Inform the callgraph about the new function.

References build_decl, omp_context::cb, copy_node, copy_tree_body_r(), DECL_CHAIN, DECL_CONTEXT, DECL_FIELD_OFFSET, DECL_NAME, DECL_SIZE, DECL_SIZE_UNIT, DECL_SOURCE_LOCATION, omp_context::field_map, layout_type(), lang_hooks_for_types::make_type, nreverse(), NULL, omp_context::receiver_decl, omp_context::record_type, remap_type(), TREE_TYPE, type(), gdbhooks::TYPE_DECL, TYPE_FIELDS, TYPE_NAME, lang_hooks::types, and walk_tree.

Referenced by new_omp_context().

static tree find_combined_for	(	gimple_stmt_iterator *	gsi_p,
		bool *	handled_ops_p,
		struct walk_stmt_info *	wi
	)

static

Callback for walk_gimple_seq. Check if combined parallel contains gimple_omp_for_combined_into_p OMP_FOR.

References fixup_child_record_type(), layout_type(), and omp_context::record_type.

tree find_omp_clause ( )

Find an OpenMP clause of type KIND within CLAUSES.

References omp_context::stmt.

Referenced by lower_send_clauses(), scan_omp_teams(), and workshare_safe_to_combine_p().

static void fixup_child_record_type ( )

static

Fix up RECEIVER_DECL with a type that has been remapped to the child context.

??? It isn't sufficient to just call remap_type here, because variably_modified_type_p doesn't work the way we expect for record types. Testing each field for whether it needs remapping and creating a new record by hand works, however.

         Arrange to be able to look up the receiver field
         given the sender field.

Referenced by find_combined_for().

static void fixup_remapped_decl ( )

static

Adjust the replacement for DECL in CTX for the new context. This means copying the DECL_VALUE_EXPR, and fixing up the type.

static void free_omp_region_1 ( )

static

Release the memory associated with the region tree rooted at REGION.

Referenced by dump_omp_region().

void free_omp_regions ( void )

Release the memory for the entire omp region tree.

static bool gate_diagnose_omp_blocks ( )

static

static bool gate_expand_omp ( )

static

OMP expansion – the default pass, run before creation of SSA form.

static vec<tree, va_gc>* get_ws_args_for ( )

static

Collect additional arguments needed to emit a combined parallel+workshare call. WS_STMT is the workshare directive being expanded.

Number of sections is equal to the number of edges from the GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to the exit of the sections region.

References omp_for_data::chunk_size, fold_convert_loc(), and long_integer_type_node.

static gimple gimple_build_cond_empty ( )

static

A convenience function to build an empty GIMPLE_COND with just the condition.

static void install_var_field ( )

static

Add a new field for VAR inside the structure CTX->SENDER_DECL.

Remember what variable this field was created for. This does have a side effect of making dwarf2out ignore this member, so for helpful debugging we clear it later in delete_omp_context.

static tree install_var_local ( )

static

static bool is_combined_parallel ( )

inline static

Return true if REGION is a combined parallel+workshare region.

References OMP_CLAUSE_NOWAIT.

Referenced by expand_omp_for_init_vars(), and lower_send_clauses().

static bool is_parallel_ctx ( )

inline static

Return true if CTX is for an omp parallel.

References omp_region::is_combined_parallel.

static bool is_reference ( )

inline static

Return true if DECL is a reference type.

References omp_context::field_map, and NULL_TREE.

Referenced by lower_reduction_clauses().

static bool is_task_ctx ( )

inline static

Return true if CTX is for an omp task.

References count, GF_OMP_FOR_KIND_SIMD, gimple_location(), gimple_omp_for_kind(), long_integer_type_node, and NULL_TREE.

Referenced by lower_reduction_clauses(), and new_omp_context().

static bool is_taskreg_ctx ( )

inline static

Return true if CTX is for an omp parallel or omp task.

static bool is_variable_sized ( )

inline static

Return true if EXPR is variable sized.

References omp_context::field_map, and omp_context::sfield_map.

Referenced by lower_reduction_clauses(), and omp_copy_decl_2().

static tree lookup_decl ( )

inline static

Look up variables in the decl or field splay trees. The "maybe" form allows for the variable not to have been entered; otherwise we assert that the variable must have been entered.

References AGGREGATE_TYPE_P, and TREE_TYPE.

static tree lookup_decl_in_outer_ctx	(	tree	,
		omp_context *
	)

static

Referenced by lower_reduction_clauses().

static tree lookup_decl_in_outer_ctx ( )

static

Find the mapping for DECL in CTX or the immediately enclosing context that has a mapping for DECL.

If CTX is a nested parallel directive, we may have to use the decl mappings created in CTX's parent context. Suppose that we have the following parallel nesting (variable UIDs shown for clarity):

 iD.1562 = 0;
 #omp parallel shared(iD.1562)           -> outer parallel
   iD.1562 = iD.1562 + 1;

   #omp parallel shared (iD.1562)        -> inner parallel
      iD.1562 = iD.1562 - 1;

Each parallel structure will create a distinct .omp_data_s structure for copying iD.1562 in/out of the directive:

 outer parallel          .omp_data_s.1.i -> iD.1562
 inner parallel          .omp_data_s.2.i -> iD.1562

A shared variable mapping will produce a copy-out operation before the parallel directive and a copy-in operation after it. So, in this case we would have:

 iD.1562 = 0;
 .omp_data_o.1.i = iD.1562;
 #omp parallel shared(iD.1562)           -> outer parallel
   .omp_data_i.1 = &.omp_data_o.1
   .omp_data_i.1->i = .omp_data_i.1->i + 1;

   .omp_data_o.2.i = iD.1562;            -> **
   #omp parallel shared(iD.1562)         -> inner parallel
     .omp_data_i.2 = &.omp_data_o.2
     .omp_data_i.2->i = .omp_data_i.2->i - 1;

This is a problem.  The symbol iD.1562 cannot be referenced
inside the body of the outer parallel region.  But since we are
emitting this copy operation while expanding the inner parallel
directive, we need to access the CTX structure of the outer
parallel directive to get the correct mapping:

   .omp_data_o.2.i = .omp_data_i.1->i

Since there may be other workshare or parallel directives enclosing the parallel directive, it may be necessary to walk up the context parent chain. This is not a problem in general because nested parallelism happens only rarely.

References targetm.

static tree lookup_field ( )

inline static

static tree lookup_sfield ( )

inline static

static void lower_copyprivate_clauses	(	tree	clauses,
		gimple_seq *	slist,
		gimple_seq *	rlist,
		omp_context *	ctx
	)

static

Generate code to implement the COPYPRIVATE clauses.

static void lower_depend_clauses ( )

static

static void lower_lastprivate_clauses	(	tree	clauses,
		tree	predicate,
		gimple_seq *	stmt_list,
		omp_context *	ctx
	)

static

Generate code to implement the LASTPRIVATE clauses. This is used for both parallel and workshare constructs. PREDICATE may be NULL if it's always true.

 Early exit if there are no lastprivate or linear clauses.

     If this was a workshare clause, see if it had been combined
     with its parallel.  In that case, look for the clauses on the
     parallel statement itself.

         If this was a workshare clause, see if it had been combined
         with its parallel.  In that case, continue looking for the
         clauses also on the parallel statement itself.

References OMP_CLAUSE_REDUCTION_PLACEHOLDER.

static void lower_omp	(	gimple_seq *	,
		omp_context *
	)

static

Referenced by lower_omp_sections(), and maybe_add_implicit_barrier_cancel().

static void lower_omp ( )

static

Inside target region we haven't called fold_stmt during gimplification, because it can break code by adding decl references that weren't in the source. Call fold_stmt now.

static void lower_omp_1 ( )

static

 If we have issued syntax errors, avoid doing any heavy lifting.
 Just replace the OpenMP directives with a NOP to avoid
 confusing RTL expansion.

           FALLTHRU

     FALLTHRU

static void lower_omp_critical ( )

static

static void lower_omp_for ( )

static

Lower code for an OpenMP loop directive.

 Replace at gsi right away, so that 'stmt' is no longer a member
 of a sequence, as we're going to add it to a different
 one below.

 Move declaration of temporaries in the loop body before we make
 it go away.

     We need two temporaries with fd.loop.v type (istart/iend)
     and then (fd.collapse - 1) temporaries with the same
     type for count2 ... countN-1 vars if not constant.

 The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR.

 Lower the header expressions.  At this point, we can assume that
 the header is of the form:

    #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)

 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
 using the .omp_data_s mapping, if needed.

 Once lowered, extract the bounds and clauses.

 After the loop, add exit clauses.

 Region exit marker goes at the end of the loop body.

static void lower_omp_for_lastprivate	(	struct omp_for_data *	fd,
		gimple_seq *	body_p,
		gimple_seq *	dlist,
		struct omp_context *	ctx
	)

static

A subroutine of lower_omp_for. Generate code to emit the predicate for a lastprivate clause. Given a loop control predicate of (V cond N2), we gate the clause on (!(V cond N2)). The lowered form is appended to *DLIST, iterator initialization is appended to *BODY_P.

 When possible, use a strict equality expression.  This can let VRP
 type optimizations deduce the value and remove a copy.

     Optimize: v = 0; is usually cheaper than v = some_other_constant.

     Initialize the iterator variable, so that threads that don't execute
     any iterations don't execute the lastprivate clauses by accident.

static void lower_omp_master ( )

static

Expand code for an OpenMP master directive.

References get_formal_tmp_var(), gimple_omp_for_final_ptr(), gimple_omp_for_incr(), gimple_omp_for_initial_ptr(), is_gimple_min_invariant(), and TREE_OPERAND.

static void lower_omp_ordered ( )

static

Expand code for an OpenMP ordered directive.

static tree lower_omp_regimplify_p	(	tree *	tp,
		int *	walk_subtrees,
		void *	data
	)

static

Callback for lower_omp_1. Return non-NULL if *tp needs to be regimplified. If DATA is non-NULL, lower_omp_1 is outside of OpenMP context, but with task_shared_vars set.

Any variable with DECL_VALUE_EXPR needs to be regimplified.

 If a global variable has been privatized, TREE_CONSTANT on
 ADDR_EXPR might be wrong.

static void lower_omp_sections ( )

static

Lower the OpenMP sections directive in the current statement in GSI_P. CTX is the enclosing OMP context for the current statement.

References omp_context::block_vars, BLOCK_VARS, builtin_decl_explicit(), gimple_bind_add_seq(), gimple_bind_add_stmt(), gimple_bind_append_vars(), gimple_bind_vars(), gimple_build_bind(), gimple_build_call(), gimple_build_omp_return(), gimple_omp_body(), gimple_omp_body_ptr(), gimple_omp_set_body(), gsi_replace(), gsi_stmt(), lower_omp(), make_node, maybe_catch_exception(), NULL, pop_gimplify_context(), and push_gimplify_context().

static void lower_omp_single ( )

static

Expand code for an OpenMP single directive.

static void lower_omp_single_copy ( )

static

A subroutine of lower_omp_single. Expand the simple form of a GIMPLE_OMP_SINGLE, with a copyprivate clause:

 #pragma omp single copyprivate (a, b, c)

Create a new structure to hold copies of 'a', 'b' and 'c' and emit:

 {
   if ((copyout_p = GOMP_single_copy_start ()) == NULL)
     {
       BODY;
       copyout.a = a;
       copyout.b = b;
       copyout.c = c;
       GOMP_single_copy_end (&copyout);
     }
   else
     {
       a = copyout_p->a;
       b = copyout_p->b;
       c = copyout_p->c;
     }
   GOMP_barrier ();
 }

FIXME. It may be better to delay expanding the logic of this until pass_expand_omp. The expanded logic may make the job more difficult for a synchronization analysis pass.

static void lower_omp_single_simple ( )

static

A subroutine of lower_omp_single. Expand the simple form of a GIMPLE_OMP_SINGLE, without a copyprivate clause:

 if (GOMP_single_start ())
   BODY;
 [ GOMP_barrier (); ]    -> unless 'nowait' is present.

FIXME. It may be better to delay expanding the logic of this until pass_expand_omp. The expanded logic may make the job more difficult for a synchronization analysis pass.

static void lower_omp_target ( )

static

Lower the OpenMP target directive in the current statement in GSI_P. CTX holds context information for the directive.

Declare all the variables created by mapping and the variables
declared in the scope of the target body.

 Once all the expansions are done, sequence all the different
 fragments inside gimple_omp_body.   


     fixup_child_record_type might have changed receiver_decl's type.

References bitmap_bit_p, DECL_HAS_VALUE_EXPR_P, DECL_P, DECL_UID, NULL, NULL_TREE, recompute_tree_invariant_for_addr_expr(), TREE_CODE, and TYPE_P.

static void lower_omp_taskgroup ( )

static

Expand code for an OpenMP taskgroup directive.

static void lower_omp_taskreg ( )

static

Lower the OpenMP parallel or task directive in the current statement in GSI_P. CTX holds context information for the directive.

 Declare all the variables created by mapping and the variables
 declared in the scope of the parallel body.

 Once all the expansions are done, sequence all the different
 fragments inside gimple_omp_body.

     fixup_child_record_type might have changed receiver_decl's type.

static void lower_omp_teams ( )

static

Expand code for an OpenMP teams directive.

References TREE_CHAIN, and TREE_VALUE.

static void lower_rec_input_clauses	(	tree	clauses,
		gimple_seq *	ilist,
		gimple_seq *	dlist,
		omp_context *	ctx,
		struct omp_for_data *	fd
	)

static

Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN, from the receiver (aka child) side and initializers for REFERENCE_TYPE private variables. Initialization statements go in ILIST, while calls to destructors go in DLIST.

 Set max_vf=1 (which will later enforce safelen=1) in simd loops
 with data sharing clauses referencing variable sized vars.  That
 is unnecessarily hard to support and very unlikely to result in
 vectorized code anyway.

 Do all the fixed sized types in the first pass, and the variable sized
 types in the second pass.  This makes sure that the scalar arguments to
 the variable sized types are processed before we use them in the
 variable sized operations.

             Ignore shared directives in teams construct.

             Handle _looptemp_ clauses only on parallel.

             For variable sized types, we need to allocate the
             actual storage here.  Call alloca and store the
             result in the pointer decl that we created elsewhere.

                 void *tmp = __builtin_alloca

             For references that are being privatized for Fortran,
             allocate new backing storage for the new pointer
             variable.  This allows us to avoid changing all the
             code that expects a pointer to something that expects
             a direct variable.

             Ignore shared directives in teams construct.

             Shared global vars are just accessed directly.

             Set up the DECL_VALUE_EXPR for shared variables now.  This
             needs to be delayed until after fixup_child_record_type so
             that we get the correct type during the dereference.

             ??? If VAR is not passed by reference, and the variable
             hasn't been initialized yet, then we'll get a warning for
             the store into the omp_data_s structure.  Ideally, we'd be
             able to notice this and not store anything at all, but
             we're generating code too early.  Suppress the warning.

             FALLTHRU

             FALLTHRU

                     reduction(-:var) sums up the partial results, so it
                     acts identically to reduction(+:var).

     Don't want uninit warnings on simduid, it is always uninitialized,
     but we use it not for the value, but for the DECL_UID only.

 The copyin sequence is not to be executed by the main thread, since
 that would result in self-copies.  Perhaps not visible to scalars,
 but it certainly is to C++ operator=.

 If any copyin variable is passed by reference, we must ensure the
 master thread doesn't modify it before it is copied over in all
 threads.  Similarly for variables in both firstprivate and
 lastprivate clauses we need to ensure the lastprivate copying
 happens after firstprivate copying in all threads.  And similarly
 for UDRs if initializer expression refers to omp_orig.

     Don't add any barrier for #pragma omp simd or
     #pragma omp distribute.

 If max_vf is non-zero, then we can use only a vectorization factor
 up to the max_vf we chose.  So stick it into the safelen clause.

static bool lower_rec_simd_input_clauses	(	tree	new_var,
		omp_context *	ctx,
		int &	max_vf,
		tree &	idx,
		tree &	lane,
		tree &	ivar,
		tree &	lvar
	)

static

Helper function of lower_rec_input_clauses, used for #pragma omp simd privatization.

static void lower_reduction_clauses ( )

static

Generate code to implement the REDUCTION clauses.

 SIMD reductions are handled in lower_rec_input_clauses.

 First see if there is exactly one reduction clause.  Use OMP_ATOMIC
 update in that case, otherwise use a lock.

           Never use OMP_ATOMIC for array reductions or UDRs.

     reduction(-:var) sums up the partial results, so it acts
     identically to reduction(+:var).

References build_fold_addr_expr_loc(), build_sender_ref(), DECL_ABSTRACT_ORIGIN, lang_hooks::decls, gcc_unreachable, gimplify_assign(), is_global_var(), is_reference(), is_task_ctx(), is_variable_sized(), lookup_decl_in_outer_ctx(), NULL, OMP_CLAUSE__LOOPTEMP_, OMP_CLAUSE_CHAIN, OMP_CLAUSE_CODE, OMP_CLAUSE_COPYIN, OMP_CLAUSE_DECL, OMP_CLAUSE_FIRSTPRIVATE, OMP_CLAUSE_LASTPRIVATE, OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE, OMP_CLAUSE_LOCATION, OMP_CLAUSE_PRIVATE, OMP_CLAUSE_PRIVATE_OUTER_REF, OMP_CLAUSE_REDUCTION, lang_hooks_for_decls::omp_private_outer_ref, TREE_OPERAND, and use_pointer_for_field().

static void lower_send_clauses	(	tree	clauses,
		gimple_seq *	ilist,
		gimple_seq *	olist,
		omp_context *	ctx
	)

static

Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE, and REDUCTION from the sender (aka parent) side.

References build_int_cst(), cfun, create_tmp_var, find_omp_clause(), fold_build2_loc, fold_convert_loc(), gcc_assert, gcc_unreachable, gimple_boolify(), gimple_in_ssa_p(), gimple_location(), gimple_omp_parallel_clauses(), walk_stmt_info::gsi, omp_region::inner, integer_zerop(), is_combined_parallel(), make_ssa_name(), NULL, NULL_TREE, OMP_CLAUSE_IF, OMP_CLAUSE_IF_EXPR, OMP_CLAUSE_LOCATION, OMP_CLAUSE_NUM_THREADS, OMP_CLAUSE_NUM_THREADS_EXPR, OMP_CLAUSE_PROC_BIND, OMP_CLAUSE_PROC_BIND_KIND, OMP_CLAUSE_SCHEDULE_AUTO, OMP_CLAUSE_SCHEDULE_RUNTIME, omp_region::sched_kind, TREE_TYPE, omp_region::type, and unsigned_type_node.

static void lower_send_shared_vars ( )

static

Generate code to implement SHARED from the sender (aka parent) side. This is trickier, since GIMPLE_OMP_PARALLEL_CLAUSES doesn't list things that got automatically shared.

If CTX is a nested parallel directive, find the immediately enclosing parallel or workshare construct that contains a mapping for OVAR.

             We don't need to receive a new reference to a result
             or parm decl.  In fact we may not store to it as we will
             invalidate any pending RSO and generate wrong gimple
             during inlining.

bool make_gimple_omp_edges ( )

Called from tree-cfg.c make_edges to create cfg edges for all GIMPLE_OMP codes.

     In the case of a GIMPLE_OMP_SECTION, the edge will go
     somewhere other than the next block.  This will be
     created later.

         Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
         succs edges as abnormal to prevent splitting
         them.

         Make the loopback edge.

         Create an edge from GIMPLE_OMP_FOR to exit, which
         corresponds to the case that the body of the loop
         is not executed at all.

         Wire up the edges into and out of the nested sections.

           Make the loopback edge to the block with
           GIMPLE_OMP_SECTIONS_SWITCH.

           Make the edge from the switch to exit.

gimple_opt_pass* make_pass_diagnose_omp_blocks ( )

gimple_opt_pass* make_pass_expand_omp ( )

gimple_opt_pass* make_pass_lower_omp ( )

static void maybe_add_implicit_barrier_cancel ( )

static

Routines to lower OpenMP directives into OMP-GIMPLE. If ctx is a worksharing context inside of a cancellable parallel region and it isn't nowait, add lhs to its GIMPLE_OMP_RETURN and conditional branch to parallel's cancel_label to handle cancellation in the implicit barrier.

References omp_context::block_vars, BLOCK_VARS, builtin_decl_explicit(), gimple_bind_add_seq(), gimple_bind_add_stmt(), gimple_bind_append_vars(), gimple_build_bind(), gimple_build_call(), gimple_build_omp_return(), gimple_omp_body(), gimple_omp_body_ptr(), gimple_omp_set_body(), gsi_replace(), gsi_stmt(), lower_omp(), make_node, and NULL.

static gimple_seq maybe_catch_exception ( gimple_seq )

static

Referenced by lower_omp_sections().

static gimple_seq maybe_catch_exception ( )

static

If exceptions are enabled, wrap the statements in BODY in a MUST_NOT_THROW catch handler and return it. This prevents programs from violating the structured block semantics with throws.

References builtin_decl_explicit(), DECL_EXTERNAL, DECL_INITIAL, DECL_NAME, gimple_call_fndecl(), gsi_stmt(), is_gimple_call(), NULL, and TREE_PUBLIC.

static omp_context* maybe_lookup_ctx ( )

static

If a context was created for STMT when it was scanned, return it.

References NULL, real_arithmetic(), and real_inf().

static tree maybe_lookup_decl ( )

inline static

References DECL_EXTERNAL, and TREE_STATIC.

Referenced by omp_max_vf().

static tree maybe_lookup_decl_in_outer_ctx	(	tree	,
		omp_context *
	)

static

Referenced by omp_copy_decl_2().

static tree maybe_lookup_decl_in_outer_ctx ( )

static

Similar to lookup_decl_in_outer_ctx, but return DECL if not found in outer contexts.

static tree maybe_lookup_field ( )

inline static

References omp_context::outer.

static omp_context* new_omp_context ( )

static

Create a new context, with OUTER_CTX being the surrounding context.

References omp_context::cb, DECL_ABSTRACT_ORIGIN, DECL_CHAIN, copy_body_data::decl_map, omp_context::field_map, finalize_task_copyfn(), is_task_ctx(), NULL, pointer_map_destroy(), omp_context::record_type, omp_context::sfield_map, omp_context::srecord_type, omp_context::stmt, and TYPE_FIELDS.

Referenced by scan_omp_for().

static struct omp_region* new_omp_region	(	basic_block	bb,
		enum gimple_code	type,
		struct omp_region *	parent
	)

static read

Create a new parallel region starting at STMT inside region PARENT.

This is a nested region. Add it to the list of inner regions in PARENT.

     This is a toplevel region.  Add it to the list of toplevel
     regions in ROOT_OMP_REGION.

References copy_body_data::block, omp_context::cb, omp_context::depth, and NULL.

static tree omp_build_component_ref ( )

static

Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it as appropriate.

static tree omp_clause_aligned_alignment ( )

static

Return alignment to be assumed for var in CLAUSE, which should be OMP_CLAUSE_ALIGNED.

Otherwise return implementation defined alignment.

References NULL.

static tree omp_copy_decl ( )

static

The callback for remap_decl. Search all containing contexts for a mapping of the variable; this avoids having to duplicate the splay tree ahead of time. We know a mapping doesn't already exist in the given context. Create new mappings to implement default semantics.

References dump_omp_region().

static tree omp_copy_decl_1 ( )

static

static tree omp_copy_decl_2 ( )

static

Construct a new automatic decl similar to VAR.

References build_outer_var_ref(), build_simple_mem_ref, DECL_VALUE_EXPR, is_global_var(), is_variable_sized(), maybe_lookup_decl_in_outer_ctx(), and TREE_OPERAND.

void omp_expand_local ( )

Expands omp construct (and its subconstructs) starting in HEAD.

static int omp_max_vf ( )

static

Return maximum possible vectorization factor for the target.

References gcc_assert, is_global_var(), maybe_lookup_decl(), NULL, OMP_CLAUSE__LOOPTEMP_, OMP_CLAUSE_COPYIN, OMP_CLAUSE_DECL, OMP_CLAUSE_FIRSTPRIVATE, OMP_CLAUSE_LINEAR, OMP_CLAUSE_PRIVATE, OMP_CLAUSE_PRIVATE_DEBUG, OMP_CLAUSE_REDUCTION, OMP_CLAUSE_REDUCTION_OMP_ORIG_REF, OMP_CLAUSE_SHARED, and omp_context::stmt.

tree omp_reduction_init ( )

Construct the initialization value for reduction CLAUSE.

References floor_log2().

static void optimize_omp_library_calls ( )

static

Optimize omp_get_thread_num () and omp_get_num_threads () calls. These can't be declared as const functions, but within one parallel body they are constant, so they can be transformed there into __builtin_omp_get_{thread_num,num_threads} () which are declared const. Similarly for task body, except that in untied task omp_get_thread_num () can change at any task scheduling point.

In #pragma omp task untied omp_get_thread_num () can change during the execution of the task region.

References gimple_assign_rhs1(), gimple_omp_taskreg_data_arg(), TREE_CODE, and TREE_OPERAND.

static void remove_exit_barrier ( )

static

Remove barriers in REGION->EXIT's block. Note that this is only valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be removed.

 If the parallel region doesn't return, we don't have REGION->EXIT
 block at all.

 The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
 workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
 statements that can appear in between are extremely limited -- no
 memory operations at all.  Here, we allow nothing at all, so the
 only thing we allow to precede this GIMPLE_OMP_RETURN is a label.

         OpenMP 3.0 tasks unfortunately prevent this optimization
         in many cases.  If there could be tasks queued, the barrier
         might be needed to let the tasks run before some local
         variable of the parallel that the task uses as shared
         runs out of scope.  The task can be spawned either
         from within current function (this would be easy to check)
         or from some function it calls and gets passed an address
         of such a variable.

References DECL_HAS_VALUE_EXPR_P, DECL_P, NULL_TREE, recompute_tree_invariant_for_addr_expr(), TREE_CODE, and TYPE_P.

static void remove_exit_barriers ( )

static

References gcc_assert, gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_stmt(), NULL, and single_succ().

static void scan_omp	(	gimple_seq *	,
		omp_context *
	)

static

Referenced by scan_omp_for().

static void scan_omp ( )

static

Scan all the statements starting at the current statement. CTX contains context information about the OpenMP directives and clauses found during the scan.

static tree scan_omp_1_op	(	tree *	,
		int *	,
		void *
	)

static

static tree scan_omp_1_op ( )

static

Helper function for scan_omp.

Callback for walk_tree or operators in walk_gimple_stmt used to scan for OpenMP directives in TP.

static tree scan_omp_1_stmt	(	gimple_stmt_iterator *	gsi,
		bool *	handled_ops_p,
		struct walk_stmt_info *	wi
	)

static

Helper function for scan_omp.

Callback for walk_gimple_stmt used to scan for OpenMP directives in the current statement in GSI.

Check the OpenMP nesting restrictions.

static void scan_omp_for ( )

static

Scan an OpenMP loop directive.

References gimple_omp_body_ptr(), gimple_omp_teams_clauses(), new_omp_context(), scan_omp(), and scan_sharing_clauses().

static tree scan_omp_op ( )

inline static

Convenience function for calling scan_omp_1_op on tree operands.

References omp_context::stmt.

static void scan_omp_parallel ( )

static

Scan an OpenMP parallel directive.

Ignore parallel directives with empty bodies, unless there are copyin clauses.

         We need two temporaries with fd.loop.v type (istart/iend)
         and then (fd.collapse - 1) temporaries with the same
         type for count2 ... countN-1 vars if not constant.

static void scan_omp_sections ( )

static

Scan an OpenMP sections directive.

References error_at(), GF_OMP_FOR_KIND_DISTRIBUTE, gimple_location(), and gimple_omp_for_kind().

static void scan_omp_single ( )

static

Scan an OpenMP single directive.

static void scan_omp_target ( )

static

Scan an OpenMP target{, data, update} directive.

References DECL_FUNCTION_CODE, error_at(), gimple_call_fndecl(), and gimple_location().

static void scan_omp_task ( )

static

Scan an OpenMP task directive.

Ignore task directives with empty bodies.

     Move VLA fields to the end.

static void scan_omp_teams ( )

static

Scan an OpenMP teams directive.

References omp_context::cancellable, find_omp_clause(), gimple_omp_sections_clauses(), OMP_CLAUSE_NOWAIT, and omp_context::stmt.

static void scan_sharing_clauses ( )

static

Instantiate decls as necessary in CTX to satisfy the data sharing specified by CLAUSES.

         Ignore shared directives in teams construct.

         Global variables don't need to be copied,
         the receiver side will use them directly.

         We don't need to copy const scalar vars back.

         Let the corresponding firstprivate clause create
         the variable.

         FALLTHRU

         Global variables with "omp declare target" attribute
         don't need to be copied, the receiver side will use them
         directly.

             Ignore OMP_CLAUSE_MAP_POINTER kind for arrays in
             #pragma omp target data, there is nothing to map for
             those.

         Let the corresponding firstprivate clause create
         the variable.

         FALLTHRU

         Ignore shared directives in teams construct.

Referenced by scan_omp_for().

static tree task_copyfn_copy_decl ( )

static

static tree task_copyfn_remap_type ( )

static

static bool use_pointer_for_field ( )

static

Return true if DECL should be copied by pointer. SHARED_CTX is the parallel context if DECL is to be shared.

 We can only use copy-in/copy-out semantics for shared variables
 when we know the value is not accessible from an outer scope.

     ??? Trivially accessible from anywhere.  But why would we even
     be passing an address in this case?  Should we simply assert
     this to be false, or should we have a cleanup pass that removes
     these from the list of mappings?

     For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
     without analyzing the expression whether or not its location
     is accessible to anyone else.  In the case of nested parallel
     regions it certainly may be.

     Do not use copy-in/copy-out for variables that have their
     address taken.

     lower_send_shared_vars only uses copy-in, but not copy-out
     for these.

     Disallow copy-in/out in nested parallel if
     decl is shared in outer parallel, otherwise
     each thread could store the shared variable
     in its own copy-in location, making the
     variable no longer really shared.

     For tasks avoid using copy-in/out.  As tasks can be
     deferred or executed in different thread, when GOMP_task
     returns, the task hasn't necessarily terminated.

             Taking address of OUTER in lower_send_shared_vars
             might need regimplification of everything that uses the
             variable.

References gimple_omp_taskreg_clauses(), OMP_CLAUSE_CHAIN, OMP_CLAUSE_CODE, OMP_CLAUSE_DECL, OMP_CLAUSE_SHARED, and omp_context::stmt.

Referenced by lower_reduction_clauses().

static tree vec2chain ( )

static

Chain all the DECLs in LIST by their TREE_CHAIN fields.

static bool workshare_safe_to_combine_p ( )

static

Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB is the immediate dominator of PAR_ENTRY_BB, return true if there are no data dependencies that would prevent expanding the parallel directive at PAR_ENTRY_BB as a combined parallel+workshare region.

When expanding a combined parallel+workshare region, the call to the child function may need additional arguments in the case of GIMPLE_OMP_FOR regions. In some cases, these arguments are computed out of variables passed in from the parent to the child via 'struct .omp_data_s'. For instance:

 #pragma omp parallel for schedule (guided, i * 4)
 for (j ...)

Is lowered into:

 # BLOCK 2 (PAR_ENTRY_BB)
 .omp_data_o.i = i;
 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

 # BLOCK 3 (WS_ENTRY_BB)
 .omp_data_i = &.omp_data_o;
 D.1667 = .omp_data_i->i;
 D.1598 = D.1667 * 4;
 #pragma omp for schedule (guided, D.1598)

When we outline the parallel region, the call to the child function 'bar.omp_fn.0' will need the value D.1598 in its argument list, but that value is computed after the call site. So, in principle we cannot do the transformation.

To see whether the code in WS_ENTRY_BB blocks the combined parallel+workshare call, we collect all the variables used in the GIMPLE_OMP_FOR header and check whether they appear on the LHS of any statement in WS_ENTRY_BB. If so, then we cannot emit the combined call.

FIXME. If we had the SSA form built at this point, we could merely hoist the code in block 3 into block 2 and be done with it. But at this point we don't have dataflow information and though we could hack something up here, it is really not worth the aggravation.

FIXME. We give up too easily here. If any of these arguments are not constants, they will likely involve variables that have been mapped into fields of .omp_data_s for sharing with the child function. With appropriate data flow, it would be possible to see through this.

References omp_for_data::chunk_size, extract_omp_for_data(), find_omp_clause(), fold_convert_loc(), gcc_assert, gimple_location(), gimple_omp_for_combined_into_p(), gimple_omp_parallel_clauses(), long_integer_type_node, omp_for_data::loop, omp_for_data_loop::n1, omp_for_data_loop::n2, NULL, OMP_CLAUSE__LOOPTEMP_, OMP_CLAUSE_CHAIN, OMP_CLAUSE_DECL, omp_for_data_loop::step, and vec_alloc().

Variable Documentation

splay_tree all_contexts

static

Referenced by debug_all_omp_regions().

splay_tree all_labels

static

The following is a utility to diagnose OpenMP structured block violations. It is not part of the "omplower" pass, as that's invoked too late. It should be invoked by the respective front ends after gimplification.

splay_tree critical_name_mutexes

static

Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple substitution of a couple of function calls. But in the NAMED case, requires that languages coordinate a symbol name. It is therefore best put here in common code.

struct omp_region* root_omp_region

static

int target_nesting_level

static

bitmap task_shared_vars

static

int taskreg_nesting_level

static

unsigned int tmp_ompfn_id_num

static

Create a new name for omp child function. Returns an identifier.

Data Structures

Macros

Typedefs

Functions

Variables

Macro Definition Documentation

Typedef Documentation

Function Documentation

Variable Documentation