#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"
#include "gt-tree-parloops.h"

Include dependency graph for tree-parloops.c:

Data Structures
struct	reduction_info
struct	reduction_hasher
struct	name_to_copy_elt
struct	name_to_copy_hasher
struct	lambda_trans_matrix_s
struct	elv_data
struct	clsn_data

Macros
#define	MIN_PER_THREAD 100
#define	LTM_MATRIX(T) ((T)->matrix)
#define	LTM_ROWSIZE(T) ((T)->rowsize)
#define	LTM_COLSIZE(T) ((T)->colsize)
#define	LTM_DENOMINATOR(T) ((T)->denominator)

Typedefs
typedef hash_table < reduction_hasher >	reduction_info_table_type
typedef hash_table < name_to_copy_hasher >	name_to_copy_table_type
typedef struct lambda_trans_matrix_s *	lambda_trans_matrix

Functions
static struct reduction_info *	reduction_phi ()
static lambda_trans_matrix	lambda_trans_matrix_new (int colsize, int rowsize, struct obstack *lambda_obstack)
static void	lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n, lambda_vector vec, lambda_vector dest)
static bool	lambda_transform_legal_p (lambda_trans_matrix trans, int nb_loops, vec< ddr_p > dependence_relations)
static bool	loop_parallel_p ()
static bool	loop_has_blocks_with_irreducible_flag ()
static tree	take_address_of (tree obj, tree type, edge entry, int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
int	initialize_reductions ()
static tree	eliminate_local_variables_1 ()
static void	eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi, int_tree_htab_type decl_address)
static void	eliminate_local_variables ()
static bool	expr_invariant_in_region_p ()
static tree	separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies, int_tree_htab_type decl_copies, bool copy_name_p)
static void	separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt, name_to_copy_table_type name_copies, int_tree_htab_type decl_copies)
static bool	separate_decls_in_region_debug (gimple stmt, name_to_copy_table_type name_copies, int_tree_htab_type decl_copies)
int	add_field_for_reduction ()
int	add_field_for_name ()
int	create_phi_for_local_result ()
int	create_call_for_reduction_1 ()
static void	create_call_for_reduction (struct loop loop, reduction_info_table_type reduction_list, struct clsn_data ld_st_data)
int	create_loads_for_reductions ()
static void	create_final_loads_for_reduction (reduction_info_table_type reduction_list, struct clsn_data *ld_st_data)
int	create_stores_for_reduction ()
int	create_loads_and_stores_for_name (name_to_copy_elt *slot, struct clsn_data clsn_data)
static void	separate_decls_in_region (edge entry, edge exit, reduction_info_table_type reduction_list, tree arg_struct, tree new_arg_struct, struct clsn_data *ld_st_data)
bool	parallelized_function_p ()
static tree	create_loop_fn ()
static void	transform_to_exit_first_loop (struct loop *loop, reduction_info_table_type reduction_list, tree nit)
static basic_block	create_parallel_loop (struct loop *loop, tree loop_fn, tree data, tree new_data, unsigned n_threads, location_t loc)
static void	gen_parallel_loop (struct loop loop, reduction_info_table_type reduction_list, unsigned n_threads, struct tree_niter_desc niter)
static bool	loop_has_vector_phi_nodes ()
static void	build_new_reduction (reduction_info_table_type reduction_list, gimple reduc_stmt, gimple phi)
int	set_reduc_phi_uids ()
static void	gather_scalar_reductions ()
static bool	try_get_loop_niter ()
static bool	try_create_reduction_list (loop_p loop, reduction_info_table_type reduction_list)
bool	parallelize_loops ()
static bool	gate_tree_parallelize_loops ()
static unsigned	tree_parallelize_loops ()
gimple_opt_pass *	make_pass_parallelize_loops ()

Variables
static bitmap	parallelized_functions

Macro Definition Documentation

#define LTM_COLSIZE ( T ) ((T)->colsize)

Referenced by lambda_matrix_vector_mult().

#define LTM_DENOMINATOR ( T ) ((T)->denominator)

#define LTM_MATRIX ( T ) ((T)->matrix)

#define LTM_ROWSIZE ( T ) ((T)->rowsize)

Referenced by lambda_matrix_vector_mult().

#define MIN_PER_THREAD 100

Loop autoparallelization. Copyright (C) 2006-2013 Free Software Foundation, Inc. Contributed by Sebastian Pop pop@c.nosp@m.ri.e.nosp@m.nsmp..nosp@m.fr Zdenek Dvorak dvora.nosp@m.kz@s.nosp@m.use.c.nosp@m.z and Razya Ladelsky razya.nosp@m.@il..nosp@m.ibm.c.nosp@m.om.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/. This pass tries to distribute iterations of loops into several threads. The implementation is straightforward – for each loop we test whether its iterations are independent, and if it is the case (and some additional conditions regarding profitability and correctness are satisfied), we add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion machinery do its job.

The most of the complexity is in bringing the code into shape expected by the omp expanders: – for GIMPLE_OMP_FOR, ensuring that the loop has only one induction variable and that the exit test is at the start of the loop body – for GIMPLE_OMP_PARALLEL, replacing the references to local addressable variables by accesses through pointers, and breaking up ssa chains by storing the values incoming to the parallelized loop to a structure passed to the new function as an argument (something similar is done in omp gimplification, unfortunately only a small part of the code can be shared).

TODO: – if there are several parallelizable loops in a function, it may be possible to generate the threads just once (using synchronization to ensure that cross-loop dependences are obeyed). – handling of common reduction patterns for outer loops.

More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC Minimal number of iterations of a loop that should be executed in each thread.

Typedef Documentation

typedef struct lambda_trans_matrix_s * lambda_trans_matrix

A transformation matrix, which is a self-contained ROWSIZE x COLSIZE matrix. Rather than use floats, we simply keep a single DENOMINATOR that represents the denominator for every element in the matrix.

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type

typedef hash_table<reduction_hasher> reduction_info_table_type

Function Documentation

int add_field_for_name ( )

Callback for htab_traverse. Adds a field corresponding to a ssa name described in SLOT. The type is passed in DATA.

int add_field_for_reduction ( )

Callback for htab_traverse. Adds a field corresponding to the reduction specified in SLOT. The type is passed in DATA.

static void build_new_reduction	(	reduction_info_table_type	reduction_list,
		gimple	reduc_stmt,
		gimple	phi
	)

static

Create a reduction_info struct, initialize it with REDUC_STMT and PHI, insert it to the REDUCTION_LIST.

References edge_def::dest, dump_file, dump_flags, gather_scalar_reductions(), gcc_assert, gsi_end_p(), gsi_next(), gsi_start_phis(), gsi_stmt(), PHI_ARG_DEF_FROM_EDGE, print_generic_expr(), print_gimple_stmt(), reduction_info::reduc_phi, single_dom_exit(), and virtual_operand_p().

static void create_call_for_reduction	(	struct loop *	loop,
		reduction_info_table_type	reduction_list,
		struct clsn_data *	ld_st_data
	)

static

Create the atomic operation at the join point of the threads. REDUCTION_LIST describes the reductions in the LOOP. LD_ST_DATA describes the shared data structure where shared data is stored in and loaded from.

Find the fallthru edge from GIMPLE_OMP_CONTINUE.

References build_fold_addr_expr, create_loads_for_reductions(), gimple_build_assign, gsi_after_labels(), gsi_insert_before(), GSI_NEW_STMT, clsn_data::load, clsn_data::load_bb, SSA_NAME_DEF_STMT, clsn_data::store, and hash_table< Descriptor, Allocator >::traverse().

int create_call_for_reduction_1 ( )

Callback for htab_traverse. Create an atomic instruction for the reduction described in SLOT. DATA annotates the place in memory the atomic operation relates to, and the basic block it needs to be generated in.

Create phi node.

References create_phi_for_local_result(), FALLTHRU_EDGE, loop::latch, clsn_data::load_bb, and hash_table< Descriptor, Allocator >::traverse().

static void create_final_loads_for_reduction	(	reduction_info_table_type	reduction_list,
		struct clsn_data *	ld_st_data
	)

static

Load the reduction result that was stored in LD_ST_DATA. REDUCTION_LIST describes the list of reductions that the loads should be generated for.

int create_loads_and_stores_for_name	(	name_to_copy_elt **	slot,
		struct clsn_data *	clsn_data
	)

Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and store to a field of STORE in STORE_BB for the ssa name and its duplicate specified in SLOT.

References hash_table< Descriptor, Allocator >::create(), edge_def::dest, FOR_EACH_VEC_ELT, gather_blocks_in_sese_region(), gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_start_phis(), gsi_stmt(), is_gimple_debug(), separate_decls_in_region_stmt(), single_pred(), single_succ_edge(), split_edge(), and type().

int create_loads_for_reductions ( )

Callback for htab_traverse. Loads the final reduction value at the join point of all threads, and inserts it in the right place.

Referenced by create_call_for_reduction().

static tree create_loop_fn ( )

static

Creates and returns an empty function that will receive the body of a parallelized loop.

The call to allocate_struct_function clobbers CFUN, so we need to restore it.

References add_phi_arg(), copy_ssa_name(), create_phi_node(), gcc_assert, get_loop_body_in_dom_order(), gimple_cond_lhs(), gimple_cond_rhs(), gimple_cond_set_lhs(), gimple_duplicate_sese_tail(), gsi_end_p(), gsi_next(), gsi_start_phis(), gsi_stmt(), loop::header, last_stmt(), NULL_TREE, PHI_RESULT, SET_PHI_RESULT, single_dom_exit(), single_succ(), single_succ_edge(), split_block_after_labels(), edge_def::src, UNKNOWN_LOCATION, update_stmt(), and virtual_operand_p().

static basic_block create_parallel_loop	(	struct loop *	loop,
		tree	loop_fn,
		tree	data,
		tree	new_data,
		unsigned	n_threads,
		location_t	loc
	)

static

Create the parallel constructs for LOOP as described in gen_parallel_loop. LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL. NEW_DATA is the variable that should be initialized from the argument of LOOP_FN. N_THREADS is the requested number of threads. Returns the basic block containing GIMPLE_OMP_PARALLEL tree.

 Prepare the GIMPLE_OMP_PARALLEL statement.

 Initialize NEW_DATA.

 Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.

 Extract data for GIMPLE_OMP_FOR.

 Prepare cfg.

 Emit GIMPLE_OMP_FOR.

 Emit GIMPLE_OMP_CONTINUE.

 Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.

 After the above dom info is hosed.  Re-compute it.

References add_phi_arg(), gimple_phi_arg_location_from_edge(), gsi_stmt(), loop_latch_edge(), loop_preheader_edge(), PHI_ARG_DEF_FROM_EDGE, and SSA_NAME_DEF_STMT.

int create_phi_for_local_result ( )

Callback for htab_traverse. A local result is the intermediate result computed by a single thread, or the initial value in case no iteration was executed. This function creates a phi node reflecting these values. The phi's result will be stored in NEW_PHI field of the reduction's data structure.

STORE_BB is the block where the phi should be stored. It is the destination of the loop exit. (Find the fallthru edge from GIMPLE_OMP_CONTINUE).

 STORE_BB has two predecessors.  One coming from  the loop
 (the reduction's result is computed at the loop),
 and another coming from a block preceding the loop,
 when no iterations
 are executed (the initial value should be taken).

References build3, build_addr(), build_simple_mem_ref, create_tmp_var, edge_def::dest, reduction_info::field, gimple_build_omp_atomic_load(), gsi_insert_after(), GSI_NEW_STMT, gsi_start_bb(), clsn_data::load, clsn_data::load_bb, make_ssa_name(), NULL, NULL_TREE, PHI_RESULT, reduction_info::reduc_phi, split_block(), SSA_NAME_DEF_STMT, and TREE_TYPE.

Referenced by create_call_for_reduction_1().

int create_stores_for_reduction ( )

Callback for htab_traverse. Store the neutral value for the particular reduction's operation, e.g. 0 for PLUS_EXPR, 1 for MULT_EXPR, etc. into the reduction field. The reduction is specified in SLOT. The store information is passed in DATA.

static void eliminate_local_variables ( )

static

Eliminates the references to local variables from the single entry single exit region between the ENTRY and EXIT edges.

This includes: 1) Taking address of a local variable – these are moved out of the region (and temporary variable is created to hold the address if necessary).

2) Dereferencing a local variable – these are replaced with indirect references.

References CDI_DOMINATORS, dominated_by_p(), gimple_bb(), is_gimple_min_invariant(), SSA_NAME_DEF_STMT, and TREE_CODE.

static tree eliminate_local_variables_1 ( )

static

Eliminates references to local variables in *TP out of the single entry single exit region starting at DTA->ENTRY. DECL_ADDRESS contains addresses of the references that had their address taken already. If the expression is changed, CHANGED is set to true. Callback for walk_tree.

ADDR_EXPR may appear in two contexts: – as a gimple operand, when the address taken is a function invariant – as gimple rhs, when the resulting address in not a function invariant We do not need to do anything special in the latter case (the base of the memory reference whose address is taken may be replaced in the DECL_P case). The former case is more complicated, as we need to ensure that the new address is still a gimple operand. Thus, it is not sufficient to replace just the base of the memory reference – we need to move the whole computation of the address out of the loop.

static void eliminate_local_variables_stmt	(	edge	entry,
		gimple_stmt_iterator *	gsi,
		int_tree_htab_type	decl_address
	)

static

Moves the references to local variables in STMT at *GSI out of the single entry single exit region starting at ENTRY. DECL_ADDRESS contains addresses of the references that had their address taken already.

static bool expr_invariant_in_region_p ( )

static

Returns true if expression EXPR is not defined between ENTRY and EXIT, i.e. if all its operands are defined outside of the region.

static bool gate_tree_parallelize_loops ( )

static

Parallelization.

static void gather_scalar_reductions ( )

static

Detect all reductions in the LOOP, insert them into REDUCTION_LIST.

As gimple_uid is used by the vectorizer in between vect_analyze_loop_form and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts only now.

References flow_bb_inside_loop_p(), gimple_debug_bind_p(), and USE_STMT.

Referenced by build_new_reduction().

static void gen_parallel_loop	(	struct loop *	loop,
		reduction_info_table_type	reduction_list,
		unsigned	n_threads,
		struct tree_niter_desc *	niter
	)

static

Generates code to execute the iterations of LOOP in N_THREADS threads in parallel.

NITER describes number of iterations of LOOP. REDUCTION_LIST describes the reductions existent in the LOOP.

 From

loop { IV = phi (INIT, IV + STEP) BODY1; if (COND) break; BODY2;

}

 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
 we generate the following code:

 if (MAY_BE_ZERO
 || NITER < MIN_PER_THREAD * N_THREADS)
 goto original;

 BODY1;
 store all local loop-invariant variables used in body of the loop to DATA.
 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
 load the variables from DATA.
 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
 BODY2;
 BODY1;
 GIMPLE_OMP_CONTINUE;
 GIMPLE_OMP_RETURN         &ndash; GIMPLE_OMP_FOR
 GIMPLE_OMP_RETURN         &ndash; GIMPLE_OMP_PARALLEL
 goto end;

 original:
 loop
   {
     IV = phi (INIT, IV + STEP)
     BODY1;
     if (COND)
       break;
     BODY2;
   }

 end:




 Create two versions of the loop &ndash; in the old one, we know that the
 number of iterations is large enough, and we will transform it into the
 loop that will be split to loop_fn, the new one will be used for the
 remaining iterations.

 We should compute a better number-of-iterations value for outer loops.
 That is, if we have

for (i = 0; i < n; ++i)
  for (j = 0; j < m; ++j)
    ...

we should compute nit = n * m, not nit = n.  
Also may_be_zero handling would need to be adjusted.

 We assume that the loop usually iterates a lot.

 Base all the induction variables in LOOP on a single control one.

 Ensure that the exit condition is the first statement in the loop.

 Generate initializations for reductions.

 Eliminate the references to local variables from the loop.

 In the old loop, move all variables non-local to the loop to a structure
 and back, and create separate decls for the variables used in loop.

 Create the parallel constructs.

 Cancel the loop (it is simpler to do it here rather than to teach the
 expander to do it).

 Free loop bound estimations that could contain references to
 removed statements.

 Expand the parallel constructs.  We do it directly here instead of running
 a separate expand_omp pass, since it is more efficient, and less likely to
 cause troubles with further analyses not being able to deal with the
 OMP trees.

int initialize_reductions ( )

Callback for htab_traverse. Create the initialization statement for reduction described in SLOT, and place it at the preheader of the loop described in DATA.

 Create initialization in preheader:
 reduction_variable = initialization value of reduction.

 In the phi node at the header, replace the argument coming
 from the preheader with the reduction initialization value.

 Create a new variable to initialize the reduction.

 Replace the argument representing the initialization value
 with the initialization value for the reduction (neutral
 element for the particular operation, e.g. 0 for PLUS_EXPR,
 1 for MULT_EXPR, etc).
 Keep the old value in a new variable "reduction_initial",
 that will be taken in consideration after the parallel
 computing is done.

 Create new variable to hold the initial value.

static void lambda_matrix_vector_mult	(	lambda_matrix	matrix,
		int	m,
		int	n,
		lambda_vector	vec,
		lambda_vector	dest
	)

static

Multiply a vector VEC by a matrix MAT. MAT is an M*N matrix, and VEC is a vector with length N. The result is stored in DEST which must be a vector of length M.

References gcc_assert, LTM_COLSIZE, and LTM_ROWSIZE.

static lambda_trans_matrix lambda_trans_matrix_new	(	int	colsize,
		int	rowsize,
		struct obstack *	lambda_obstack
	)

static

Allocate a new transformation matrix.

static bool lambda_transform_legal_p	(	lambda_trans_matrix	trans,
		int	nb_loops,
		vec< ddr_p >	dependence_relations
	)

static

Return true if TRANS is a legal transformation matrix that respects the dependence vectors in DISTS and DIRS. The conservative answer is false.

"Wolfe proves that a unimodular transformation represented by the matrix T is legal when applied to a loop nest with a set of lexicographically non-negative distance vectors RDG if and only if for each vector d in RDG, (T.d >= 0) is lexicographically positive. i.e.: if and only if it transforms the lexicographically positive distance vectors to lexicographically positive vectors. Note that a unimodular matrix must transform the zero vector (and only it) to the zero vector." S.Muchnick.

 When there are no dependences, the transformation is correct.

 When there is an unknown relation in the dependence_relations, we
 know that it is no worth looking at this loop nest: give up.

 For each distance vector in the dependence graph.

     Don't care about relations for which we know that there is no
     dependence, nor about read-read (aka. output-dependences):
     these data accesses can happen in any order.

     Conservatively answer: "this transformation is not valid".

     If the dependence could not be captured by a distance vector,
     conservatively answer that the transform is not valid.

     Compute trans.dist_vect

static bool loop_has_blocks_with_irreducible_flag ( )

inlinestatic

Return true when LOOP contains basic blocks marked with the BB_IRREDUCIBLE_LOOP flag.

References build_fold_addr_expr, build_simple_mem_ref, DECL_P, handled_component_p(), TREE_OPERAND, and unshare_expr().

static bool loop_has_vector_phi_nodes ( )

static

Returns true when LOOP contains vector phi nodes.

References dump_file, dump_flags, gcc_assert, number_of_iterations_exit(), and single_dom_exit().

static bool loop_parallel_p ( )

static

Data dependency analysis. Returns true if the iterations of LOOP are independent on each other (that is, if we can execute them in parallel).

Check for problems with dependences. If the loop can be reversed, the iterations are independent.

References dump_file, and dump_flags.

gimple_opt_pass* make_pass_parallelize_loops ( )

bool parallelize_loops ( void )

Detect parallel loops and generate parallel code using libgomp primitives. Returns true if some loop was parallelized, false otherwise.

 Do not parallelize loops in the functions created by parallelization.

     If we use autopar in graphite pass, we use its marked dependency
  checking results.

         FIXME: the check for vector phi nodes could be removed.

     FIXME: Bypass this check as graphite doesn't update the
     count and frequency correctly now.

             Do not bother with loops in cold areas.

 Parallelization will cause new function calls to be inserted through
 which local variables will escape.  Reset the points-to solution
 for ESCAPED.

bool parallelized_function_p ( )

Returns true if FN was created by create_loop_fn.

static struct reduction_info* reduction_phi ( )

staticread

Referenced by get_initial_def_for_reduction(), and transform_to_exit_first_loop().

static void separate_decls_in_region	(	edge	entry,
		edge	exit,
		reduction_info_table_type	reduction_list,
		tree *	arg_struct,
		tree *	new_arg_struct,
		struct clsn_data *	ld_st_data
	)

static

Moves all the variables used in LOOP and defined outside of it (including the initial values of loop phi nodes, and *PER_THREAD if it is a ssa name) to a structure created for this purpose. The code

while (1) { use (a); use (b); }

is transformed this way:

bb0: old.a = a; old.b = b;

bb1: a' = new->a; b' = new->b; while (1) { use (a'); use (b'); }

`old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The pointer `new' is intentionally not initialized (the loop will be split to a separate function later, and `new' will be initialized from its arguments). LD_ST_DATA holds information about the shared data structure used to pass information among the threads. It is initialized here, and gen_parallel_loop will pass it to create_call_for_reduction that needs this information. REDUCTION_LIST describes the reductions in LOOP.

 Now process debug bind stmts.  We must not create decls while
 processing debug stmts, so we defer their processing so as to
 make sure we will have debug info for as many variables as
 possible (all of those that were dealt with in the loop above),
 and discard those for which we know there's nothing we can
 do.

     It may happen that there is nothing to copy (if there are only
     loop carried and external variables in the loop).

     Create the type for the structure to store the ssa names to.

         Create the fields for reductions.

     Create the loads and stores.

     Load the calculation from memory (after the join of the threads).

static bool separate_decls_in_region_debug	(	gimple	stmt,
		name_to_copy_table_type	name_copies,
		int_tree_htab_type	decl_copies
	)

static

Finds the ssa names used in STMT that are defined outside the region between ENTRY and EXIT and replaces such ssa names with their duplicates. The duplicates are stored to NAME_COPIES. Base decls of all ssa names used in STMT (including those defined in LOOP) are replaced with the new temporary variables; the replacement decls are stored in DECL_COPIES.

static tree separate_decls_in_region_name	(	tree	name,
		name_to_copy_table_type	name_copies,
		int_tree_htab_type	decl_copies,
		bool	copy_name_p
	)

static

If COPY_NAME_P is true, creates and returns a duplicate of NAME. The copies are stored to NAME_COPIES, if NAME was already duplicated, its duplicate stored in NAME_COPIES is returned.

Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also duplicated, storing the copies in DECL_COPIES.

Ensure that when we meet this decl next time, we won't duplicate it again.

References create_tmp_var, DECL_GIMPLE_REG_P, DECL_UID, hash_table< Descriptor, Allocator >::find_slot_with_hash(), gcc_assert, get_name(), int_tree_map::to, TREE_TYPE, and int_tree_map::uid.

static void separate_decls_in_region_stmt	(	edge	entry,
		edge	exit,
		gimple	stmt,
		name_to_copy_table_type	name_copies,
		int_tree_htab_type	decl_copies
	)

static

Finds the ssa names used in STMT that are defined outside the region between ENTRY and EXIT and replaces such ssa names with their duplicates. The duplicates are stored to NAME_COPIES. Base decls of all ssa names used in STMT (including those defined in LOOP) are replaced with the new temporary variables; the replacement decls are stored in DECL_COPIES.

References DECL_P, DECL_UID, hash_table< Descriptor, Allocator >::find_slot_with_hash(), FOR_EACH_PHI_OR_STMT_USE, gcc_assert, gimple_debug_bind_get_var(), gimple_debug_bind_p(), gimple_debug_bind_reset_value(), gimple_debug_bind_set_var(), gimple_debug_source_bind_get_var(), gimple_debug_source_bind_p(), gimple_debug_source_bind_set_var(), SSA_NAME_VERSION, SSA_OP_USE, SSA_VAR_P, TREE_CODE, int_tree_map::uid, update_stmt(), USE_FROM_PTR, and name_to_copy_elt::version.

Referenced by create_loads_and_stores_for_name().

int set_reduc_phi_uids ( )

Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts.

static tree take_address_of	(	tree	obj,
		tree	type,
		edge	entry,
		int_tree_htab_type	decl_address,
		gimple_stmt_iterator *	gsi
	)

static

Assigns the address of OBJ in TYPE to an ssa name, and returns this name. The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls to their addresses that can be reused. The address of OBJ is known to be invariant in the whole function. Other needed statements are placed right before GSI.

 Since the address of OBJ is invariant, the trees may be shared.
 Avoid rewriting unrelated parts of the code.

 Canonicalize the access to base on a MEM_REF.

 Assign a canonical SSA name to the address of the base decl used
 in the address and share it for all accesses and addresses based
 on it.

 Express the address in terms of the canonical SSA name.

static void transform_to_exit_first_loop	(	struct loop *	loop,
		reduction_info_table_type	reduction_list,
		tree	nit
	)

static

Moves the exit condition of LOOP to the beginning of its header, and duplicates the part of the last iteration that gets disabled to the exit of the loop. NIT is the number of iterations of the loop (used to initialize the variables in the duplicated part).

TODO: the common case is that latch of the loop is empty and immediately follows the loop exit. In this case, it would be better not to copy the body of the loop, but only move the entry of the loop directly before the exit check and increase the number of iterations of the loop by one. This may need some additional preconditioning in case NIT = ~0. REDUCTION_LIST describes the reductions in LOOP.

 Make sure that we have phi nodes on exit for all loop header phis
 (create_parallel_loop requires that).

 Other than reductions, the only gimple reg that should be copied
 out of the loop is the control variable.

     Check if it is a part of reduction.  If it is,
     keep the phi at the reduction's keep_res field.  The
     PHI_RESULT of this phi is the resulting value of the reduction
     variable when exiting the loop.

 Initialize the control variable to number of iterations
 according to the rhs of the exit condition.

References gsi_next(), reduction_info::keep_res, PHI_ARG_DEF_FROM_EDGE, reduction_phi(), and SSA_NAME_DEF_STMT.

static unsigned tree_parallelize_loops ( )

static

static bool try_create_reduction_list	(	loop_p	loop,
		reduction_info_table_type	reduction_list
	)

static

Try to initialize REDUCTION_LIST for code generation part. REDUCTION_LIST describes the reductions.

The iterations of the loop may communicate only through bivs whose iteration space can be distributed efficiently.

static bool try_get_loop_niter ( )

static

Try to initialize NITER for code generation part.

We need to know # of iterations, and there should be no uses of values defined inside loop outside of it, unless the values are invariants of the loop.

Variable Documentation

bitmap parallelized_functions

static

Bitmap containing uids of functions created by parallelization. We cannot allocate it from the default obstack, as it must live across compilation of several functions; we make it gc allocated instead.

Data Structures

Macros

Typedefs

Functions

Variables

Macro Definition Documentation

Typedef Documentation

Function Documentation

}

Variable Documentation