GCC Middle and Back End API Reference
tree-parloops.c File Reference

Data Structures

struct  reduction_info
struct  reduction_hasher
struct  name_to_copy_elt
struct  name_to_copy_hasher
struct  lambda_trans_matrix_s
struct  elv_data
struct  clsn_data


typedef hash_table
< reduction_hasher
typedef hash_table
< name_to_copy_hasher
typedef struct


static struct reduction_inforeduction_phi ()
static lambda_trans_matrix lambda_trans_matrix_new (int colsize, int rowsize, struct obstack *lambda_obstack)
static void lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n, lambda_vector vec, lambda_vector dest)
static bool lambda_transform_legal_p (lambda_trans_matrix trans, int nb_loops, vec< ddr_p > dependence_relations)
static bool loop_parallel_p ()
static bool loop_has_blocks_with_irreducible_flag ()
static tree take_address_of (tree obj, tree type, edge entry, int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
int initialize_reductions ()
static tree eliminate_local_variables_1 ()
static void eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi, int_tree_htab_type decl_address)
static void eliminate_local_variables ()
static bool expr_invariant_in_region_p ()
static tree separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies, int_tree_htab_type decl_copies, bool copy_name_p)
static void separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt, name_to_copy_table_type name_copies, int_tree_htab_type decl_copies)
static bool separate_decls_in_region_debug (gimple stmt, name_to_copy_table_type name_copies, int_tree_htab_type decl_copies)
int add_field_for_reduction ()
int add_field_for_name ()
int create_phi_for_local_result ()
int create_call_for_reduction_1 ()
static void create_call_for_reduction (struct loop *loop, reduction_info_table_type reduction_list, struct clsn_data *ld_st_data)
int create_loads_for_reductions ()
static void create_final_loads_for_reduction (reduction_info_table_type reduction_list, struct clsn_data *ld_st_data)
int create_stores_for_reduction ()
int create_loads_and_stores_for_name (name_to_copy_elt **slot, struct clsn_data *clsn_data)
static void separate_decls_in_region (edge entry, edge exit, reduction_info_table_type reduction_list, tree *arg_struct, tree *new_arg_struct, struct clsn_data *ld_st_data)
bool parallelized_function_p ()
static tree create_loop_fn ()
static void transform_to_exit_first_loop (struct loop *loop, reduction_info_table_type reduction_list, tree nit)
static basic_block create_parallel_loop (struct loop *loop, tree loop_fn, tree data, tree new_data, unsigned n_threads, location_t loc)
static void gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list, unsigned n_threads, struct tree_niter_desc *niter)
static bool loop_has_vector_phi_nodes ()
static void build_new_reduction (reduction_info_table_type reduction_list, gimple reduc_stmt, gimple phi)
int set_reduc_phi_uids ()
static void gather_scalar_reductions ()
static bool try_get_loop_niter ()
static bool try_create_reduction_list (loop_p loop, reduction_info_table_type reduction_list)
bool parallelize_loops ()
static bool gate_tree_parallelize_loops ()
static unsigned tree_parallelize_loops ()
gimple_opt_passmake_pass_parallelize_loops ()


static bitmap parallelized_functions

Typedef Documentation

   A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  

Function Documentation

int add_field_for_name ( )
   Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT. The type is passed in DATA.  
int add_field_for_reduction ( )
   Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT. The type is passed in DATA.  
static void build_new_reduction ( reduction_info_table_type  reduction_list,
gimple  reduc_stmt,
gimple  phi 
   Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  

References edge_def::dest, dump_file, dump_flags, gather_scalar_reductions(), gsi_end_p(), gsi_next(), gsi_start_phis(), gsi_stmt(), print_generic_expr(), print_gimple_stmt(), reduction_info::reduc_phi, single_dom_exit(), and virtual_operand_p().

static void create_call_for_reduction ( struct loop loop,
reduction_info_table_type  reduction_list,
struct clsn_data ld_st_data 
   Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  
     Find the fallthru edge from GIMPLE_OMP_CONTINUE.  

References create_loads_for_reductions(), gsi_after_labels(), gsi_insert_before(), GSI_NEW_STMT, clsn_data::load, clsn_data::load_bb, clsn_data::store, and hash_table< Descriptor, Allocator >::traverse().

int create_call_for_reduction_1 ( )
   Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  
     Create phi node.  

References create_phi_for_local_result(), loop::latch, clsn_data::load_bb, and hash_table< Descriptor, Allocator >::traverse().

static void create_final_loads_for_reduction ( reduction_info_table_type  reduction_list,
struct clsn_data ld_st_data 
   Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  
int create_loads_and_stores_for_name ( name_to_copy_elt **  slot,
struct clsn_data clsn_data 
   Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  

References hash_table< Descriptor, Allocator >::create(), edge_def::dest, gather_blocks_in_sese_region(), gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_start_phis(), gsi_stmt(), is_gimple_debug(), separate_decls_in_region_stmt(), single_pred(), single_succ_edge(), split_edge(), and type().

int create_loads_for_reductions ( )
   Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  

Referenced by create_call_for_reduction().

static tree create_loop_fn ( )
static basic_block create_parallel_loop ( struct loop loop,
tree  loop_fn,
tree  data,
tree  new_data,
unsigned  n_threads,
location_t  loc 
   Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  
     Prepare the GIMPLE_OMP_PARALLEL statement.  
     Initialize NEW_DATA.  
     Extract data for GIMPLE_OMP_FOR.  
     Prepare cfg.  
     Emit GIMPLE_OMP_FOR.  
     After the above dom info is hosed.  Re-compute it.  

References add_phi_arg(), gimple_phi_arg_location_from_edge(), gsi_stmt(), loop_latch_edge(), and loop_preheader_edge().

int create_phi_for_local_result ( )
   Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  
     STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  
     STORE_BB has two predecessors.  One coming from  the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  

References build_addr(), create_tmp_var(), current_function_decl, edge_def::dest, reduction_info::field, gimple_build_omp_atomic_load(), gsi_insert_after(), GSI_NEW_STMT, gsi_start_bb(), clsn_data::load, clsn_data::load_bb, make_ssa_name(), reduction_info::reduc_phi, and split_block().

Referenced by create_call_for_reduction_1().

int create_stores_for_reduction ( )
   Callback for htab_traverse.  Store the neutral value for the
  particular reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. into the reduction field.
  The reduction is specified in SLOT. The store information is
  passed in DATA.  
static void eliminate_local_variables ( )
   Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if

   2) Dereferencing a local variable -- these are replaced with indirect

References CDI_DOMINATORS, dominated_by_p(), gimple_bb(), and is_gimple_min_invariant().

static tree eliminate_local_variables_1 ( )
   Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  
         ADDR_EXPR may appear in two contexts:
         -- as a gimple operand, when the address taken is a function invariant
         -- as gimple rhs, when the resulting address in not a function
         We do not need to do anything special in the latter case (the base of
         the memory reference whose address is taken may be replaced in the
         DECL_P case).  The former case is more complicated, as we need to
         ensure that the new address is still a gimple operand.  Thus, it
         is not sufficient to replace just the base of the memory reference --
         we need to move the whole computation of the address out of the
static void eliminate_local_variables_stmt ( edge  entry,
gimple_stmt_iterator gsi,
int_tree_htab_type  decl_address 
   Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
static bool expr_invariant_in_region_p ( )
   Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  
static bool gate_tree_parallelize_loops ( )
static void gather_scalar_reductions ( )
   Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  
     As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  

References flow_bb_inside_loop_p(), and gimple_debug_bind_p().

Referenced by build_new_reduction().

static void gen_parallel_loop ( struct loop loop,
reduction_info_table_type  reduction_list,
unsigned  n_threads,
struct tree_niter_desc niter 
   Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  

         IV = phi (INIT, IV + STEP)
         if (COND)

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:


     if (MAY_BE_ZERO
     goto original;

     store all local loop-invariant variables used in body of the loop to DATA.
     load the variables from DATA.
     goto end;

         IV = phi (INIT, IV + STEP)
         if (COND)

     Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  
     We should compute a better number-of-iterations value for outer loops.
     That is, if we have
    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)

    we should compute nit = n * m, not nit = n.  
    Also may_be_zero handling would need to be adjusted.  
     We assume that the loop usually iterates a lot.  
     Base all the induction variables in LOOP on a single control one.  
     Ensure that the exit condition is the first statement in the loop.  
     Generate initializations for reductions.  
     Eliminate the references to local variables from the loop.  
     In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  
     Create the parallel constructs.  
     Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  
     Free loop bound estimations that could contain references to
     removed statements.  
     Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  
int initialize_reductions ( )
   Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  
     Create initialization in preheader:
     reduction_variable = initialization value of reduction.  
     In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  
     Create a new variable to initialize the reduction.  
     Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  
     Create new variable to hold the initial value.  
static void lambda_matrix_vector_mult ( lambda_matrix  matrix,
int  m,
int  n,
lambda_vector  vec,
lambda_vector  dest 
   Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  
static lambda_trans_matrix lambda_trans_matrix_new ( int  colsize,
int  rowsize,
struct obstack lambda_obstack 
   Allocate a new transformation matrix.  
static bool lambda_transform_legal_p ( lambda_trans_matrix  trans,
int  nb_loops,
vec< ddr_p dependence_relations 
   Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  
     When there are no dependences, the transformation is correct.  
     When there is an unknown relation in the dependence_relations, we
     know that it is no worth looking at this loop nest: give up.  
     For each distance vector in the dependence graph.  
         Don't care about relations for which we know that there is no
         dependence, nor about read-read (aka. output-dependences):
         these data accesses can happen in any order.  
         Conservatively answer: "this transformation is not valid".  
         If the dependence could not be captured by a distance vector,
         conservatively answer that the transform is not valid.  
         Compute trans.dist_vect 
static bool loop_has_blocks_with_irreducible_flag ( )
   Return true when LOOP contains basic blocks marked with the

References handled_component_p(), and unshare_expr().

static bool loop_has_vector_phi_nodes ( )
   Returns true when LOOP contains vector phi nodes.  

References dump_file, dump_flags, number_of_iterations_exit(), and single_dom_exit().

static bool loop_parallel_p ( )
   Data dependency analysis. Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  
     Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  

References dump_file, and dump_flags.

gimple_opt_pass* make_pass_parallelize_loops ( )
bool parallelize_loops ( void  )
   Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
     Do not parallelize loops in the functions created by parallelization.  
         If we use autopar in graphite pass, we use its marked dependency
      checking results.  
             FIXME: the check for vector phi nodes could be removed.  
         FIXME: Bypass this check as graphite doesn't update the
         count and frequency correctly now.  
                 Do not bother with loops in cold areas.  
     Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  
bool parallelized_function_p ( )
   Returns true if FN was created by create_loop_fn.  
static struct reduction_info* reduction_phi ( )
static void separate_decls_in_region ( edge  entry,
edge  exit,
reduction_info_table_type  reduction_list,
tree arg_struct,
tree new_arg_struct,
struct clsn_data ld_st_data 
   Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
       use (a);
       use (b);

   is transformed this way:

   old.a = a;
   old.b = b;

   a' = new->a;
   b' = new->b;
   while (1)
       use (a');
       use (b');

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  
     Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
         It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  
         Create the type for the structure to store the ssa names to.  
             Create the fields for reductions.  
         Create the loads and stores.  
         Load the calculation from memory (after the join of the threads).  
static bool separate_decls_in_region_debug ( gimple  stmt,
name_to_copy_table_type  name_copies,
int_tree_htab_type  decl_copies 
   Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  
static tree separate_decls_in_region_name ( tree  name,
name_to_copy_table_type  name_copies,
int_tree_htab_type  decl_copies,
bool  copy_name_p 
   If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  
         Ensure that when we meet this decl next time, we won't duplicate
         it again.  

References create_tmp_var(), hash_table< Descriptor, Allocator >::find_slot_with_hash(), get_name(), int_tree_map::to, and int_tree_map::uid.

static void separate_decls_in_region_stmt ( edge  entry,
edge  exit,
gimple  stmt,
name_to_copy_table_type  name_copies,
int_tree_htab_type  decl_copies 
   Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  

References hash_table< Descriptor, Allocator >::find_slot_with_hash(), gimple_debug_bind_get_var(), gimple_debug_bind_p(), gimple_debug_bind_reset_value(), gimple_debug_bind_set_var(), gimple_debug_source_bind_get_var(), gimple_debug_source_bind_p(), gimple_debug_source_bind_set_var(), int_tree_map::uid, update_stmt(), and name_to_copy_elt::version.

Referenced by create_loads_and_stores_for_name().

int set_reduc_phi_uids ( )
   Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  
static tree take_address_of ( tree  obj,
tree  type,
edge  entry,
int_tree_htab_type  decl_address,
gimple_stmt_iterator gsi 
   Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  
     Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  
     Canonicalize the access to base on a MEM_REF.  
     Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  
     Express the address in terms of the canonical SSA name.  
static void transform_to_exit_first_loop ( struct loop loop,
reduction_info_table_type  reduction_list,
tree  nit 
   Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  
     Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  
     Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  
         Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  
     Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  

References gsi_next(), reduction_info::keep_res, and reduction_phi().

static unsigned tree_parallelize_loops ( )
static bool try_create_reduction_list ( loop_p  loop,
reduction_info_table_type  reduction_list 
   Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  
     The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  
static bool try_get_loop_niter ( )
   Try to initialize NITER for code generation part.  
     We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  

Variable Documentation

bitmap parallelized_functions
   Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.