GCC Middle and Back End API Reference
tree-loop-distribution.c File Reference

Data Structures

struct  rdg_vertex
struct  rdg_edge
struct  partition_s

Typedefs

typedef struct rdg_vertexrdg_vertex_p
typedef struct rdg_edgerdg_edge_p
typedef struct partition_spartition_t

Enumerations

enum  rdg_dep_type { flow_dd = 'f', control_dd = 'c' }
enum  partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY }

Functions

static void dump_rdg_vertex ()
DEBUG_FUNCTION void debug_rdg_vertex ()
static void dump_rdg ()
DEBUG_FUNCTION void debug_rdg ()
static void dot_rdg_1 ()
DEBUG_FUNCTION void dot_rdg ()
static int rdg_vertex_for_stmt ()
static void create_rdg_edges_for_scalar ()
static void create_edge_for_control_dependence (struct graph *rdg, basic_block bb, int v, control_dependences *cd)
static void create_rdg_flow_edges ()
static void create_rdg_cd_edges ()
static bool create_rdg_vertices (struct graph *rdg, vec< gimple > stmts, loop_p loop, vec< data_reference_p > *datarefs)
static void stmts_from_loop ()
static void free_rdg ()
static struct graphbuild_rdg ()
static partition_t partition_alloc ()
static void partition_free ()
static bool partition_builtin_p ()
static bool partition_reduction_p ()
static void partition_merge_into ()
static bool ssa_name_has_uses_outside_loop_p ()
static bool stmt_has_scalar_dependences_outside_loop ()
static struct loopcopy_loop_before ()
static void create_bb_after_loop ()
static void generate_loops_for_partition (struct loop *loop, partition_t partition, bool copy_p)
static tree build_size_arg_loc ()
static tree build_addr_arg_loc ()
static int const_with_all_bytes_same ()
static void generate_memset_builtin ()
static void generate_memcpy_builtin ()
static void destroy_loop ()
static void generate_code_for_partition (struct loop *loop, partition_t partition, bool copy_p)
static partition_t build_rdg_partition_for_vertex ()
static void classify_partition ()
static tree ref_base_address ()
static bool similar_memory_accesses (struct graph *rdg, partition_t partition1, partition_t partition2)
static void rdg_build_partitions (struct graph *rdg, vec< gimple > starting_stmts, vec< partition_t > *partitions)
static void dump_rdg_partitions ()
void debug_rdg_partitions (vec< partition_t >)
DEBUG_FUNCTION void debug_rdg_partitions ()
static int number_of_rw_in_rdg ()
static int number_of_rw_in_partition ()
static bool partition_contains_all_rw (struct graph *rdg, vec< partition_t > partitions)
static int pg_add_dependence_edges (struct graph *rdg, vec< loop_p > loops, int dir, vec< data_reference_p > drs1, vec< data_reference_p > drs2)
static int pgcmp ()
static int distribute_loop (struct loop *loop, vec< gimple > stmts, control_dependences *cd, int *nb_calls)
static unsigned int tree_loop_distribution ()
static bool gate_tree_loop_distribution ()
gimple_opt_passmake_pass_loop_distribution ()

Typedef Documentation

typedef struct partition_s * partition_t
typedef struct rdg_edge * rdg_edge_p
   Dependence information attached to an edge of the RDG.  
typedef struct rdg_vertex * rdg_vertex_p
@verbatim 

Loop distribution. Copyright (C) 2006-2013 Free Software Foundation, Inc. Contributed by Georges-Andre Silber Georg.nosp@m.es-A.nosp@m.ndre..nosp@m.Silb.nosp@m.er@en.nosp@m.smp..nosp@m.fr and Sebastian Pop sebas.nosp@m.tian.nosp@m..pop@.nosp@m.amd..nosp@m.com.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/.

   This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  
   A Reduced Dependence Graph (RDG) vertex representing a statement.  

Enumeration Type Documentation

Enumerator:
PKIND_NORMAL 
PKIND_MEMSET 
PKIND_MEMCPY 
   Data dependence type.  
Enumerator:
flow_dd 
     Read After Write (RAW).  
control_dd 
     Control dependence (execute conditional on).  

Function Documentation

static tree build_addr_arg_loc ( )
static
   Build an address argument for a memory operation call.  
     Test for a negative stride, iterating over every element.  
static struct graph* build_rdg ( )
staticread
   Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  
     Create the RDG vertices from the stmts of the loop nest.  

References PKIND_MEMCPY, PKIND_MEMSET, and PKIND_NORMAL.

static partition_t build_rdg_partition_for_vertex ( )
static
   Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  

References DR_IS_READ, gimple_assign_single_p(), and gimple_vuse().

static tree build_size_arg_loc ( )
static
   Build the size argument for a memory operation call.  
static void classify_partition ( )
static
   Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect only the memset zero pattern.  
         If the stmt has uses outside of the loop mark it as reduction.  
     Perform general partition disqualification for builtins.  
     Detect memset and memcpy.  
         Any scalar stmts are ok.  
         Otherwise just regular loads/stores.  
         But exactly one store and/or load.  
         Direct aggregate copy or via an SSA name temporary.  
         Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  
static int const_with_all_bytes_same ( )
static
   If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  
static struct loop* copy_loop_before ( )
staticread
   Return a copy of LOOP placed before LOOP.  

References CP_SIMPLE_PREHEADERS, create_bb_after_loop(), and create_preheader().

static void create_bb_after_loop ( )
static
   Creates an empty basic block after LOOP.  

Referenced by copy_loop_before().

static void create_edge_for_control_dependence ( struct graph rdg,
basic_block  bb,
int  v,
control_dependences cd 
)
static
   Creates an edge for the control dependences of BB to the vertex V.  

References create_rdg_edges_for_scalar(), and graph::n_vertices.

static void create_rdg_cd_edges ( )
static
   Creates the edges of the reduced dependence graph RDG.  

References vertex::data, find_data_references_in_stmt(), and gimple_set_uid().

static void create_rdg_edges_for_scalar ( )
static
static void create_rdg_flow_edges ( )
static
   Creates the edges of the reduced dependence graph RDG.  
static bool create_rdg_vertices ( struct graph rdg,
vec< gimple stmts,
loop_p  loop,
vec< data_reference_p > *  datarefs 
)
static
   Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  
         Record statement to vertex mapping.  
DEBUG_FUNCTION void debug_rdg ( )
   Call dump_rdg on stderr.  

References pp_gimple_stmt_1(), and graph::vertices.

void debug_rdg_partitions ( vec< partition_t )
   Debug PARTITIONS.  
DEBUG_FUNCTION void debug_rdg_partitions ( )
DEBUG_FUNCTION void debug_rdg_vertex ( )
   Call dump_rdg_vertex on stderr.  
static void destroy_loop ( )
static
   Remove and destroy the loop LOOP.  
         We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  
static int distribute_loop ( struct loop loop,
vec< gimple stmts,
control_dependences cd,
int *  nb_calls 
)
static
   Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  
     If we are only distributing patterns but did not detect any,
     simply bail out.  
     If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  
     Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  
     Apply our simple cost model - fuse partitions with similar
     memory accesses.  
     Build the partition dependency graph.  
             FIXME - leaks.  
               dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  
         Add edges to the reduction partition (if any) to force it last.  
         Compute partitions we cannot separate and fuse them.  
         Now order the remaining nodes in postorder.  

References dump_bitmap(), dump_file, dump_flags, partition_free(), partition_merge_into(), and partition_s::stmts.

DEBUG_FUNCTION void dot_rdg ( )
   Display the Reduced Dependence Graph using dotty.  
     When debugging, you may want to enable the following code.  

References gimple_uid().

static void dot_rdg_1 ( )
static
         Highlight reads from memory.  
         Highlight stores to memory.  
                These are the most common dependences: don't print these. 
static void dump_rdg ( )
static
   Dump the reduced dependence graph RDG to FILE.  

References buffer, pretty_printer::buffer, graph::n_vertices, and output_buffer::stream.

static void dump_rdg_partitions ( )
static
   Dump to FILE the PARTITIONS.  
static void dump_rdg_vertex ( )
static
   Dump vertex I in RDG to FILE.  
static void free_rdg ( )
static
   Free the reduced dependence graph RDG.  
static bool gate_tree_loop_distribution ( )
static
static void generate_code_for_partition ( struct loop loop,
partition_t  partition,
bool  copy_p 
)
static
   Generates code for PARTITION.  
         Reductions all have to be in the last partition.  
     Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  

References gimple_has_volatile_ops(), partition_s::kind, partition_s::main_dr, partition_s::niter, PKIND_NORMAL, partition_s::reduction_p, partition_s::secondary_dr, stmt_has_scalar_dependences_outside_loop(), and partition_s::stmts.

static void generate_loops_for_partition ( struct loop loop,
partition_t  partition,
bool  copy_p 
)
static
   Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  
     Remove stmts not in the PARTITION bitmap.  
                 Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  
static void generate_memcpy_builtin ( )
static
   Generate a call to memcpy for PARTITION in LOOP.  
     The new statements will be placed before LOOP.  
static void generate_memset_builtin ( )
static
   Generate a call to memset for PARTITION in LOOP.  
     The new statements will be placed before LOOP.  
     This exactly matches the pattern recognition in classify_partition.  
     Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  
gimple_opt_pass* make_pass_loop_distribution ( )
static int number_of_rw_in_partition ( )
static
   Returns the number of read and write operations in a PARTITION of
   the RDG.  
static int number_of_rw_in_rdg ( )
static
   Returns the number of read and write operations in the RDG.  
static partition_t partition_alloc ( )
static
   Allocate and initialize a partition from BITMAP.  
static bool partition_builtin_p ( )
static
   Returns true if the partition can be generated as a builtin.  
static bool partition_contains_all_rw ( struct graph rdg,
vec< partition_t partitions 
)
static
   Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  

References find_loop_nest().

static void partition_free ( )
static
   Free PARTITION.  

Referenced by distribute_loop().

static void partition_merge_into ( )
static
   Merge PARTITION into the partition DEST.  

Referenced by distribute_loop().

static bool partition_reduction_p ( )
static
   Returns true if the partition contains a reduction.  
static int pg_add_dependence_edges ( struct graph rdg,
vec< loop_p loops,
int  dir,
vec< data_reference_p drs1,
vec< data_reference_p drs2 
)
static
   Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  
     dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  
           Re-shuffle data-refs to be in dominator order.  
               Known dependences can still be unordered througout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  
static int pgcmp ( )
static
   Compare postorder number of the partition graph vertices V1 and V2.  
static void rdg_build_partitions ( struct graph rdg,
vec< gimple starting_stmts,
vec< partition_t > *  partitions 
)
static
   Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  
         If the vertex is already contained in another partition so
         is the partition rooted at it.  
     All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  
static int rdg_vertex_for_stmt ( )
static
   Returns the index of STMT in RDG.  

Referenced by create_rdg_edges_for_scalar().

static tree ref_base_address ( )
static
   For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  
static bool similar_memory_accesses ( struct graph rdg,
partition_t  partition1,
partition_t  partition2 
)
static
   Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  
     First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  
     Then check all data-references against each other.  
static bool ssa_name_has_uses_outside_loop_p ( )
static
   Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  

References delete_update_ssa(), free_original_copy_tables(), initialize_original_copy_tables(), loop_preheader_edge(), and slpeel_tree_duplicate_loop_to_edge_cfg().

static bool stmt_has_scalar_dependences_outside_loop ( )
static
   Returns true when STMT defines a scalar variable used after the
   loop LOOP.  

Referenced by generate_code_for_partition().

static void stmts_from_loop ( )
static
   Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  

References graph_edge::data, vertex::data, free(), free_data_refs(), free_graph(), gimple_set_uid(), graph::n_vertices, vertex::succ, graph_edge::succ_next, and graph::vertices.

static unsigned int tree_loop_distribution ( )
static
   Distribute all loops in the current function.  
     We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  
         If the loop doesn't have a single exit we will fail anyway,
         so do that early.  
         Only optimize hot loops.  
         Initialize the worklist with stmts we seed the partitions with.  
                 Distribute stmts which have defs that are used outside of
                 the loop.  
                 If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  
                 Distribute stmts which have defs that are used outside of
                 the loop.  
                 Otherwise only distribute stores for now.