GCC Middle and Back End API Reference
tree-loop-distribution.c File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"
Include dependency graph for tree-loop-distribution.c:

Data Structures

struct  rdg_vertex
struct  rdg_edge
struct  partition_s

Macros

#define RDGV_STMT(V)   ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V)   ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V)   ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V)   ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I)   RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I)   RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I)   RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I)   RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
#define RDGE_TYPE(E)   ((struct rdg_edge *) ((E)->data))->type
#define PGDATA(i)   ((pgdata *)(pg->vertices[i].data))

Typedefs

typedef struct rdg_vertexrdg_vertex_p
typedef struct rdg_edgerdg_edge_p
typedef struct partition_spartition_t

Enumerations

enum  rdg_dep_type { flow_dd = 'f', control_dd = 'c' }
enum  partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY }

Functions

static void dump_rdg_vertex ()
DEBUG_FUNCTION void debug_rdg_vertex ()
static void dump_rdg ()
DEBUG_FUNCTION void debug_rdg ()
static void dot_rdg_1 ()
DEBUG_FUNCTION void dot_rdg ()
static int rdg_vertex_for_stmt ()
static void create_rdg_edges_for_scalar ()
static void create_edge_for_control_dependence (struct graph *rdg, basic_block bb, int v, control_dependences *cd)
static void create_rdg_flow_edges ()
static void create_rdg_cd_edges ()
static bool create_rdg_vertices (struct graph *rdg, vec< gimple > stmts, loop_p loop, vec< data_reference_p > *datarefs)
static void stmts_from_loop ()
static void free_rdg ()
static struct graphbuild_rdg ()
static partition_t partition_alloc ()
static void partition_free ()
static bool partition_builtin_p ()
static bool partition_reduction_p ()
static void partition_merge_into ()
static bool ssa_name_has_uses_outside_loop_p ()
static bool stmt_has_scalar_dependences_outside_loop ()
static struct loopcopy_loop_before ()
static void create_bb_after_loop ()
static void generate_loops_for_partition (struct loop *loop, partition_t partition, bool copy_p)
static tree build_size_arg_loc ()
static tree build_addr_arg_loc ()
static int const_with_all_bytes_same ()
static void generate_memset_builtin ()
static void generate_memcpy_builtin ()
static void destroy_loop ()
static void generate_code_for_partition (struct loop *loop, partition_t partition, bool copy_p)
static partition_t build_rdg_partition_for_vertex ()
static void classify_partition ()
static tree ref_base_address ()
static bool similar_memory_accesses (struct graph *rdg, partition_t partition1, partition_t partition2)
static void rdg_build_partitions (struct graph *rdg, vec< gimple > starting_stmts, vec< partition_t > *partitions)
static void dump_rdg_partitions ()
void debug_rdg_partitions (vec< partition_t >)
DEBUG_FUNCTION void debug_rdg_partitions ()
static int number_of_rw_in_rdg ()
static int number_of_rw_in_partition ()
static bool partition_contains_all_rw (struct graph *rdg, vec< partition_t > partitions)
static int pg_add_dependence_edges (struct graph *rdg, vec< loop_p > loops, int dir, vec< data_reference_p > drs1, vec< data_reference_p > drs2)
static int pgcmp ()
static int distribute_loop (struct loop *loop, vec< gimple > stmts, control_dependences *cd, int *nb_calls)
static unsigned int tree_loop_distribution ()
static bool gate_tree_loop_distribution ()
gimple_opt_passmake_pass_loop_distribution ()

Macro Definition Documentation

#define PGDATA (   i)    ((pgdata *)(pg->vertices[i].data))
#define RDG_DATAREFS (   RDG,
  I 
)    RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT (   RDG,
  I 
)    RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT (   RDG,
  I 
)    RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_STMT (   RDG,
  I 
)    RDGV_STMT (&(RDG->vertices[I]))
#define RDGE_TYPE (   E)    ((struct rdg_edge *) ((E)->data))->type
#define RDGV_DATAREFS (   V)    ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_READS (   V)    ((struct rdg_vertex *) ((V)->data))->has_mem_reads

Referenced by create_rdg_cd_edges().

#define RDGV_HAS_MEM_WRITE (   V)    ((struct rdg_vertex *) ((V)->data))->has_mem_write

Referenced by create_rdg_cd_edges().

#define RDGV_STMT (   V)    ((struct rdg_vertex *) ((V)->data))->stmt

Typedef Documentation

typedef struct partition_s * partition_t
typedef struct rdg_edge * rdg_edge_p

Dependence information attached to an edge of the RDG.

typedef struct rdg_vertex * rdg_vertex_p

Loop distribution. Copyright (C) 2006-2013 Free Software Foundation, Inc. Contributed by Georges-Andre Silber Georg.nosp@m.es-A.nosp@m.ndre..nosp@m.Silb.nosp@m.er@en.nosp@m.smp..nosp@m.fr and Sebastian Pop sebas.nosp@m.tian.nosp@m..pop@.nosp@m.amd..nosp@m.com.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/. This pass performs loop distribution: for example, the loop

|DO I = 2, N | A(I) = B(I) + C | D(I) = A(I-1)*E |ENDDO

is transformed to

|DOALL I = 2, N | A(I) = B(I) + C |ENDDO | |DOALL I = 2, N | D(I) = A(I-1)*E |ENDDO

This pass uses an RDG, Reduced Dependence Graph built on top of the data dependence relations. The RDG is then topologically sorted to obtain a map of information producers/consumers based on which it generates the new loops. A Reduced Dependence Graph (RDG) vertex representing a statement.


Enumeration Type Documentation

Enumerator:
PKIND_NORMAL 
PKIND_MEMSET 
PKIND_MEMCPY 

Data dependence type.

Enumerator:
flow_dd 

Read After Write (RAW).

control_dd 

Control dependence (execute conditional on).


Function Documentation

static tree build_addr_arg_loc ( )
static

Build an address argument for a memory operation call.

Test for a negative stride, iterating over every element.

static struct graph* build_rdg ( )
staticread

Build the Reduced Dependence Graph (RDG) with one vertex per statement of the loop nest LOOP_NEST, and one edge per data dependence or scalar dependence.

Create the RDG vertices from the stmts of the loop nest.

References PKIND_MEMCPY, PKIND_MEMSET, and PKIND_NORMAL.

static partition_t build_rdg_partition_for_vertex ( )
static

Returns a partition with all the statements needed for computing the vertex V of the RDG, also including the loop exit conditions.

References DR_IS_READ, gimple_assign_single_p(), gimple_vuse(), NULL, RDG_DATAREFS, and RDG_STMT.

static tree build_size_arg_loc ( )
static

Build the size argument for a memory operation call.

static void classify_partition ( )
static

Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. For the moment we detect only the memset zero pattern.

If the stmt has uses outside of the loop mark it as reduction.   
 Perform general partition disqualification for builtins.   


 Detect memset and memcpy.   
     Any scalar stmts are ok.   
     Otherwise just regular loads/stores.   
     But exactly one store and/or load.   
     Direct aggregate copy or via an SSA name temporary.   
     Now check that if there is a dependence this dependence is
     of a suitable form for memmove.   

References NULL.

static int const_with_all_bytes_same ( )
static

If VAL memory representation contains the same value in all bytes, return that value, otherwise return -1. E.g. for 0x24242424 return 0x24, for IEEE double 747708026454360457216.0 return 0x44, etc.

static struct loop* copy_loop_before ( )
staticread

Return a copy of LOOP placed before LOOP.

References CP_SIMPLE_PREHEADERS, create_bb_after_loop(), create_preheader(), gcc_assert, and NULL.

static void create_bb_after_loop ( )
static

Creates an empty basic block after LOOP.

Referenced by copy_loop_before().

static void create_edge_for_control_dependence ( struct graph rdg,
basic_block  bb,
int  v,
control_dependences cd 
)
static

Creates an edge for the control dependences of BB to the vertex V.

References create_rdg_edges_for_scalar(), DEF_FROM_PTR, FOR_EACH_PHI_OR_STMT_DEF, graph::n_vertices, RDG_STMT, and SSA_OP_DEF.

static void create_rdg_cd_edges ( )
static

Creates the edges of the reduced dependence graph RDG.

References vertex::data, find_data_references_in_stmt(), gimple_set_uid(), RDGV_DATAREFS, RDGV_HAS_MEM_READS, RDGV_HAS_MEM_WRITE, and RDGV_STMT.

static void create_rdg_edges_for_scalar ( )
static
static void create_rdg_flow_edges ( )
static

Creates the edges of the reduced dependence graph RDG.

static bool create_rdg_vertices ( struct graph rdg,
vec< gimple stmts,
loop_p  loop,
vec< data_reference_p > *  datarefs 
)
static

Build the vertices of the reduced dependence graph RDG. Return false if that failed.

Record statement to vertex mapping.

DEBUG_FUNCTION void debug_rdg ( )

Call dump_rdg on stderr.

References pp_gimple_stmt_1(), RDGV_STMT, TDF_SLIM, and graph::vertices.

void debug_rdg_partitions ( vec< partition_t )

Debug PARTITIONS.

DEBUG_FUNCTION void debug_rdg_partitions ( )
DEBUG_FUNCTION void debug_rdg_vertex ( )

Call dump_rdg_vertex on stderr.

static void destroy_loop ( )
static

Remove and destroy the loop LOOP.

We have made sure to not leave any dangling uses of SSA names defined in the loop. With the exception of virtuals. Make sure we replace all uses of virtual defs that will remain outside of the loop with the bare symbol as delete_basic_block will release them.

static int distribute_loop ( struct loop loop,
vec< gimple stmts,
control_dependences cd,
int *  nb_calls 
)
static

Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate only the statements from STMTS into separate loops. Returns the number of distributed loops.

 If we are only distributing patterns but did not detect any,
 simply bail out.   
 If we are only distributing patterns fuse all partitions that
 were not classified as builtins.  This also avoids chopping
 a loop into pieces, separated by builtin calls.  That is, we
 only want no or a single loop body remaining.   
 Due to limitations in the transform phase we have to fuse all
 reduction partitions into the last partition so the existing
 loop will contain all loop-closed PHI nodes.   
 Apply our simple cost model - fuse partitions with similar
 memory accesses.   
 Build the partition dependency graph.   
         FIXME - leaks.   
           dependence direction - 0 is no dependence, -1 is back,
           1 is forth, 2 is both (we can stop then, merging will occur).   
     Add edges to the reduction partition (if any) to force it last.   
     Compute partitions we cannot separate and fuse them.   
     Now order the remaining nodes in postorder.   

References dump_bitmap(), dump_file, dump_flags, partition_free(), partition_merge_into(), and partition_s::stmts.

DEBUG_FUNCTION void dot_rdg ( )

Display the Reduced Dependence Graph using dotty.

When debugging, you may want to enable the following code.

References gcc_checking_assert, gimple_uid(), and RDG_STMT.

static void dot_rdg_1 ( )
static
     Highlight reads from memory.   
     Highlight stores to memory.   
            These are the most common dependences: don't print these.  
static void dump_rdg ( )
static

Dump the reduced dependence graph RDG to FILE.

References buffer, pretty_printer::buffer, graph::n_vertices, pp_needs_newline, and output_buffer::stream.

static void dump_rdg_partitions ( )
static

Dump to FILE the PARTITIONS.

static void dump_rdg_vertex ( )
static

Dump vertex I in RDG to FILE.

static void free_rdg ( )
static

Free the reduced dependence graph RDG.

static bool gate_tree_loop_distribution ( )
static
static void generate_code_for_partition ( struct loop loop,
partition_t  partition,
bool  copy_p 
)
static

Generates code for PARTITION.

Reductions all have to be in the last partition.   

Common tail for partitions we turn into a call. If this was the last partition for which we generate code, we have to destroy the loop.

References EXECUTE_IF_SET_IN_BITMAP, gimple_has_volatile_ops(), partition_s::kind, partition_s::main_dr, partition_s::niter, NULL, NULL_TREE, PKIND_NORMAL, RDG_STMT, partition_s::reduction_p, partition_s::secondary_dr, stmt_has_scalar_dependences_outside_loop(), and partition_s::stmts.

static void generate_loops_for_partition ( struct loop loop,
partition_t  partition,
bool  copy_p 
)
static

Generate code for PARTITION from the code in LOOP. The loop is copied when COPY_P is true. All the statements not flagged in the PARTITION bitmap are removed from the loop or from its copy. The statements are indexed in sequence inside a basic block, and the basic blocks of a loop are taken in dom order.

Remove stmts not in the PARTITION bitmap.

             Choose an arbitrary path through the empty CFG part
             that this unnecessary control stmt controls.   
static void generate_memcpy_builtin ( )
static

Generate a call to memcpy for PARTITION in LOOP.

The new statements will be placed before LOOP.

static void generate_memset_builtin ( )
static

Generate a call to memset for PARTITION in LOOP.

 The new statements will be placed before LOOP.   
 This exactly matches the pattern recognition in classify_partition.   
 Handle constants like 0x15151515 and similarly
 floating point constants etc. where all bytes are the same.   
gimple_opt_pass* make_pass_loop_distribution ( )
static int number_of_rw_in_partition ( )
static

Returns the number of read and write operations in a PARTITION of the RDG.

static int number_of_rw_in_rdg ( )
static

Returns the number of read and write operations in the RDG.

static partition_t partition_alloc ( )
static

Allocate and initialize a partition from BITMAP.

static bool partition_builtin_p ( )
static

Returns true if the partition can be generated as a builtin.

static bool partition_contains_all_rw ( struct graph rdg,
vec< partition_t partitions 
)
static

Returns true when one of the PARTITIONS contains all the read or write operations of RDG.

References find_loop_nest(), and NULL.

static void partition_free ( )
static

Free PARTITION.

References FOR_EACH_IMM_USE_FAST.

Referenced by distribute_loop().

static void partition_merge_into ( )
static

Merge PARTITION into the partition DEST.

Referenced by distribute_loop().

static bool partition_reduction_p ( )
static

Returns true if the partition contains a reduction.

static int pg_add_dependence_edges ( struct graph rdg,
vec< loop_p loops,
int  dir,
vec< data_reference_p drs1,
vec< data_reference_p drs2 
)
static

Compute partition dependence created by the data references in DRS1 and DRS2 and modify and return DIR according to that.

 dependence direction - 0 is no dependence, -1 is back,
 1 is forth, 2 is both (we can stop then, merging will occur).   
       Re-shuffle data-refs to be in dominator order.   
           Known dependences can still be unordered througout the
           iteration space, see gcc.dg/tree-ssa/ldist-16.c.   
static int pgcmp ( )
static

Compare postorder number of the partition graph vertices V1 and V2.

static void rdg_build_partitions ( struct graph rdg,
vec< gimple starting_stmts,
vec< partition_t > *  partitions 
)
static

Aggregate several components into a useful partition that is registered in the PARTITIONS vector. Partitions will be distributed in different loops.

If the vertex is already contained in another partition so
is the partition rooted at it.   

All vertices should have been assigned to at least one partition now, other than vertices belonging to dead code.

static int rdg_vertex_for_stmt ( )
static

Returns the index of STMT in RDG.

Referenced by create_rdg_edges_for_scalar().

static tree ref_base_address ( )
static

For a data reference REF, return the declaration of its base address or NULL_TREE if the base is not determined.

static bool similar_memory_accesses ( struct graph rdg,
partition_t  partition1,
partition_t  partition2 
)
static

Returns true when PARTITION1 and PARTITION2 have similar memory accesses in RDG.

First check whether in the intersection of the two partitions are any loads or stores. Common loads are the situation that happens most often.

 Then check all data-references against each other.   
static bool ssa_name_has_uses_outside_loop_p ( )
static

Returns true when DEF is an SSA_NAME defined in LOOP and used after the LOOP.

References delete_update_ssa(), free_original_copy_tables(), gcc_assert, initialize_original_copy_tables(), loop_preheader_edge(), NULL, and slpeel_tree_duplicate_loop_to_edge_cfg().

static bool stmt_has_scalar_dependences_outside_loop ( )
static

Returns true when STMT defines a scalar variable used after the loop LOOP.

Referenced by generate_code_for_partition().

static void stmts_from_loop ( )
static

Initialize STMTS with all the statements of LOOP. The order in which we discover statements is important as generate_loops_for_partition is using the same traversal for identifying statements in loop copies.

References graph_edge::data, vertex::data, free_data_refs(), free_graph(), gimple_set_uid(), graph::n_vertices, RDGV_DATAREFS, RDGV_STMT, vertex::succ, graph_edge::succ_next, and graph::vertices.

static unsigned int tree_loop_distribution ( )
static

Distribute all loops in the current function.

 We can at the moment only distribute non-nested loops, thus restrict
 walking to innermost loops.   
     If the loop doesn't have a single exit we will fail anyway,
     so do that early.   
     Only optimize hot loops.   
     Initialize the worklist with stmts we seed the partitions with.   
             Distribute stmts which have defs that are used outside of
             the loop.   
             If there is a stmt with side-effects bail out - we
             cannot and should not distribute this loop.   
             Distribute stmts which have defs that are used outside of
             the loop.   
             Otherwise only distribute stores for now.