Functions |
static tree | candidate () |
static bool | no_accesses_p () |
static void | dump_access () |
static void | dump_access_tree_1 () |
static void | dump_access_tree () |
static bool | access_has_children_p () |
static bool | access_has_replacements_p () |
static vec< access_p > * | get_base_access_vector () |
static struct access * | find_access_in_subtree (struct access *access, HOST_WIDE_INT offset, HOST_WIDE_INT size) |
static struct access * | get_first_repr_for_decl () |
static struct access * | get_var_base_offset_size_access (tree base, HOST_WIDE_INT offset, HOST_WIDE_INT size) |
static void | add_link_to_rhs () |
static void | relink_to_new_repr () |
static void | add_access_to_work_queue () |
static struct access * | pop_access_from_work_queue () |
static void | sra_initialize () |
static bool | delete_base_accesses (const void *key, void **value, void *data) |
static void | sra_deinitialize () |
static void | disqualify_candidate () |
static bool | type_internals_preclude_sra_p () |
static tree | get_ssa_base_param () |
static void | mark_parm_dereference () |
static struct access * | create_access_1 () |
static struct access * | create_access () |
static bool | type_consists_of_records_p () |
static void | completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset, tree ref) |
static void | completely_scalarize_var () |
static bool | contains_view_convert_expr_p () |
static void | disqualify_base_of_expr () |
static struct access * | build_access_from_expr_1 () |
static bool | build_access_from_expr () |
static bool | disqualify_ops_if_throwing_stmt () |
static bool | build_accesses_from_assign () |
static bool | asm_visit_addr (gimple stmt, tree op, void *data) |
static bool | callsite_has_enough_arguments_p () |
static bool | scan_function () |
static int | compare_access_positions () |
static void | make_fancy_decl_name () |
static void | make_fancy_name_1 () |
static char * | make_fancy_name () |
tree | build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset, tree exp_type, gimple_stmt_iterator *gsi, bool insert_after) |
static tree | build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset, struct access *model, gimple_stmt_iterator *gsi, bool insert_after) |
static tree | build_debug_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset, struct access *model) |
static bool | build_user_friendly_ref_for_offset (tree *res, tree type, HOST_WIDE_INT offset, tree exp_type) |
static bool | is_va_list_type () |
static void | reject () |
static bool | maybe_add_sra_candidate () |
static bool | find_var_candidates () |
static struct access * | sort_and_splice_var_accesses () |
static tree | create_access_replacement () |
static tree | get_access_replacement () |
static bool | build_access_subtree () |
static bool | build_access_trees () |
static bool | expr_with_var_bounded_array_refs_p () |
static bool | analyze_access_subtree (struct access *root, struct access *parent, bool allow_replacements) |
static bool | analyze_access_trees () |
static bool | child_would_conflict_in_lacc (struct access *lacc, HOST_WIDE_INT norm_offset, HOST_WIDE_INT size, struct access **exact_match) |
static struct access * | create_artificial_child_access (struct access *parent, struct access *model, HOST_WIDE_INT new_offset) |
static bool | propagate_subaccesses_across_link () |
static void | propagate_all_subaccesses () |
static bool | analyze_all_variable_accesses () |
static void | generate_subtree_copies (struct access *access, tree agg, HOST_WIDE_INT top_offset, HOST_WIDE_INT start_offset, HOST_WIDE_INT chunk_size, gimple_stmt_iterator *gsi, bool write, bool insert_after, location_t loc) |
static void | init_subtree_with_zero (struct access *access, gimple_stmt_iterator *gsi, bool insert_after, location_t loc) |
static struct access * | get_access_for_expr () |
static bool | sra_modify_expr () |
static enum
unscalarized_data_handling | handle_unscalarized_data_in_subtree (struct access *top_racc, gimple_stmt_iterator *gsi) |
static void | load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc, HOST_WIDE_INT left_offset, gimple_stmt_iterator *old_gsi, gimple_stmt_iterator *new_gsi, enum unscalarized_data_handling *refreshed) |
static enum assignment_mod_result | sra_modify_constructor_assign () |
static tree | get_repl_default_def_ssa_name () |
static bool | contains_vce_or_bfcref_p () |
static enum assignment_mod_result | sra_modify_assign () |
static bool | sra_modify_function_body () |
static void | initialize_parameter_reductions () |
static unsigned int | perform_intra_sra () |
static unsigned int | early_intra_sra () |
static unsigned int | late_intra_sra () |
static bool | gate_intra_sra () |
gimple_opt_pass * | make_pass_sra_early () |
gimple_opt_pass * | make_pass_sra () |
static bool | is_unused_scalar_param () |
static bool | ptr_parm_has_direct_uses () |
static bool | find_param_candidates () |
static bool | mark_maybe_modified (ao_ref *ao, tree vdef, void *data) |
static void | analyze_modified_params () |
static void | propagate_dereference_distances () |
static void | dump_dereferences_table () |
static void | analyze_caller_dereference_legality () |
static struct access * | unmodified_by_ref_scalar_representative () |
static bool | access_precludes_ipa_sra_p () |
static struct access * | splice_param_accesses () |
static int | decide_one_param_reduction () |
static enum ipa_splicing_result | splice_all_param_accesses () |
static int | get_param_index () |
static ipa_parm_adjustment_vec | turn_representatives_into_adjustments (vec< access_p > representatives, int adjustments_count) |
static ipa_parm_adjustment_vec | analyze_all_param_acesses () |
static tree | get_replaced_param_substitute () |
static struct ipa_parm_adjustment * | get_adjustment_for_base () |
static bool | replace_removed_params_ssa_names (gimple stmt, ipa_parm_adjustment_vec adjustments) |
static bool | sra_ipa_modify_expr (tree *expr, bool convert, ipa_parm_adjustment_vec adjustments) |
static bool | sra_ipa_modify_assign (gimple *stmt_ptr, gimple_stmt_iterator *gsi, ipa_parm_adjustment_vec adjustments) |
static bool | ipa_sra_modify_function_body () |
static void | sra_ipa_reset_debug_stmts () |
static bool | not_all_callers_have_enough_arguments_p (struct cgraph_node *node, void *data) |
static bool | convert_callers_for_node (struct cgraph_node *node, void *data) |
static void | convert_callers (struct cgraph_node *node, tree old_decl, ipa_parm_adjustment_vec adjustments) |
static bool | modify_function () |
static bool | has_caller_p () |
static bool | ipa_sra_preliminary_function_checks () |
static unsigned int | ipa_early_sra () |
static bool | ipa_early_sra_gate () |
gimple_opt_pass * | make_pass_early_ipa_sra () |
Scalar Replacement of Aggregates (SRA) converts some structure references into scalar references, exposing them to the scalar optimizers. Copyright (C) 2008-2013 Free Software Foundation, Inc. Contributed by Martin Jambor mjamb.nosp@m.or@s.nosp@m.use.c.nosp@m.z
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/. This file implements Scalar Reduction of Aggregates (SRA). SRA is run twice, once in the early stages of compilation (early SRA) and once in the late stages (late SRA). The aim of both is to turn references to scalar parts of aggregates into uses of independent scalar variables.
The two passes are nearly identical, the only difference is that early SRA does not scalarize unions which are used as the result in a GIMPLE_RETURN statement because together with inlining this can lead to weird type conversions.
Both passes operate in four stages:
- The declarations that have properties which make them candidates for scalarization are identified in function find_var_candidates(). The candidates are stored in candidate_bitmap.
The function body is scanned. In the process, declarations which are used in a manner that prevent their scalarization are removed from the candidate bitmap. More importantly, for every access into an aggregate, an access structure (struct access) is created by create_access() and stored in a vector associated with the aggregate. Among other information, the aggregate declaration, the offset and size of the access and its type are stored in the structure.
On a related note, assign_link structures are created for every assign statement between candidate aggregates and attached to the related accesses.
The vectors of accesses are analyzed. They are first sorted according to their offset and size and then scanned for partially overlapping accesses (i.e. those which overlap but one is not entirely within another). Such an access disqualifies the whole aggregate from being scalarized.
If there is no such inhibiting overlap, a representative access structure is chosen for every unique combination of offset and size. Afterwards, the pass builds a set of trees from these structures, in which children of an access are within their parent (in terms of offset and size).
Then accesses are propagated whenever possible (i.e. in cases when it does not create a partially overlapping access) across assign_links from the right hand side to the left hand side.
Then the set of trees for each declaration is traversed again and those accesses which should be replaced by a scalar are identified.
- The function is traversed again, and for every reference into an aggregate that has some component which is about to be scalarized, statements are amended and new statements are created as necessary. Finally, if a parameter got scalarized, the scalar replacements are initialized with values from respective parameter aggregates. Enumeration of all aggregate reductions we can do.
- Enumerator:
SRA_MODE_EARLY_IPA |
|
SRA_MODE_EARLY_INTRA |
|
SRA_MODE_INTRA |
|
static bool analyze_access_subtree |
( |
struct access * |
root, |
|
|
struct access * |
parent, |
|
|
bool |
allow_replacements |
|
) |
| |
|
static |
Analyze the subtree of accesses rooted in ROOT, scheduling replacements when both seeming beneficial and when ALLOW_REPLACEMENTS allows it. Also set all sorts of access flags appropriately along the way, notably always set grp_read and grp_assign_read according to MARK_READ and grp_write when MARK_WRITE is true.
Creating a replacement for a scalar access is considered beneficial if its grp_hint is set (this means we are either attempting total scalarization or there is more than one direct read access) or according to the following table:
Access written to through a scalar type (once or more times)
|
|   Written to in an assignment statement
|   |
|   |   Access read as scalar _once_
|   |   |
|   |   |   Read in an assignment statement
|   |   |   |
|   |   |   |   Scalarize   Comment

0   0   0   0               No access for the scalar
0   0   0   1               No access for the scalar
0   0   1   0   No          Single read - won't help
0   0   1   1   No          The same case
0   1   0   0               No access for the scalar
0   1   0   1               No access for the scalar
0   1   1   0   Yes         s = *g; return s.i;
0   1   1   1   Yes         The same case as above
1   0   0   0   No          Won't help
1   0   0   1   Yes         s.i = 1; *g = s;
1   0   1   0   Yes         s.i = 5; g = s.i;
1   0   1   1   Yes         The same case as above
1   1   0   0   No          Won't help.
1   1   0   1   Yes         s.i = 1; *g = s;
1   1   1   0   Yes         s = *g; return s.i;
1   1   1   1   Yes         Any of the above yeses
Always create access replacements that cover the whole access. For integral types this means the precision has to match. Avoid assumptions based on the integral type kind, too.
But leave bitfield accesses alone.
References access::offset, and access::size.
Referenced by get_access_replacement().
static void analyze_caller_dereference_legality |
( |
| ) |
|
|
static |
Determine what (parts of) parameters passed by reference that are not assigned to are not certainly dereferenced in this function and thus the dereferencing cannot be safely moved to the caller without potentially introducing a segfault. Mark such REPRESENTATIVES as grp_not_necessarilly_dereferenced.
The dereferenced maximum "distance," i.e. the offset + size of the accessed part is calculated rather than simple booleans are calculated for each pointer parameter to handle cases when only a fraction of the whole aggregate is allocated (see testsuite/gcc.c-torture/execute/ipa-sra-2.c for an example).
The maximum dereference distances for each pointer parameter and BB are already stored in bb_dereference. This routine simply propagates these values upwards by propagate_dereference_distances and then compares the distances of individual parameters in the ENTRY BB to the equivalent distances of each representative of a (fraction of a) parameter.
References access::base, DECL_UID, dump_access(), dump_file, gcc_assert, access::next_grp, access::non_addressable, POINTER_TYPE_P, print_generic_expr(), tree_low_cst(), TREE_TYPE, and TYPE_SIZE.
static bool build_access_from_expr |
( |
| ) |
|
|
static |
Scan expression EXPR and create access structures for all accesses to candidates for scalarization. Return true if any access has been inserted. STMT must be the statement from which the expression is taken, WRITE must be true if the expression is a store and false otherwise.
This means the aggregate is accessed as a whole in a way other than an assign statement and thus cannot be removed even if we had a scalar replacement for everything.
References DECL_P, disqualify_candidate(), and get_base_address().
Referenced by disqualify_ops_if_throwing_stmt().
static struct access* build_access_from_expr_1 |
( |
| ) |
|
|
staticread |
Scan expression EXPR and create access structures for all accesses to candidates for scalarization. Return the created access or NULL if none is created.
We need to dive through V_C_Es in order to get the size of its parameter and not the result type. Ada produces such statements. We are also capable of handling the topmost V_C_E but not any of those buried in other handled components.
fall through
References add_link_to_rhs(), AGGREGATE_TYPE_P, access::base, bitmap_set_bit, DECL_UID, disqualify_ops_if_throwing_stmt(), gimple_assign_lhs(), gimple_assign_rhs1(), gimple_assign_single_p(), gimple_clobber_p(), gimple_has_volatile_ops(), access::grp_assignment_read, access::grp_assignment_write, access::grp_unscalarizable_region, is_gimple_reg_type(), assign_link::lacc, pool_alloc(), assign_link::racc, access::size, SRA_MODE_EARLY_INTRA, SRA_MODE_INTRA, TREE_TYPE, access::type, and useless_type_conversion_p().
Construct a MEM_REF that would reference a part of aggregate BASE of type EXP_TYPE at the given OFFSET. If BASE is something for which get_addr_base_and_unit_offset returns NULL, gsi must be non-NULL and is used to insert new statements either before or below the current one as specified by INSERT_AFTER. This function is not capable of handling bitfields.
BASE must be either a declaration or a memory reference that has correct alignment information embedded in it (e.g. a pre-existing one in SRA).
get_addr_base_and_unit_offset returns NULL for references with a variable offset such as array[var_index].
static int compare_access_positions |
( |
| ) |
|
|
static |
Helper of QSORT function. There are pointers to accesses in the array. An access is considered smaller than another if it has a smaller offset or if the offsets are the same but its size is bigger.
Put any non-aggregate type before any aggregate type.
Put any complex or vector type before any other scalar type.
Put the integral type with the bigger precision first.
Put any integral type with non-full precision last.
Stabilize the sort.
We want the bigger accesses first, thus the opposite operator in the next
line:
static tree create_access_replacement |
( |
| ) |
|
|
static |
Create a variable for the given ACCESS which determines the type, name and a few other properties. Return the variable declaration and store it also to ACCESS->replacement.
Get rid of any SSA_NAMEs embedded in debug_expr, as DECL_DEBUG_EXPR isn't considered when looking for still used SSA_NAMEs and thus they could be freed. All debug info generation cares is whether something is constant or variable and that get_ref_base_and_extent works properly on the expression. It cannot handle accesses at a non-constant offset though, so just give up in those cases.
FALLTHRU
Referenced by expr_with_var_bounded_array_refs_p(), and load_assign_lhs_subreplacements().
static int decide_one_param_reduction |
( |
| ) |
|
|
static |
Decide whether parameters with representative accesses given by REPR should be reduced into components.
Taking the address of a non-addressable field is verboten.
Do not decompose a non-BLKmode param in a way that would
create BLKmode params. Especially for by-reference passing
(thus, pointer-type param) this is hardly worthwhile.
static bool disqualify_ops_if_throwing_stmt |
( |
| ) |
|
|
static |
Disqualify LHS and RHS for scalarization if STMT must end its basic block in modes in which it matters, return true iff they have been disqualified. RHS may be NULL, in that case ignore it. If we scalarize an aggregate in intra-SRA we may need to add statements after each statement. This is not possible if a statement unconditionally has to end the basic block.
References bitmap_set_bit, build_access_from_expr(), gimple_return_retval(), gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_stmt(), basic_block_def::index, NULL_TREE, and stmt_can_throw_external().
Referenced by build_access_from_expr_1().
Generate statements copying scalar replacements of accesses within a subtree into or out of AGG. ACCESS, all its children, siblings and their children are to be processed. AGG is an aggregate type expression (can be a declaration but does not have to be, it can for example also be a mem_ref or a series of handled components). TOP_OFFSET is the offset of the processed subtree which has to be subtracted from offsets of individual accesses to get corresponding offsets for AGG. If CHUNK_SIZE is non-null, copy only replacements in the interval <start_offset, start_offset + chunk_size>, otherwise copy all. GSI is a statement iterator used to place the new statements. WRITE should be true when the statements should write from AGG to the replacement and false if vice versa. if INSERT_AFTER is true, new statements will be added after the current statement in GSI, they will be added before the statement otherwise.
References access::base, build_ref_for_model(), access::expr, access::first_child, force_gimple_operand_gsi(), get_access_for_expr(), get_access_replacement(), gimple_build_assign, gimple_build_debug_bind, gimple_location(), gimple_set_location(), access::grp_partial_lhs, access::grp_to_be_debug_replaced, access::grp_to_be_replaced, gsi_insert_after(), gsi_insert_before(), GSI_NEW_STMT, GSI_SAME_STMT, gsi_stmt(), host_integerp(), HOST_WIDE_INT, NULL, NULL_TREE, access::offset, sra_stats, access::stmt, TREE_CODE, TREE_OPERAND, TREE_TYPE, access::type, type(), and useless_type_conversion_p().
Referenced by contains_vce_or_bfcref_p().
Try to generate statements to load all sub-replacements in an access subtree formed by children of LACC from scalar replacements in the TOP_RACC subtree. If that is not possible, refresh the TOP_RACC base aggregate and load the accesses from it. LEFT_OFFSET is the offset of the left whole subtree being copied. NEW_GSI is stmt iterator used for statement insertions after the original assignment, OLD_GSI is used to insert statements before the assignment. *REFRESHED keeps the information whether we have needed to refresh replacements of the LHS and from which side of the assignments this takes place.
No suitable access on the right hand side, need to load from the aggregate. See if we have to update it first...
References cfun, create_access_replacement(), gcc_checking_assert, get_or_create_ssa_default_def(), access::grp_to_be_debug_replaced, access::grp_to_be_replaced, and access::replacement_decl.
static bool ptr_parm_has_direct_uses |
( |
| ) |
|
|
static |
Scan immediate uses of a default definition SSA name of a parameter PARM and examine whether there are any direct or otherwise infeasible ones. If so, return true, otherwise return false. PARM must be a gimple register with a non-NULL default definition.
Valid uses include dereferences on the lhs and the rhs.
If the number of valid uses does not match the number of
uses in this stmt there is an unhandled use.
static struct access* sort_and_splice_var_accesses |
( |
| ) |
|
|
staticread |
Sort all accesses for the given variable, check for partial overlaps and return NULL if there are any. If there are none, pick a representative for each combination of offset and size and create a linked list out of them. Return the pointer to the first representative and make sure it is the first one in the vector of accesses.
Sort by <OFFSET, SIZE>.
If there are both aggregate-type and scalar-type accesses with
this combination of size and offset, the comparison function
should have put the scalars first.
static struct access* splice_param_accesses |
( |
| ) |
|
|
staticread |
Sort collected accesses for parameter PARM, identify representatives for each accessed region and link them together. Return NULL if there are different but overlapping accesses, return the special ptr value meaning there are no accesses for this parameter if that is the case and return the first representative otherwise. Set *RO_GRP if there is a group of accesses with only read (i.e. no write) accesses.
Access is about to become group representative unless we find some nasty overlap which would preclude us from breaking this parameter apart.
All or nothing law for parameters.
References UNMODIF_BY_REF_ACCESSES, and unmodified_by_ref_scalar_representative().
If the statement pointed to by STMT_PTR contains any expressions that need to be replaced with a different one as noted by ADJUSTMENTS, do so. Handle any potential type incompatibilities (GSI is used to accommodate conversion statements and must point to the statement). Return true iff the statement was modified.
V_C_Es of constructors can cause trouble (PR 42714).
This can happen when an assignment in between two single field structures is turned into an assignment in between two pointers to scalars (PR 42237).
Examine both sides of the assignment statement pointed to by STMT, replace them with a scalar replacement if there is one and generate copying of replacements if scalarized aggregates have been used in the assignment. GSI is used to hold generated statements for type conversions and subtree copying.
If we can avoid creating a VIEW_CONVERT_EXPR do so.
??? This should move to fold_stmt which we simply should
call after building a VIEW_CONVERT_EXPR here.
From this point on, the function deals with assignments in between
aggregates when at least one has scalar reductions of some of its
components. There are three possible scenarios: Both the LHS and RHS have
to-be-scalarized components, 2) only the RHS has or 3) only the LHS has.
In the first case, we would like to load the LHS components from RHS
components whenever possible. If that is not possible, we would like to
read it directly from the RHS (after updating it by storing in it its own
components). If there are some necessary unscalarized data in the LHS,
those will be loaded by the original assignment too. If neither of these
cases happen, the original statement can be removed. Most of this is done
by load_assign_lhs_subreplacements.
In the second case, we would like to store all RHS scalarized components
directly into LHS and if they cover the aggregate completely, remove the
statement too. In the third case, we want the LHS components to be loaded
directly from the RHS (DSE will remove the original statement if it
becomes redundant).
This is a bit complex but manageable when types match and when unions do
not cause confusion in a way that we cannot really load a component of LHS
from the RHS or vice versa (the access representing this level can have
subaccesses that are accessible only through a different union field at a
higher level - different from the one used in the examined expression).
Unions are fun.
Therefore, I specially handle a fourth case, happening when there is a
specific type cast or it is impossible to locate a scalarized subaccess on
the other side of the expression. If that happens, I simply "refresh" the
RHS by storing in it its scalarized components, leave the original statement
there to do the copying and then load the scalar replacements of the LHS.
This is what the first branch does.
This gimplification must be done after generate_subtree_copies,
lest we insert the subtree copies in the middle of the gimplified
sequence.
When an access represents an unscalarizable region, it usually
represents accesses with variable offset and thus must not be used
to generate new memory accesses.
Restore the aggregate RHS from its components so the
prevailing aggregate copy does the right thing.
Re-load the components of the aggregate copy destination.
But use the RHS aggregate to load from to expose more
optimization opportunities.
Modify assignments with a CONSTRUCTOR on their RHS. STMT contains a pointer to the assignment and GSI is the statement iterator pointing at it. Returns the same values as sra_modify_assign.
Remove clobbers of fully scalarized variables, otherwise do nothing.
I have never seen this code path trigger but if it can happen the
following should handle it gracefully.
References build_ref_for_model(), and gimple_assign_set_lhs().
static bool sra_modify_expr |
( |
| ) |
|
|
static |
Replace the expression EXPR with a scalar replacement if there is one and generate other statements to do type conversion or subtree copying if necessary. GSI is used to place newly created statements, WRITE is true if the expression is being written to (it is on a LHS of a statement or output in an assembly statement).
If we replace a non-register typed access simply use the original access expression to extract the scalar component afterwards. This happens if scalarizing a function return value or parameter like in gcc.c-torture/execute/20041124-1.c, 20050316-1.c and gcc.c-torture/compile/20011217-1.c.
We also want to use this when accessing a complex or vector which can be accessed as a different type too, potentially creating a need for type conversion (see PR42196) and when scalarized unions are involved in assembler statements (see PR42398).
References access::base, build_ref_for_model(), find_access_in_subtree(), fold_build1_loc, force_gimple_operand_gsi(), get_access_replacement(), gimple_build_assign, gimple_set_location(), access::grp_partial_lhs, access::grp_to_be_replaced, gsi_insert_after(), GSI_NEW_STMT, GSI_SAME_STMT, handle_unscalarized_data_in_subtree(), NULL_TREE, access::offset, access::size, sra_stats, SRA_UDH_LEFT, SRA_UDH_NONE, access::stmt, access::type, update_stmt(), and useless_type_conversion_p().