#include "tree-data-ref.h"
#include "target.h"
#include "hash-table.h"

Include dependency graph for tree-vectorizer.h:

This graph shows which files directly or indirectly include this file:

Data Structures
struct	_stmt_info_for_cost
struct	_slp_tree
struct	_slp_instance
struct	_slp_oprnd_info
struct	_vect_peel_info
struct	_vect_peel_extended_info
struct	peel_info_hasher
struct	_loop_vec_info
struct	_bb_vec_info
struct	_stmt_vec_info
struct	dataref_aux

Macros
#define	UNKNOWN_LOC UNKNOWN_LOCATION
#define	EXPR_LOC(e) EXPR_LOCATION (e)
#define	LOC_FILE(l) LOCATION_FILE (l)
#define	LOC_LINE(l) LOCATION_LINE (l)
#define	VECTORIZABLE_CYCLE_DEF(D)
#define	SLP_INSTANCE_TREE(S) (S)->root
#define	SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size
#define	SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
#define	SLP_INSTANCE_BODY_COST_VEC(S) (S)->body_cost_vec
#define	SLP_INSTANCE_LOADS(S) (S)->loads
#define	SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load
#define	SLP_TREE_CHILDREN(S) (S)->children
#define	SLP_TREE_SCALAR_STMTS(S) (S)->stmts
#define	SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
#define	SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
#define	SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
#define	LOOP_VINFO_LOOP(L) (L)->loop
#define	LOOP_VINFO_BBS(L) (L)->bbs
#define	LOOP_VINFO_NITERS(L) (L)->num_iters
#define	LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged
#define	LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
#define	LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define	LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
#define	LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
#define	LOOP_VINFO_LOOP_NEST(L) (L)->loop_nest
#define	LOOP_VINFO_DATAREFS(L) (L)->datarefs
#define	LOOP_VINFO_DDRS(L) (L)->ddrs
#define	LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define	LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define	LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define	LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
#define	LOOP_VINFO_LOC(L) (L)->loop_line_number
#define	LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
#define	LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
#define	LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define	LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define	LOOP_VINFO_REDUCTIONS(L) (L)->reductions
#define	LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
#define	LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
#define	LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
#define	LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define	LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
#define	LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) (L)->may_misalign_stmts.length () > 0
#define	LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) (L)->may_alias_ddrs.length () > 0
#define	NITERS_KNOWN_P(n)
#define	LOOP_VINFO_NITERS_KNOWN_P(L) NITERS_KNOWN_P ((L)->num_iters)
#define	BB_VINFO_BB(B) (B)->bb
#define	BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores
#define	BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
#define	BB_VINFO_DATAREFS(B) (B)->datarefs
#define	BB_VINFO_DDRS(B) (B)->ddrs
#define	BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data
#define	STMT_VINFO_TYPE(S) (S)->type
#define	STMT_VINFO_STMT(S) (S)->stmt
#define	STMT_VINFO_LOOP_VINFO(S) (S)->loop_vinfo
#define	STMT_VINFO_BB_VINFO(S) (S)->bb_vinfo
#define	STMT_VINFO_RELEVANT(S) (S)->relevant
#define	STMT_VINFO_LIVE_P(S) (S)->live
#define	STMT_VINFO_VECTYPE(S) (S)->vectype
#define	STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt
#define	STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
#define	STMT_VINFO_DATA_REF(S) (S)->data_ref_info
#define	STMT_VINFO_GATHER_P(S) (S)->gather_p
#define	STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
#define	STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
#define	STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
#define	STMT_VINFO_DR_INIT(S) (S)->dr_init
#define	STMT_VINFO_DR_OFFSET(S) (S)->dr_offset
#define	STMT_VINFO_DR_STEP(S) (S)->dr_step
#define	STMT_VINFO_DR_ALIGNED_TO(S) (S)->dr_aligned_to
#define	STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define	STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define	STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
#define	STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define	STMT_VINFO_DEF_TYPE(S) (S)->def_type
#define	STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
#define	STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element
#define	STMT_VINFO_GROUP_SIZE(S) (S)->size
#define	STMT_VINFO_GROUP_STORE_COUNT(S) (S)->store_count
#define	STMT_VINFO_GROUP_GAP(S) (S)->gap
#define	STMT_VINFO_GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt
#define	STMT_VINFO_GROUPED_ACCESS(S) ((S)->first_element != NULL && (S)->data_ref_info)
#define	STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
#define	GROUP_FIRST_ELEMENT(S) (S)->first_element
#define	GROUP_NEXT_ELEMENT(S) (S)->next_element
#define	GROUP_SIZE(S) (S)->size
#define	GROUP_STORE_COUNT(S) (S)->store_count
#define	GROUP_GAP(S) (S)->gap
#define	GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt
#define	STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)
#define	HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid)
#define	PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
#define	STMT_SLP_TYPE(S) (S)->slp_type
#define	VECT_MAX_COST 1000
#define	MAX_INTERM_CVT_STEPS 3
#define	MAX_VECTORIZATION_FACTOR 32
#define	DR_MISALIGNMENT(DR) dr_misalignment (DR)
#define	SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
#define	NUM_PATTERNS 11

Typedefs
typedef source_location	LOC
typedef struct _stmt_info_for_cost	stmt_info_for_cost
typedef vec< stmt_info_for_cost >	stmt_vector_for_cost
typedef struct _slp_tree *	slp_tree
typedef struct _slp_instance *	slp_instance
typedef struct _slp_oprnd_info *	slp_oprnd_info
typedef struct _vect_peel_info *	vect_peel_info
typedef struct _vect_peel_extended_info *	vect_peel_extended_info
typedef struct _loop_vec_info *	loop_vec_info
typedef struct _bb_vec_info *	bb_vec_info
typedef struct data_reference *	dr_p
typedef struct _stmt_vec_info *	stmt_vec_info
typedef void *	vec_void_p
typedef gimple(*	vect_recog_func_ptr )(vec< gimple > , tree , tree *)

Enumerations
enum	vect_var_kind { vect_simple_var, vect_pointer_var, vect_scalar_var }
enum	operation_type { unary_op = 1, binary_op, ternary_op }
enum	dr_alignment_support { dr_unaligned_unsupported, dr_unaligned_supported, dr_explicit_realign, dr_explicit_realign_optimized, dr_aligned }
enum	vect_def_type { vect_uninitialized_def = 0, vect_constant_def = 1, vect_external_def, vect_internal_def, vect_induction_def, vect_reduction_def, vect_double_reduction_def, vect_nested_cycle, vect_unknown_def_type }
enum	stmt_vec_info_type { undef_vec_info_type = 0, load_vec_info_type, store_vec_info_type, shift_vec_info_type, op_vec_info_type, call_vec_info_type, assignment_vec_info_type, condition_vec_info_type, reduc_vec_info_type, induc_vec_info_type, type_promotion_vec_info_type, type_demotion_vec_info_type, type_conversion_vec_info_type, loop_exit_ctrl_vec_info_type }
enum	vect_relevant { vect_unused_in_scope = 0, vect_used_in_outer_by_reduction, vect_used_in_outer, vect_used_by_reduction, vect_used_in_scope }
enum	slp_vect_type { loop_vect = 0, pure_slp, hybrid }

Functions
static void	add_stmt_info_to_vec (stmt_vector_for_cost *stmt_cost_vec, int count, enum vect_cost_for_stmt kind, gimple stmt, int misalign)
static loop_vec_info	loop_vec_info_for_loop ()
static bool	nested_in_vect_loop_p ()
static bb_vec_info	vec_info_for_bb ()
void	init_stmt_vec_info_vec (void)
void	free_stmt_vec_info_vec (void)
static stmt_vec_info	vinfo_for_stmt ()
static void	set_vinfo_for_stmt ()
static gimple	get_earlier_stmt ()
static gimple	get_later_stmt ()
static bool	is_pattern_stmt_p ()
static bool	is_loop_header_bb_p ()
static int	vect_pow2 ()
static int	builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, tree vectype, int misalign)
static int	vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
static void *	init_cost ()
static unsigned	add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, stmt_vec_info stmt_info, int misalign, enum vect_cost_model_location where)
static void	finish_cost (void data, unsigned prologue_cost, unsigned body_cost, unsigned epilogue_cost)
static void	destroy_cost_data ()
void	set_dr_misalignment ()
int	dr_misalignment ()
static bool	aligned_access_p ()
static bool	known_alignment_for_access_p ()
static bool	unlimited_cost_model ()
void	slpeel_make_loop_iterate_ntimes (struct loop *, tree)
bool	slpeel_can_duplicate_loop_p (const struct loop *, const_edge)
struct loop *	slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge)
void	vect_loop_versioning (loop_vec_info, unsigned int, bool)
void	vect_do_peeling_for_loop_bound (loop_vec_info, tree *, unsigned int, bool)
void	vect_do_peeling_for_alignment (loop_vec_info, unsigned int, bool)
LOC	find_loop_location (struct loop *)
bool	vect_can_advance_ivs_p (loop_vec_info)
tree	get_vectype_for_scalar_type (tree)
tree	get_same_sized_vectype (tree, tree)
bool	vect_is_simple_use (tree, gimple, loop_vec_info, bb_vec_info, gimple , tree , enum vect_def_type *)
bool	vect_is_simple_use_1 (tree, gimple, loop_vec_info, bb_vec_info, gimple , tree , enum vect_def_type , tree )
bool	supportable_widening_operation (enum tree_code, gimple, tree, tree, enum tree_code , enum tree_code , int , vec< tree > )
bool	supportable_narrowing_operation (enum tree_code, tree, tree, enum tree_code , int , vec< tree > *)
stmt_vec_info	new_stmt_vec_info (gimple stmt, loop_vec_info, bb_vec_info)
void	free_stmt_vec_info (gimple stmt)
tree	vectorizable_function (gimple, tree, tree)
void	vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type , stmt_vector_for_cost , stmt_vector_for_cost *)
void	vect_model_store_cost (stmt_vec_info, int, bool, enum vect_def_type, slp_tree, stmt_vector_for_cost , stmt_vector_for_cost )
void	vect_model_load_cost (stmt_vec_info, int, bool, slp_tree, stmt_vector_for_cost , stmt_vector_for_cost )
unsigned	record_stmt_cost (stmt_vector_for_cost *, int, enum vect_cost_for_stmt, stmt_vec_info, int, enum vect_cost_model_location)
void	vect_finish_stmt_generation (gimple, gimple, gimple_stmt_iterator *)
bool	vect_mark_stmts_to_be_vectorized (loop_vec_info)
tree	vect_get_vec_def_for_operand (tree, gimple, tree *)
tree	vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *)
tree	vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree)
bool	vect_transform_stmt (gimple, gimple_stmt_iterator , bool , slp_tree, slp_instance)
void	vect_remove_stores (gimple)
bool	vect_analyze_stmt (gimple, bool *, slp_tree)
bool	vectorizable_condition (gimple, gimple_stmt_iterator , gimple , tree, int, slp_tree)
void	vect_get_load_cost (struct data_reference , int, bool, unsigned int , unsigned int , stmt_vector_for_cost , stmt_vector_for_cost *, bool)
void	vect_get_store_cost (struct data_reference , int, unsigned int , stmt_vector_for_cost *)
bool	vect_supportable_shift (enum tree_code, tree)
void	vect_get_vec_defs (tree, tree, gimple, vec< tree > , vec< tree > , slp_tree, int)
tree	vect_gen_perm_mask (tree, unsigned char *)
bool	vect_can_force_dr_alignment_p (const_tree, unsigned int)
enum dr_alignment_support	vect_supportable_dr_alignment (struct data_reference *, bool)
tree	vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT , HOST_WIDE_INT )
bool	vect_analyze_data_ref_dependences (loop_vec_info, int *)
bool	vect_slp_analyze_data_ref_dependences (bb_vec_info)
bool	vect_enhance_data_refs_alignment (loop_vec_info)
bool	vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info)
bool	vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info)
bool	vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info)
bool	vect_prune_runtime_alias_test_list (loop_vec_info)
tree	vect_check_gather (gimple, loop_vec_info, tree , tree , int *)
bool	vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *)
tree	vect_create_data_ref_ptr (gimple, tree, struct loop , tree, tree , gimple_stmt_iterator , gimple , bool, bool *)
tree	bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree)
tree	vect_create_destination_var (tree, tree)
bool	vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT)
bool	vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT)
bool	vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT)
bool	vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT)
void	vect_permute_store_chain (vec< tree >, unsigned int, gimple, gimple_stmt_iterator , vec< tree > )
tree	vect_setup_realignment (gimple, gimple_stmt_iterator , tree , enum dr_alignment_support, tree, struct loop **)
void	vect_transform_grouped_load (gimple, vec< tree >, int, gimple_stmt_iterator *)
void	vect_record_grouped_load_vectors (gimple, vec< tree >)
tree	vect_get_new_vect_var (tree, enum vect_var_kind, const char *)
tree	vect_create_addr_base_for_vector_ref (gimple, gimple_seq , tree, struct loop )
void	destroy_loop_vec_info (loop_vec_info, bool)
gimple	vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *)
loop_vec_info	vect_analyze_loop (struct loop *)
void	vect_transform_loop (loop_vec_info)
loop_vec_info	vect_analyze_loop_form (struct loop *)
bool	vectorizable_live_operation (gimple, gimple_stmt_iterator , gimple )
bool	vectorizable_reduction (gimple, gimple_stmt_iterator , gimple , slp_tree)
bool	vectorizable_induction (gimple, gimple_stmt_iterator , gimple )
tree	get_initial_def_for_reduction (gimple, tree, tree *)
int	vect_min_worthwhile_factor (enum tree_code)
int	vect_get_known_peeling_cost (loop_vec_info, int, int , int, stmt_vector_for_cost , stmt_vector_for_cost *)
int	vect_get_single_scalar_iteration_cost (loop_vec_info)
void	vect_free_slp_instance (slp_instance)
bool	vect_transform_slp_perm_load (slp_tree, vec< tree >, gimple_stmt_iterator *, int, slp_instance, bool)
bool	vect_schedule_slp (loop_vec_info, bb_vec_info)
void	vect_update_slp_costs_according_to_vf (loop_vec_info)
bool	vect_analyze_slp (loop_vec_info, bb_vec_info)
bool	vect_make_slp_decision (loop_vec_info)
void	vect_detect_hybrid_slp (loop_vec_info)
void	vect_get_slp_defs (vec< tree >, slp_tree, vec< vec< tree > > *, int)
LOC	find_bb_location (basic_block)
bb_vec_info	vect_slp_analyze_bb (basic_block)
void	vect_slp_transform_bb (basic_block)
void	vect_pattern_recog (loop_vec_info, bb_vec_info)
unsigned	vectorize_loops (void)
void	vect_destroy_datarefs (loop_vec_info, bb_vec_info)

Variables
vec< vec_void_p >	stmt_vec_info_vec
LOC	vect_location
unsigned int	current_vector_size

Macro Definition Documentation

#define BB_VINFO_BB ( B ) (B)->bb

Referenced by vect_analyze_slp(), vect_make_slp_decision(), and vect_pattern_recog_1().

#define BB_VINFO_DATAREFS ( B ) (B)->datarefs

Referenced by vect_update_misalignment_for_peel().

#define BB_VINFO_DDRS ( B ) (B)->ddrs

#define BB_VINFO_GROUPED_STORES ( B ) (B)->grouped_stores

Referenced by vect_make_slp_decision().

#define BB_VINFO_SLP_INSTANCES ( B ) (B)->slp_instances

Referenced by vect_make_slp_decision().

#define BB_VINFO_TARGET_COST_DATA ( B ) (B)->target_cost_data

Referenced by vect_make_slp_decision(), and vect_model_simple_cost().

#define DR_MISALIGNMENT ( DR ) dr_misalignment (DR)

Reflects actual alignment of first access in the vectorized loop, taking into account peeling/versioning if applied.

Referenced by is_loop_header_bb_p(), and vect_peeling_hash_insert().

#define EXPR_LOC ( e ) EXPR_LOCATION (e)

#define GROUP_FIRST_ELEMENT ( S ) (S)->first_element

Referenced by vect_analyze_data_ref_dependences(), vect_build_slp_tree_1(), vect_find_same_alignment_drs(), vect_get_store_cost(), vect_model_promotion_demotion_cost(), vect_peeling_hash_choose_best_peeling(), vect_peeling_hash_insert(), and vect_update_misalignment_for_peel().

#define GROUP_GAP ( S ) (S)->gap

Referenced by vect_build_slp_tree_1().

#define GROUP_NEXT_ELEMENT ( S ) (S)->next_element

Referenced by vect_get_place_in_interleaving_chain().

#define GROUP_SAME_DR_STMT ( S ) (S)->same_dr_stmt

#define GROUP_SIZE ( S ) (S)->size

Referenced by vect_build_slp_tree_1(), vect_find_same_alignment_drs(), and vect_model_promotion_demotion_cost().

#define GROUP_STORE_COUNT ( S ) (S)->store_count

#define HYBRID_SLP_STMT ( S ) ((S)->slp_type == hybrid)

#define LOC_FILE ( l ) LOCATION_FILE (l)

#define LOC_LINE ( l ) LOCATION_LINE (l)

#define LOOP_PEELING_FOR_ALIGNMENT ( L ) (L)->peeling_for_alignment

Referenced by vect_do_peeling_for_loop_bound().

#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS ( L ) (L)->may_alias_ddrs.length () > 0

Referenced by vect_create_cond_for_alias_checks().

#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT ( L ) (L)->may_misalign_stmts.length () > 0

Referenced by vect_create_cond_for_alias_checks().

#define LOOP_VINFO_BBS ( L ) (L)->bbs

Referenced by vect_create_cond_for_alias_checks(), and vect_pattern_recog_1().

#define LOOP_VINFO_COST_MODEL_MIN_ITERS ( L ) (L)->min_profitable_iters

#define LOOP_VINFO_DATAREFS ( L ) (L)->datarefs

Referenced by vect_peeling_hash_choose_best_peeling(), and vect_update_misalignment_for_peel().

#define LOOP_VINFO_DDRS ( L ) (L)->ddrs

#define LOOP_VINFO_GROUPED_STORES ( L ) (L)->grouped_stores

#define LOOP_VINFO_INT_NITERS ( L ) (TREE_INT_CST_LOW ((L)->num_iters))

#define LOOP_VINFO_LOC ( L ) (L)->loop_line_number

#define LOOP_VINFO_LOOP ( L ) (L)->loop

Access Functions.

Referenced by find_loop_location(), get_initial_def_for_reduction(), new_loop_vec_info(), vect_analyze_slp(), vect_can_advance_ivs_p(), vect_create_cond_for_alias_checks(), vect_do_peeling_for_loop_bound(), vect_find_same_alignment_drs(), vect_get_new_vect_var(), vect_grouped_store_supported(), vect_mark_relevant(), vect_pattern_recog_1(), vect_peeling_hash_choose_best_peeling(), vect_update_inits_of_drs(), and vect_update_ivs_after_vectorizer().

#define LOOP_VINFO_LOOP_NEST ( L ) (L)->loop_nest

#define LOOP_VINFO_MAY_ALIAS_DDRS ( L ) (L)->may_alias_ddrs

#define LOOP_VINFO_MAY_MISALIGN_STMTS ( L ) (L)->may_misalign_stmts

Referenced by vect_update_inits_of_drs().

#define LOOP_VINFO_NITERS ( L ) (L)->num_iters

Referenced by find_loop_location(), and vect_create_cond_for_alias_checks().

#define LOOP_VINFO_NITERS_KNOWN_P ( L ) NITERS_KNOWN_P ((L)->num_iters)

#define LOOP_VINFO_NITERS_UNCHANGED ( L ) (L)->num_iters_unchanged

Since LOOP_VINFO_NITERS can change after prologue peeling retain total unchanged scalar loop iterations for cost model.

#define LOOP_VINFO_OPERANDS_SWAPPED ( L ) (L)->operands_swapped

#define LOOP_VINFO_PEELING_FOR_GAPS ( L ) (L)->peeling_for_gaps

Referenced by find_loop_location(), vect_find_same_alignment_drs(), and vect_update_ivs_after_vectorizer().

#define LOOP_VINFO_PEELING_HTAB ( L ) (L)->peeling_htab

Referenced by vect_peeling_hash_get_lowest_cost().

#define LOOP_VINFO_PTR_MASK ( L ) (L)->ptr_mask

#define LOOP_VINFO_REDUCTION_CHAINS ( L ) (L)->reduction_chains

#define LOOP_VINFO_REDUCTIONS ( L ) (L)->reductions

#define LOOP_VINFO_SLP_INSTANCES ( L ) (L)->slp_instances

#define LOOP_VINFO_SLP_UNROLLING_FACTOR ( L ) (L)->slp_unrolling_factor

#define LOOP_VINFO_TARGET_COST_DATA ( L ) (L)->target_cost_data

Referenced by vect_estimate_min_profitable_iters(), vect_model_reduction_cost(), and vect_model_simple_cost().

#define LOOP_VINFO_UNALIGNED_DR ( L ) (L)->unaligned_dr

Referenced by vect_do_peeling_for_loop_bound().

#define LOOP_VINFO_VECT_FACTOR ( L ) (L)->vectorization_factor

Referenced by find_loop_location(), vect_peeling_hash_choose_best_peeling(), and vect_update_ivs_after_vectorizer().

#define LOOP_VINFO_VECTORIZABLE_P ( L ) (L)->vectorizable

#define MAX_INTERM_CVT_STEPS 3

The maximum number of intermediate steps required in multi-step type conversion.

#define MAX_VECTORIZATION_FACTOR 32

The maximum vectorization factor supported by any target (V32QI).

#define NITERS_KNOWN_P ( n )

Value:

(host_integerp ((n),0) \

&& TREE_INT_CST_LOW ((n)) > 0)

#define NUM_PATTERNS 11

Referenced by vect_pattern_recog_1().

#define PURE_SLP_STMT ( S ) ((S)->slp_type == pure_slp)

Referenced by vect_analyze_slp(), vect_get_store_cost(), vect_model_simple_cost(), and vect_transform_loop().

#define SET_DR_MISALIGNMENT	(	DR,
		VAL
	)	set_dr_misalignment (DR, VAL)

Referenced by vect_peeling_hash_insert().

#define SLP_INSTANCE_BODY_COST_VEC ( S ) (S)->body_cost_vec

#define SLP_INSTANCE_FIRST_LOAD_STMT ( S ) (S)->first_load

#define SLP_INSTANCE_GROUP_SIZE ( S ) (S)->group_size

Referenced by vect_mark_slp_stmts().

#define SLP_INSTANCE_LOADS ( S ) (S)->loads

Referenced by vect_free_slp_tree(), vect_mark_slp_stmts_relevant(), and vect_slp_rearrange_stmts().

#define SLP_INSTANCE_TREE ( S ) (S)->root

Access Functions.

Referenced by vect_free_slp_tree().

#define SLP_INSTANCE_UNROLLING_FACTOR ( S ) (S)->unrolling_factor

#define SLP_TREE_CHILDREN ( S ) (S)->children

Referenced by vect_analyze_slp().

#define SLP_TREE_LOAD_PERMUTATION ( S ) (S)->load_permutation

Referenced by vect_get_mask_element().

#define SLP_TREE_NUMBER_OF_VEC_STMTS ( S ) (S)->vec_stmts_size

Referenced by vect_transform_slp_perm_load().

#define SLP_TREE_SCALAR_STMTS ( S ) (S)->stmts

Referenced by get_initial_def_for_reduction(), vect_analyze_slp(), and vect_get_mask_element().

#define SLP_TREE_VEC_STMTS ( S ) (S)->vec_stmts

Referenced by vect_gen_widened_results_half(), and vect_transform_slp_perm_load().

#define STMT_SLP_TYPE ( S ) (S)->slp_type

Referenced by vect_analyze_slp().

#define STMT_VINFO_BB_VINFO ( S ) (S)->bb_vinfo

Referenced by vect_analyze_slp(), vect_create_vectorized_demotion_stmts(), vect_find_same_alignment_drs(), vect_finish_stmt_generation(), vect_get_new_vect_var(), and vect_model_simple_cost().

#define STMT_VINFO_DATA_REF ( S ) (S)->data_ref_info

Referenced by destroy_bb_vec_info(), vect_build_slp_tree_1(), vect_create_cond_for_alias_checks(), vect_get_new_vect_var(), vect_get_store_cost(), vect_grouped_store_supported(), and vect_slp_analyze_data_ref_dependence().

#define STMT_VINFO_DEF_TYPE ( S ) (S)->def_type

Referenced by vect_analyze_slp(), vect_can_advance_ivs_p(), vect_create_vectorized_demotion_stmts(), and vect_finish_stmt_generation().

#define STMT_VINFO_DR_ALIGNED_TO ( S ) (S)->dr_aligned_to

Referenced by vect_compute_data_ref_alignment().

#define STMT_VINFO_DR_BASE_ADDRESS ( S ) (S)->dr_base_address

Referenced by vect_compute_data_ref_alignment().

#define STMT_VINFO_DR_INIT ( S ) (S)->dr_init

Referenced by vect_compute_data_ref_alignment().

#define STMT_VINFO_DR_OFFSET ( S ) (S)->dr_offset

#define STMT_VINFO_DR_STEP ( S ) (S)->dr_step

Referenced by vect_get_new_vect_var().

#define STMT_VINFO_GATHER_P ( S ) (S)->gather_p

#define STMT_VINFO_GROUP_FIRST_ELEMENT ( S ) (S)->first_element

#define STMT_VINFO_GROUP_GAP ( S ) (S)->gap

#define STMT_VINFO_GROUP_NEXT_ELEMENT ( S ) (S)->next_element

#define STMT_VINFO_GROUP_SAME_DR_STMT ( S ) (S)->same_dr_stmt

#define STMT_VINFO_GROUP_SIZE ( S ) (S)->size

#define STMT_VINFO_GROUP_STORE_COUNT ( S ) (S)->store_count

#define STMT_VINFO_GROUPED_ACCESS ( S ) ((S)->first_element != NULL && (S)->data_ref_info)

Referenced by vect_analyze_data_ref_dependences(), vect_analyze_stmt(), vect_build_slp_tree_1(), vect_get_store_cost(), vect_peeling_hash_choose_best_peeling(), vect_peeling_hash_insert(), and vect_update_misalignment_for_peel().

#define STMT_VINFO_IN_PATTERN_P ( S ) (S)->in_pattern_p

Referenced by vect_mark_relevant().

#define STMT_VINFO_LIVE_P ( S ) (S)->live

Referenced by vect_analyze_stmt(), vect_loop_kill_debug_uses(), and vect_mark_relevant().

#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART ( S ) (S)->loop_phi_evolution_part

Referenced by vect_can_advance_ivs_p().

#define STMT_VINFO_LOOP_VINFO ( S ) (S)->loop_vinfo

Referenced by get_initial_def_for_reduction(), new_loop_vec_info(), vect_analyze_slp(), vect_create_vectorized_demotion_stmts(), vect_find_same_alignment_drs(), vect_finish_stmt_generation(), vect_get_new_vect_var(), vect_grouped_store_supported(), vect_mark_relevant(), vect_model_reduction_cost(), and vect_model_simple_cost().

#define STMT_VINFO_PATTERN_DEF_SEQ ( S ) (S)->pattern_def_seq

#define STMT_VINFO_RELATED_STMT ( S ) (S)->related_stmt

Referenced by get_initial_def_for_reduction(), vect_create_epilog_for_reduction(), vect_gen_widened_results_half(), and vect_mark_relevant().

#define STMT_VINFO_RELEVANT ( S ) (S)->relevant

Referenced by vect_analyze_slp(), vect_mark_relevant(), and vect_print_slp_tree().

#define STMT_VINFO_RELEVANT_P ( S ) ((S)->relevant != vect_unused_in_scope)

Referenced by vect_create_vectorized_demotion_stmts(), vect_finish_stmt_generation(), vect_loop_kill_debug_uses(), vect_peeling_hash_choose_best_peeling(), and vect_update_misalignment_for_peel().

#define STMT_VINFO_SAME_ALIGN_REFS ( S ) (S)->same_align_refs

#define STMT_VINFO_SIMD_LANE_ACCESS_P ( S ) (S)->simd_lane_access_p

#define STMT_VINFO_STMT ( S ) (S)->stmt

Referenced by record_stmt_cost(), and vect_model_promotion_demotion_cost().

#define STMT_VINFO_STRIDE_LOAD_P ( S ) (S)->stride_load_p

Referenced by vect_get_store_cost(), and vect_update_misalignment_for_peel().

#define STMT_VINFO_TYPE ( S ) (S)->type

Access Functions.

Referenced by vect_model_simple_cost().

#define STMT_VINFO_VEC_STMT ( S ) (S)->vectorized_stmt

Referenced by vect_analyze_stmt(), and vect_gen_widened_results_half().

#define STMT_VINFO_VECTORIZABLE ( S ) (S)->vectorizable

Referenced by destroy_bb_vec_info(), vect_build_slp_tree_1(), vect_pattern_recog_1(), and vect_update_misalignment_for_peel().

#define STMT_VINFO_VECTYPE ( S ) (S)->vectype

Referenced by vect_create_vectorized_demotion_stmts(), vect_do_peeling_for_loop_bound(), vect_finish_stmt_generation(), vect_get_store_cost(), vect_grouped_store_supported(), and vect_permute_load_chain().

#define UNKNOWN_LOC UNKNOWN_LOCATION

Referenced by vect_create_cond_for_alias_checks().

#define VECT_MAX_COST 1000

#define VECTORIZABLE_CYCLE_DEF ( D )

Value:

(((D) == vect_reduction_def)           \
                                   || ((D) == vect_double_reduction_def) \
                                   || ((D) == vect_nested_cycle))

Referenced by vect_analyze_slp().

Typedef Documentation

typedef struct _bb_vec_info * bb_vec_info

typedef struct data_reference* dr_p

typedef source_location LOC

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/.

typedef struct _loop_vec_info * loop_vec_info

Info on vectorized loops.

typedef struct _slp_instance * slp_instance

SLP instance is a sequence of stmts in a loop that can be packed into SIMD stmts.

typedef struct _slp_oprnd_info * slp_oprnd_info

This structure is used in creation of an SLP tree. Each instance corresponds to the same operand in a group of scalar stmts in an SLP node.

typedef struct _slp_tree* slp_tree

typedef struct _stmt_info_for_cost stmt_info_for_cost

Structure to encapsulate information about a group of like instructions to be presented to the target cost model.

typedef struct _stmt_vec_info * stmt_vec_info

typedef vec<stmt_info_for_cost> stmt_vector_for_cost

typedef void* vec_void_p

Avoid on stmt_vec_info.

typedef struct _vect_peel_extended_info * vect_peel_extended_info

typedef struct _vect_peel_info * vect_peel_info

typedef gimple(* vect_recog_func_ptr)(vec< gimple > *, tree *, tree *)

In tree-vect-patterns.c. Pattern recognition functions. Additional pattern recognition functions can (and will) be added in the future.

Enumeration Type Documentation

enum dr_alignment_support

Define type of available alignment support.

Enumerator:

dr_unaligned_unsupported
dr_unaligned_supported
dr_explicit_realign
dr_explicit_realign_optimized
dr_aligned

enum operation_type

Defines type of operation.

Enumerator:

unary_op
binary_op
ternary_op

enum slp_vect_type

The type of vectorization that can be applied to the stmt: regular loop-based vectorization; pure SLP - the stmt is a part of SLP instances and does not have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is a part of SLP instance and also must be loop-based vectorized, since it has uses outside SLP sequences.

In the loop context the meanings of pure and hybrid SLP are slightly different. By saying that pure SLP is applied to the loop, we mean that we exploit only intra-iteration parallelism in the loop; i.e., the loop can be vectorized without doing any conceptual unrolling, cause we don't pack together stmts from different iterations, only within a single iteration. Loop hybrid SLP means that we exploit both intra-iteration and inter-iteration parallelism (e.g., number of elements in the vector is 4 and the slp-group-size is 2, in which case we don't have enough parallelism within an iteration, so we obtain the rest of the parallelism from subsequent iterations by unrolling the loop by 2).

Enumerator:

loop_vect
pure_slp
hybrid

enum stmt_vec_info_type

Info on vectorized defs.

Enumerator:

undef_vec_info_type
load_vec_info_type
store_vec_info_type
shift_vec_info_type
op_vec_info_type
call_vec_info_type
assignment_vec_info_type
condition_vec_info_type
reduc_vec_info_type
induc_vec_info_type
type_promotion_vec_info_type
type_demotion_vec_info_type
type_conversion_vec_info_type
loop_exit_ctrl_vec_info_type

enum vect_def_type

Define type of def-use cross-iteration cycle.

Enumerator:

vect_uninitialized_def
vect_constant_def
vect_external_def
vect_internal_def
vect_induction_def
vect_reduction_def
vect_double_reduction_def
vect_nested_cycle
vect_unknown_def_type

enum vect_relevant

Indicates whether/how a variable is used in the scope of loop/basic block.

Enumerator:

vect_unused_in_scope
vect_used_in_outer_by_reduction	The def is in the inner loop, and the use is in the outer loop, and the use is a reduction stmt.
vect_used_in_outer	The def is in the inner loop, and the use is in the outer loop (and is not part of reduction).
vect_used_by_reduction	defs that feed computations that end up (only) in a reduction. These defs may be used by non-reduction stmts, but eventually, any computations/values that are affected by these defs are used to compute a reduction (i.e. don't get stored to memory, for example). We use this to identify computations that we can change the order in which they are computed.
vect_used_in_scope

enum vect_var_kind

Used for naming of new temporaries.

Enumerator:

vect_simple_var
vect_pointer_var
vect_scalar_var

Function Documentation

static unsigned add_stmt_cost	(	void *	data,
		int	count,
		enum vect_cost_for_stmt	kind,
		stmt_vec_info	stmt_info,
		int	misalign,
		enum vect_cost_model_location	where
	)

inlinestatic

Alias targetm.vectorize.add_stmt_cost.

Referenced by vect_estimate_min_profitable_iters(), vect_model_reduction_cost(), and vect_model_simple_cost().

static void add_stmt_info_to_vec	(	stmt_vector_for_cost *	stmt_cost_vec,
		int	count,
		enum vect_cost_for_stmt	kind,
		gimple	stmt,
		int	misalign
	)

inlinestatic

Referenced by record_stmt_cost().

static bool aligned_access_p ( )

inlinestatic

Return TRUE if the data access is aligned, and FALSE otherwise.

static int builtin_vectorization_cost	(	enum vect_cost_for_stmt	type_of_cost,
		tree	vectype,
		int	misalign
	)

inlinestatic

Alias targetm.vectorize.builtin_vectorization_cost.

References VECT_COST_MODEL_UNLIMITED.

Referenced by record_stmt_cost().

tree bump_vector_ptr	(	tree	dataref_ptr,
		gimple	ptr_incr,
		gimple_stmt_iterator *	gsi,
		gimple	stmt,
		tree	bump
	)

Function bump_vector_ptr

Increment a pointer (to a vector type) by vector-size. If requested, i.e. if PTR-INCR is given, then also connect the new increment stmt to the existing def-use update-chain of the pointer, by modifying the PTR_INCR as illustrated below:

The pointer def-use update-chain before this function: DATAREF_PTR = phi (p_0, p_2) .... PTR_INCR: p_2 = DATAREF_PTR + step

The pointer def-use update-chain after this function: DATAREF_PTR = phi (p_0, p_2) .... NEW_DATAREF_PTR = DATAREF_PTR + BUMP .... PTR_INCR: p_2 = NEW_DATAREF_PTR + step

Input: DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated in the loop. PTR_INCR - optional. The stmt that updates the pointer in each iteration of the loop. The increment amount across iterations is expected to be vector_size. BSI - location where the new update stmt is to be placed. STMT - the original scalar memory-access stmt that is being vectorized. BUMP - optional. The offset by which to bump the pointer. If not given, the offset is assumed to be vector_size.

Output: Return NEW_DATAREF_PTR as illustrated above.

Copy the points-to information if it exists.

 Update the vector-pointer's cross-iteration increment.

static void destroy_cost_data ( )

inlinestatic

Alias targetm.vectorize.destroy_cost_data.

void destroy_loop_vec_info	(	loop_vec_info	,
		bool
	)

In tree-vect-loop.c. FORNOW: Used in tree-parloops.c.

int dr_misalignment ( )

inline

LOC find_bb_location ( basic_block )

LOC find_loop_location ( struct loop * )

static void finish_cost	(	void *	data,
		unsigned *	prologue_cost,
		unsigned *	body_cost,
		unsigned *	epilogue_cost
	)

inlinestatic

Alias targetm.vectorize.finish_cost.

void free_stmt_vec_info ( gimple stmt )

void free_stmt_vec_info_vec ( void )

Free hash table for stmt_vec_info.

static gimple get_earlier_stmt ( )

inlinestatic

Return the earlier statement between STMT1 and STMT2.

References targetm.

Referenced by vect_slp_analyze_data_ref_dependence().

tree get_initial_def_for_reduction	(	gimple	stmt,
		tree	init_val,
		tree *	adjustment_def
	)

Function get_initial_def_for_reduction

Input: STMT - a stmt that performs a reduction operation in the loop. INIT_VAL - the initial value of the reduction variable

Output: ADJUSTMENT_DEF - a tree that holds a value to be added to the final result of the reduction (used for adjusting the epilog - see below). Return a vector variable, initialized according to the operation that STMT performs. This vector will be used as the initial value of the vector of partial results.

Option1 (adjust in epilog): Initialize the vector as follows: add/bit or/xor: [0,0,...,0,0] mult/bit and: [1,1,...,1,1] min/max/cond_expr: [init_val,init_val,..,init_val,init_val] and when necessary (e.g. add/mult case) let the caller know that it needs to adjust the result by init_val.

Option2: Initialize the vector as follows: add/bit or/xor: [init_val,0,0,...,0] mult/bit and: [init_val,1,1,...,1] min/max/cond_expr: [init_val,init_val,...,init_val] and no adjustments are needed.

For example, for the following code:

s = init_val; for (i=0;i<n;i++) s = s + a[i];

STMT is 's = s + a[i]', and the reduction variable is 's'. For a vector of 4 units, we want to return either [0,0,0,init_val], or [0,0,0,0] and let the caller know that it needs to adjust the result at the end by 'init_val'.

FORNOW, we are using the 'adjust in epilog' scheme, because this way the initialization vector is simpler (same element in all entries), if ADJUSTMENT_DEF is not NULL, and Option2 otherwise.

A cost model should help decide between these two schemes.

 In case of double reduction we only create a vector variable to be put
 in the reduction phi node.  The actual statement creation is done in
 vect_create_epilog_for_reduction.

       ADJUSMENT_DEF is NULL when called from
       vect_create_epilog_for_reduction to vectorize double reduction.

       Create a vector of '0' or '1' except the first element.

       Option1: the first element is '0' or '1' as well.

       Option2: the first element is INIT_VAL.

References add_phi_arg(), dump_enabled_p(), dump_gimple_stmt(), dump_printf(), dump_printf_loc(), FOR_EACH_VEC_ELT, gcc_assert, gcc_unreachable, get_gimple_rhs_class(), get_vectype_for_scalar_type(), gimple_assign_rhs1(), gimple_assign_rhs2(), gimple_assign_rhs_code(), GIMPLE_BINARY_RHS, gimple_op(), GIMPLE_SINGLE_RHS, GIMPLE_TERNARY_RHS, GIMPLE_UNARY_RHS, loop::inner, loop_latch_edge(), loop_preheader_edge(), LOOP_VINFO_LOOP, MSG_NOTE, nested_in_vect_loop_p(), NULL, NULL_TREE, phis, reduction_phi(), SLP_TREE_SCALAR_STMTS, SSA_NAME_DEF_STMT, STMT_VINFO_LOOP_VINFO, STMT_VINFO_RELATED_STMT, TDF_SLIM, ternary_op, TREE_OPERAND, TREE_OPERAND_LENGTH, TREE_TYPE, TYPE_MODE, UNKNOWN_LOCATION, vect_get_vec_def_for_operand(), vect_get_vec_def_for_stmt_copy(), vect_get_vec_defs(), vect_location, vect_unknown_def_type, vinfo_for_stmt(), and vNULL.

static gimple get_later_stmt ( )

inlinestatic

Return the later statement between STMT1 and STMT2.

References targetm.

tree get_same_sized_vectype	(	tree	,
		tree
	)

tree get_vectype_for_scalar_type ( tree )

static void* init_cost ( )

inlinestatic

Alias targetm.vectorize.init_cost.

void init_stmt_vec_info_vec ( void )

Create a hash table for stmt_vec_info.

static bool is_loop_header_bb_p ( )

inlinestatic

Return true if BB is a loop header.

References DR_MISALIGNMENT.

static bool is_pattern_stmt_p ( )

inlinestatic

Return TRUE if a statement represented by STMT_INFO is a part of a pattern.

References data_reference::aux, and dataref_aux::misalignment.

static bool known_alignment_for_access_p ( )

inlinestatic

Return TRUE if the alignment of the data access is known, and FALSE otherwise.

Referenced by vect_permute_load_chain().

static loop_vec_info loop_vec_info_for_loop ( )

inlinestatic

static bool nested_in_vect_loop_p ( )

inlinestatic

Referenced by get_initial_def_for_reduction(), vect_get_new_vect_var(), and vect_grouped_store_supported().

stmt_vec_info new_stmt_vec_info	(	gimple	stmt,
		loop_vec_info	loop_vinfo,
		bb_vec_info	bb_vinfo
	)

Function new_stmt_vec_info.

Create and initialize a new stmt_vec_info struct for STMT.

Referenced by vect_create_epilog_for_reduction(), and vect_make_slp_decision().

unsigned record_stmt_cost	(	stmt_vector_for_cost *	body_cost_vec,
		int	count,
		enum vect_cost_for_stmt	kind,
		stmt_vec_info	stmt_info,
		int	misalign,
		enum vect_cost_model_location	where
	)

Record the cost of a statement, either by directly informing the target model or by saving it in a vector for later processing. Return a preliminary estimate of the statement's cost.

References add_stmt_info_to_vec(), builtin_vectorization_cost(), count, NULL, NULL_TREE, stmt_vectype(), and STMT_VINFO_STMT.

Referenced by vect_get_store_cost(), and vect_model_store_cost().

void set_dr_misalignment ( )

inline

Info on data references alignment.

static void set_vinfo_for_stmt ( )

inlinestatic

Set vectorizer information INFO for STMT.

Referenced by vect_create_epilog_for_reduction(), and vect_make_slp_decision().

bool slpeel_can_duplicate_loop_p	(	const struct loop *	,
		const_edge
	)

void slpeel_make_loop_iterate_ntimes	(	struct loop *	,
		tree
	)

Function prototypes. Simple loop peeling and versioning utilities for vectorizer's purposes - in tree-vect-loop-manip.c.

struct loop* slpeel_tree_duplicate_loop_to_edge_cfg	(	struct loop *	,
		edge
	)

read

bool supportable_narrowing_operation	(	enum tree_code	code,
		tree	vectype_out,
		tree	vectype_in,
		enum tree_code *	code1,
		int *	multi_step_cvt,
		vec< tree > *	interm_types
	)

Function supportable_narrowing_operation

Check whether an operation represented by the code CODE is a narrowing operation that is supported by the target platform in vector form (i.e., when operating on arguments of type VECTYPE_IN and producing a result of type VECTYPE_OUT).

Narrowing operations we currently support are NOP (CONVERT) and FIX_TRUNC. This function checks if these operations are supported by the target platform directly via vector tree-codes.

Output:

CODE1 is the code of a vector operation to be used when vectorizing the operation, if available.
MULTI_STEP_CVT determines the number of required intermediate steps in case of multi-step conversion (like int->short->char - in that case MULTI_STEP_CVT will be 1).
INTERM_TYPES contains the intermediate type required to perform the narrowing operation (short in the above example).

     ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
     tree code and optabs used for computing the operation.

   The signedness is determined from output operand.

 Check if it's a multi-step conversion that can be done using intermediate
 types.

 For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
 costly than signed.

 We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 intermediate steps in promotion sequence.  We try
 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.

bool supportable_widening_operation	(	enum tree_code	code,
		gimple	stmt,
		tree	vectype_out,
		tree	vectype_in,
		enum tree_code *	code1,
		enum tree_code *	code2,
		int *	multi_step_cvt,
		vec< tree > *	interm_types
	)

Function supportable_widening_operation

Check whether an operation represented by the code CODE is a widening operation that is supported by the target platform in vector form (i.e., when operating on arguments of type VECTYPE_IN producing a result of type VECTYPE_OUT).

Widening operations we currently support are NOP (CONVERT), FLOAT and WIDEN_MULT. This function checks if these operations are supported by the target platform either directly (via vector tree-codes), or via target builtins.

Output:

CODE1 and CODE2 are codes of vector operations to be used when vectorizing the operation, if available.
MULTI_STEP_CVT determines the number of required intermediate steps in case of multi-step conversion (like char->short->int - in that case MULTI_STEP_CVT will be 1).
INTERM_TYPES contains the intermediate type required to perform the widening operation (short in the above example).

     The result of a vectorized widening operation usually requires
     two vectors (because the widened results do not fit into one vector).
     The generated vector results would normally be expected to be
     generated in the same order as in the original scalar computation,
     i.e. if 8 results are generated in each vector iteration, they are
     to be organized as follows:
            vect1: [res1,res2,res3,res4],
            vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening
     operation is used in a reduction computation only, the order doesn't
     matter (because when vectorizing a reduction we change the order of
     the computation).  Some targets can take advantage of this and
     generate more efficient code.  For example, targets like Altivec,
     that support widen_mult using a sequence of {mult_even,mult_odd}
     generate the following vectors:
            vect1: [res1,res3,res5,res7],
            vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow to change the
     order of the computation in the inner-loop during outer-loop
     vectorization.

     TODO: Another case in which order doesn't *really* matter is when we
     widen and then contract again, e.g. (short)((int)x * y >> 8).
     Normally, pack_trunc performs an even/odd permute, whereas the 
     repack from an even/odd expansion would be an interleave, which
     would be significantly simpler for e.g. AVX2.

     In any case, in order to avoid duplicating the code below, recurse
     on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
     are properly set up for the caller.  If we fail, we'll continue with
     a VEC_WIDEN_MULT_LO/HI_EXPR check.

     Support the recursion induced just above.

     ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
     VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
     computing the operation.

     The signedness is determined from output operand.

 Check if it's a multi-step conversion that can be done using intermediate
 types.

 We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 intermediate steps in promotion sequence.  We try
 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
 not.

References insn_data, insn_data_d::operand, optab_default, optab_for_tree_code(), optab_handler(), lang_hooks_for_types::type_for_mode, TYPE_MODE, and lang_hooks::types.

static bool unlimited_cost_model ( )

inlinestatic

Return true if the vect cost model is unlimited.

Referenced by vect_peeling_hash_get_lowest_cost().

static bb_vec_info vec_info_for_bb ( )

inlinestatic

References loop_vect.

bool vect_analyze_data_ref_accesses	(	loop_vec_info	,
		bb_vec_info
	)

bool vect_analyze_data_ref_dependences	(	loop_vec_info	,
		int *
	)

bool vect_analyze_data_refs	(	loop_vec_info	loop_vinfo,
		bb_vec_info	bb_vinfo,
		int *	min_vf
	)

Function vect_analyze_data_refs.

Find all the data references in the loop or basic block.

The general structure of the analysis of data refs in the vectorizer is as follows: 1- vect_analyze_data_refs(loop/bb): call compute_data_dependences_for_loop/bb to find and analyze all data-refs in the loop/bb and their dependences. 2- vect_analyze_dependences(): apply dependence testing using ddrs. 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.

        Mark the rest of the basic-block as unvectorizable.

 Go through the data-refs, check that the analysis succeeded.  Update
 pointer from stmt_vec_info struct to DR and vectype.   


     Discard clobbers from the dataref vector.  We will remove
     clobber stmts during vectorization.

     Check that analysis of the data-ref succeeded.

         If target supports vector gather loads, or if this might be
         a SIMD lane access, see if they can't be used.

                                     For now.

     Update DR field in stmt_vec_info struct.

     If the dataref is in an inner-loop of the loop that is considered for
     for vectorization, we also want to analyze the access relative to
     the outer-loop (DR contains information only relative to the
     inner-most enclosing loop).  We do that by building a reference to the
     first location accessed by the inner-loop, and analyze it relative to
     the outer-loop.

         Build a reference to the first location accessed by the
         inner-loop: *(BASE+INIT).  (The first location is actually
         BASE+INIT+OFFSET, but we add OFFSET separately later).

         FIXME: Use canonicalize_base_object_address (base_iv.base);

     Set vectype for STMT.

     Adjust the minimal vectorization factor according to the
     vector type.

 If we stopped analysis at the first dataref we could not analyze
 when trying to vectorize a basic-block mark the rest of the datarefs
 as not vectorizable and truncate the vector of datarefs.  That
 avoids spending useless time in analyzing their dependence.

bool vect_analyze_data_refs_alignment	(	loop_vec_info	loop_vinfo,
		bb_vec_info	bb_vinfo
	)

Function vect_analyze_data_refs_alignment

Analyze the alignment of the data-references in the loop. Return FALSE if a data reference is found that cannot be vectorized.

Mark groups of data references with same alignment using data dependence information.

loop_vec_info vect_analyze_loop ( struct loop * )

Drive for loop analysis stage.

loop_vec_info vect_analyze_loop_form ( struct loop * )

bool vect_analyze_slp	(	loop_vec_info	,
		bb_vec_info
	)

bool vect_analyze_stmt	(	gimple	,
		bool *	,
		slp_tree
	)

bool vect_can_advance_ivs_p ( loop_vec_info )

bool vect_can_force_dr_alignment_p	(	const_tree	,
		unsigned	int
	)

In tree-vect-data-refs.c.

tree vect_check_gather	(	gimple	stmt,
		loop_vec_info	loop_vinfo,
		tree *	basep,
		tree *	offp,
		int *	scalep
	)

Check whether a non-affine read in stmt is suitable for gather load and if so, return a builtin decl for that operation.

 The gather builtins need address of the form
 loop_invariant + vector * {1, 2, 4, 8}
 or
 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
 of loop invariants/SSA_NAMEs defined in the loop, with casts,
 multiplications and additions in it.  To get a vector, we need
 a single SSA_NAME that will be defined in the loop and will
 contain everything that is not loop invariant and that can be
 vectorized.  The following code attempts to find such a preexistng
 SSA_NAME OFF and put the loop invariants into a tree BASE
 that can be gimplified before the loop.

 If base is not loop invariant, either off is 0, then we start with just
 the constant offset in the loop invariant BASE and continue with base
 as OFF, otherwise give up.
 We could handle that case by gimplifying the addition of base + off
 into some SSA_NAME and use that as off, but for now punt.

 Otherwise put base + constant offset into the loop invariant BASE
 and continue with OFF.

 OFF at this point may be either a SSA_NAME or some tree expression
 from get_inner_reference.  Try to peel off loop invariants from it
 into BASE as long as possible.

 If at the end OFF still isn't a SSA_NAME or isn't
 defined in the loop, punt.

References targetm.

tree vect_create_addr_base_for_vector_ref	(	gimple	stmt,
		gimple_seq *	new_stmt_list,
		tree	offset,
		struct loop *	loop
	)

Function vect_create_addr_base_for_vector_ref.

Create an expression that computes the address of the first memory location that will be accessed for a data reference.

Input: STMT: The statement containing the data reference. NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. OFFSET: Optional. If supplied, it is be added to the initial address. LOOP: Specify relative to which loop-nest should the address be computed. For example, when the dataref is in an inner-loop nested in an outer-loop that is now being vectorized, LOOP can be either the outer-loop, or the inner-loop. The first memory location accessed by the following dataref ('in' points to short):

for (i=0; i<N; i++)
   for (j=0; j<M; j++)
     s += in[i+j]

        is as follows:
        if LOOP=i_loop:     &in             (relative to i_loop)
        if LOOP=j_loop:     &in+i*2B        (relative to j_loop)

Output:

Return an SSA_NAME whose value is the address of the memory location of the first vector of the data reference.
If new_stmt_list is not NULL_TREE after return then the caller must insert these statement(s) which define the returned SSA_NAME.

FORNOW: We are only handling array accesses with step 1.

Create base_offset

 base + base_offset

Referenced by vect_do_peeling_for_loop_bound().

tree vect_create_data_ref_ptr	(	gimple	stmt,
		tree	aggr_type,
		struct loop *	at_loop,
		tree	offset,
		tree *	initial_address,
		gimple_stmt_iterator *	gsi,
		gimple *	ptr_incr,
		bool	only_init,
		bool *	inv_p
	)

Function vect_create_data_ref_ptr.

Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first location accessed in the loop by STMT, along with the def-use update chain to appropriately advance the pointer through the loop iterations. Also set aliasing information for the pointer. This pointer is used by the callers to this function to create a memory reference expression for vector load/store access.

Input:

STMT: a stmt that references memory. Expected to be of the form GIMPLE_ASSIGN <name, data-ref> or GIMPLE_ASSIGN <data-ref, name>.
AGGR_TYPE: the type of the reference, which should be either a vector or an array.
AT_LOOP: the loop where the vector memref is to be created.
OFFSET (optional): an offset to be added to the initial address accessed by the data-ref in STMT.
BSI: location where the new stmts are to be placed if there is no loop
ONLY_INIT: indicate if ap is to be updated in the loop, or remain pointing to the initial address.

Output:

Declare a new ptr to vector_type, and have it point to the base of the data reference (initial addressed accessed by the data reference). For example, for vector of type V8HI, the following code is generated:

v8hi *ap; ap = (v8hi *)initial_address;

if OFFSET is not supplied: initial_address = &a[init]; if OFFSET is supplied: initial_address = &a[init + OFFSET];

Return the initial_address in INITIAL_ADDRESS.

If ONLY_INIT is true, just return the initial pointer. Otherwise, also update the pointer in each iteration of the loop.

Return the increment stmt that updates the pointer in PTR_INCR.

Set INV_P to true if the access pattern of the data reference in the vectorized loop is invariant. Set it to false otherwise.

Return the pointer.

 Check the step (evolution) of the load in LOOP, and record
 whether it's invariant.

 Create an expression for the first address accessed by this load
 in LOOP.

 (1) Create the new aggregate-pointer variable.
 Vector and array types inherit the alias set of their component
 type by default so we need to use a ref-all pointer if the data
 reference does not conflict with the created aggregated data
 reference because it is not addressable.

 Likewise for any of the data references in the stmt group.

 Note: If the dataref is in an inner-loop nested in LOOP, and we are
 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
 def-use update cycles for the pointer: one relative to the outer-loop
 (LOOP), which is what steps (3) and (4) below do.  The other is relative
 to the inner-loop (which is the inner-most loop containing the dataref),
 and this is done be step (5) below.

 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
 inner-most loop, and so steps (3),(4) work the same, and step (5) is
 redundant.  Steps (3),(4) create the following:

    vp0 = &base_addr;
    LOOP:   vp1 = phi(vp0,vp2)
            ...
            ...
            vp2 = vp1 + step
            goto LOOP

 If there is an inner-loop nested in loop, then step (5) will also be
 applied, and an additional update in the inner-loop will be created:

    vp0 = &base_addr;
    LOOP:   vp1 = phi(vp0,vp2)
            ...
    inner:     vp3 = phi(vp1,vp4)
               vp4 = vp3 + inner_step
               if () goto inner
            ...
            vp2 = vp1 + step
            if () goto LOOP

 (2) Calculate the initial address of the aggregate-pointer, and set
 the aggregate-pointer to point to it before the loop.

 Create: (&(base[init_val+offset]) in the loop preheader.

 Create: p = (aggr_type *) initial_base

     Copy the points-to information if it exists.

 (3) Handle the updating of the aggregate-pointer inside the loop.
 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
 inner-loop nested in LOOP (during outer-loop vectorization).

 No update in loop is required.

     The step of the aggregate pointer is the type size.

     One exception to the above is when the scalar step of the load in
     LOOP is zero. In this case the step here is also zero.

     Copy the points-to information if it exists.

 (4) Handle the updating of the aggregate-pointer inside the inner-loop
 nested in LOOP, if exists.

     Copy the points-to information if it exists.

Referenced by vect_permute_store_chain().

tree vect_create_destination_var	(	tree	,
		tree
	)

void vect_destroy_datarefs	(	loop_vec_info	,
		bb_vec_info
	)

void vect_detect_hybrid_slp ( loop_vec_info )

void vect_do_peeling_for_alignment	(	loop_vec_info	loop_vinfo,
		unsigned int	th,
		bool	check_profitability
	)

Function vect_do_peeling_for_alignment

Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. 'niters' is set to the misalignment of one of the data references in the loop, thereby forcing it to refer to an aligned location at the beginning of the execution of this loop. The data reference for which we are peeling is recorded in LOOP_VINFO_UNALIGNED_DR.

 Peel the prolog loop and iterate it niters_of_prolog_loop.

 For vectorization factor N, we need to copy at most N-1 values 
 for alignment and this means N-2 loopback edge executions.

 Update number of times loop executes.

         Insert stmt on loop preheader edge.

 Update the init conditions of the access functions of all data refs.

 After peeling we have to reset scalar evolution analyzer.

void vect_do_peeling_for_loop_bound	(	loop_vec_info	loop_vinfo,
		tree *	ratio,
		unsigned int	th,
		bool	check_profitability
	)

Function vect_do_peeling_for_loop_bound

Peel the last iterations of the loop represented by LOOP_VINFO. The peeled iterations form a new epilog loop. Given that the loop now iterates NITERS times, the new epilog loop iterates NITERS % VECTORIZATION_FACTOR times.

The original loop will later be made to iterate NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).

COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated test.

 Generate the following variables on the preheader of original loop:

 ni_name = number of iteration the original loop executes
 ratio = ni_name / vf
 ratio_mult_vf_name = ratio * vf

 A guard that controls whether the new_loop is to be executed or skipped
 is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
 is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
 is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
 is on the path where the LOOP IVs are used and need to be updated.

 Update IVs of original loop as if they were advanced
 by ratio_mult_vf_name steps.

 For vectorization factor N, we need to copy last N-1 values in epilogue
 and this means N-2 loopback edge executions.

 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
 will execute at least LOOP_VINFO_VECT_FACTOR times.

 After peeling we have to reset scalar evolution analyzer.

References BITS_PER_UNIT, build_int_cst(), create_tmp_var, DR_STEP, DR_STMT, dump_enabled_p(), dump_generic_expr(), dump_printf(), dump_printf_loc(), exact_log2(), fold_build2, fold_convert, force_gimple_operand(), gcc_assert, gsi_insert_seq_on_edge_immediate(), HOST_WIDE_INT, int_cst_value(), LOOP_PEELING_FOR_ALIGNMENT, loop_preheader_edge(), LOOP_VINFO_LOOP, LOOP_VINFO_UNALIGNED_DR, MSG_NOTE, NULL, NULL_TREE, offset, size_int, size_zero_node, STMT_VINFO_VECTYPE, TDF_SLIM, TREE_CODE, tree_int_cst_compare(), TREE_TYPE, TYPE_ALIGN, TYPE_SIZE_UNIT, TYPE_VECTOR_SUBPARTS, unsigned_type_for(), vect_create_addr_base_for_vector_ref(), vect_location, and vinfo_for_stmt().

bool vect_enhance_data_refs_alignment ( loop_vec_info )

void vect_finish_stmt_generation	(	gimple	stmt,
		gimple	vec_stmt,
		gimple_stmt_iterator *	gsi
	)

Function vect_finish_stmt_generation.

Insert a new stmt.

If we have an SSA vuse and insert a store, update virtual SSA form to avoid triggering the renamer. Do so only if we can easily see all uses - which is what almost always happens with the way vectorized stmts are inserted.

References gimple_call_internal_fn(), gimple_call_internal_p(), gimple_call_lhs(), gimple_call_num_args(), is_gimple_call(), NULL, NULL_TREE, stmt_can_throw_internal(), STMT_VINFO_BB_VINFO, STMT_VINFO_DEF_TYPE, STMT_VINFO_LOOP_VINFO, STMT_VINFO_RELEVANT_P, STMT_VINFO_VECTYPE, TREE_CODE, type(), vect_internal_def, vect_unknown_def_type, vinfo_for_stmt(), and vNULL.

gimple vect_force_simple_reduction	(	loop_vec_info	loop_info,
		gimple	phi,
		bool	check_reduction,
		bool *	double_reduc
	)

Wrapper around vect_is_simple_reduction_1, which will modify code in-place if it enables detection of more reductions. Arguments as there.

void vect_free_slp_instance ( slp_instance )

In tree-vect-slp.c.

tree vect_gen_perm_mask	(	tree	,
		unsigned char *
	)

int vect_get_known_peeling_cost	(	loop_vec_info	loop_vinfo,
		int	peel_iters_prologue,
		int *	peel_iters_epilogue,
		int	scalar_single_iter_cost,
		stmt_vector_for_cost *	prologue_cost_vec,
		stmt_vector_for_cost *	epilogue_cost_vec
	)

Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.

If peeled iterations are known but number of scalar loop iterations are unknown, count a taken branch per peeled loop.

     If we need to peel for gaps, but no peeling is required, we have to
     peel VF iterations.

Referenced by vect_estimate_min_profitable_iters().

void vect_get_load_cost	(	struct data_reference *	dr,
		int	ncopies,
		bool	add_realign_cost,
		unsigned int *	inside_cost,
		unsigned int *	prologue_cost,
		stmt_vector_for_cost *	prologue_cost_vec,
		stmt_vector_for_cost *	body_cost_vec,
		bool	record_prologue_costs
	)

Calculate cost of DR's memory access.

       Here, we assign an additional cost for the unaligned load.

       FIXME: If the misalignment remains fixed across the iterations of
       the containing loop, the following cost should be added to the
       prologue costs.

       Unaligned software pipeline has a load of an address, an initial
       load, and possibly a mask operation to "prime" the loop.  However,
       if this is an access in a group of loads, which provide grouped
       access, then the above cost should only be considered for one
       access in the group.  Inside the loop, there is a load op
       and a realignment op.

References targetm.

tree vect_get_new_vect_var	(	tree	,
		enum	vect_var_kind,
		const char *
	)

int vect_get_single_scalar_iteration_cost ( loop_vec_info )

void vect_get_slp_defs	(	vec< tree >	ops,
		slp_tree	slp_node,
		vec< vec< tree > > *	vec_oprnds,
		int	reduc_index
	)

Get vectorized definitions for SLP_NODE. If the scalar definitions are loop invariants or constants, collect them and call vect_get_constant_vectors() to create vector stmts. Otherwise, the def-stmts must be already vectorized and the vectorized stmts must be stored in the corresponding child of SLP_NODE, and we call vect_get_slp_vect_defs () to retrieve them.

     For each operand we check if it has vectorized definitions in a child
     node or we need to create them (for invariants and constants).  We
     check if the LHS of the first stmt of the next child matches OPRND.
     If it does, we found the correct child.  Otherwise, we call
     vect_get_constant_vectors (), and not advance CHILD_INDEX in order
     to check this child node for the next operand.

         We have to check both pattern and original def, if available.

             The number of vector defs is determined by the number of
             vector statements in the node from which we get those
             statements.

             Number of vector stmts was calculated according to LHS in
             vect_schedule_slp_instance (), fix it by replacing LHS with
             RHS, if necessary.  See vect_get_smallest_scalar_type () for
             details.

     Allocate memory for vectorized defs.

     For reduction defs we call vect_get_constant_vectors (), since we are
     looking for initial loop invariant values.

       The defs are already vectorized.

       Build vectors from scalar defs.

     For reductions, we only need initial values.

References dump_enabled_p(), dump_gimple_stmt(), dump_printf(), dump_printf_loc(), MSG_MISSED_OPTIMIZATION, TDF_SLIM, and vect_location.

tree vect_get_smallest_scalar_type	(	gimple	stmt,
		HOST_WIDE_INT *	lhs_size_unit,
		HOST_WIDE_INT *	rhs_size_unit
	)

Return the smallest scalar part of STMT. This is used to determine the vectype of the stmt. We generally set the vectype according to the type of the result (lhs). For stmts whose result-type is different than the type of the arguments (e.g., demotion, promotion), vectype will be reset appropriately (later). Note that we have to visit the smallest datatype in this function, because that determines the VF. If the smallest datatype in the loop is present only as the rhs of a promotion operation - we'd miss it. Such a case, where a variable of this datatype does not appear in the lhs anywhere in the loop, can only occur if it's an invariant: e.g.: 'int_x = (int) short_inv', which we'd expect to have been optimized away by invariant motion. However, we cannot rely on invariant motion to always take invariants out of the loop, and so in the case of promotion we also have to check the rhs. LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding types.

Referenced by vect_build_slp_tree_1().

static int vect_get_stmt_cost ( enum vect_cost_for_stmt type_of_cost )

inlinestatic

Get cost by calling cost target builtin.

Referenced by destroy_bb_vec_info().

void vect_get_store_cost	(	struct data_reference *	dr,
		int	ncopies,
		unsigned int *	inside_cost,
		stmt_vector_for_cost *	body_cost_vec
	)

Calculate cost of DR's memory access.

Here, we assign an additional cost for the unaligned store.

References dump_enabled_p(), dump_printf_loc(), exact_log2(), first_stmt(), GROUP_FIRST_ELEMENT, MSG_NOTE, PURE_SLP_STMT, record_stmt_cost(), STMT_VINFO_DATA_REF, STMT_VINFO_GROUPED_ACCESS, STMT_VINFO_STRIDE_LOAD_P, STMT_VINFO_VECTYPE, TYPE_VECTOR_SUBPARTS, vec_perm, vect_body, vect_cost_group_size(), vect_location, and vinfo_for_stmt().

tree vect_get_vec_def_for_operand	(	tree	,
		gimple	,
		tree *
	)

tree vect_get_vec_def_for_stmt_copy	(	enum	vect_def_type,
		tree
	)

void vect_get_vec_defs	(	tree	op0,
		tree	op1,
		gimple	stmt,
		vec< tree > *	vec_oprnds0,
		vec< tree > *	vec_oprnds1,
		slp_tree	slp_node,
		int	reduc_index
	)

Get vectorized definitions for OP0 and OP1. REDUC_INDEX is the index of reduction operand in case of reduction, and -1 otherwise.

Referenced by get_initial_def_for_reduction().

bool vect_grouped_load_supported	(	tree	,
		unsigned	HOST_WIDE_INT
	)

bool vect_grouped_store_supported	(	tree	,
		unsigned	HOST_WIDE_INT
	)

tree vect_init_vector	(	gimple	,
		tree	,
		tree	,
		gimple_stmt_iterator *
	)

bool vect_is_simple_use	(	tree	operand,
		gimple	stmt,
		loop_vec_info	loop_vinfo,
		bb_vec_info	bb_vinfo,
		gimple *	def_stmt,
		tree *	def,
		enum vect_def_type *	dt
	)

Function vect_is_simple_use.

Input: LOOP_VINFO - the vect info of the loop that is being vectorized. BB_VINFO - the vect info of the basic block that is being vectorized. OPERAND - operand of STMT in the loop or bb. DEF - the defining stmt in case OPERAND is an SSA_NAME.

Returns whether a stmt with OPERAND can be vectorized. For loops, supportable operands are constants, loop invariants, and operands that are defined by the current iteration of the loop. Unsupportable operands are those that are defined by a previous iteration of the loop (as is the case in reduction/induction computations). For basic blocks, supportable operands are constants and bb invariants. For now, operands defined outside the basic block are not supported.

Empty stmt is expected only in case of a function argument. (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).

     FALLTHRU

bool vect_is_simple_use_1	(	tree	operand,
		gimple	stmt,
		loop_vec_info	loop_vinfo,
		bb_vec_info	bb_vinfo,
		gimple *	def_stmt,
		tree *	def,
		enum vect_def_type *	dt,
		tree *	vectype
	)

Function vect_is_simple_use_1.

Same as vect_is_simple_use_1 but also determines the vector operand type of OPERAND and stores it to *VECTYPE. If the definition of OPERAND is vect_uninitialized_def, vect_constant_def or vect_external_def *VECTYPE will be set to NULL_TREE and the caller is responsible to compute the best suited vector type for the scalar operand.

Now get a vector type if the def is internal, otherwise supply NULL_TREE and leave it up to the caller to figure out a proper type for the use stmt.

bool vect_load_lanes_supported	(	tree	,
		unsigned	HOST_WIDE_INT
	)

void vect_loop_versioning	(	loop_vec_info	loop_vinfo,
		unsigned int	th,
		bool	check_profitability
	)

Function vect_loop_versioning.

If the loop has data references that may or may not be aligned or/and has data reference relations whose independence was not proven then two versions of the loop need to be generated, one which is vectorized and one which isn't. A test is then generated to control which of the loops is executed. The test checks for the alignment of all of the data references that may or may not be aligned. An additional sequence of runtime tests is generated for each pairs of DDRs whose independence was not proven. The vectorized version of loop is executed only if both alias and alignment tests are passed.

The test generated to check which version of loop is executed is modified to also check for profitability as indicated by the cost model initially.

The versioning precondition(s) are placed in *COND_EXPR and *COND_EXPR_STMT_LIST.

 Loop versioning violates an assumption we try to maintain during
 vectorization - that the loop exit block has a single predecessor.
 After versioning, the exit block of both loop versions is the same
 basic block (i.e. it has two predecessors). Just in order to simplify
 following transformations in the vectorizer, we fix this situation
 here by adding a new (empty) block on the exit-edge of the loop,
 with the proper loop-exit phis to maintain loop-closed-form.

 Extract load statements on memrefs with zero-stride accesses.

     In the loop body, we iterate each statement to check if it is a load.
     Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
     then we will hoist the load statement to the loop preheader.

                 We hoist a statement if all SSA uses in it are defined
                 outside of the loop.

 End loop-exit-fixes after versioning.

bool vect_make_slp_decision ( loop_vec_info )

bool vect_mark_stmts_to_be_vectorized ( loop_vec_info )

int vect_min_worthwhile_factor ( enum tree_code )

void vect_model_load_cost	(	stmt_vec_info	stmt_info,
		int	ncopies,
		bool	load_lanes_p,
		slp_tree	slp_node,
		stmt_vector_for_cost *	prologue_cost_vec,
		stmt_vector_for_cost *	body_cost_vec
	)

Function vect_model_load_cost

Models cost for loads. In the case of grouped accesses, the last access has the overhead of the grouped access attributed to it. Since unaligned accesses are supported for loads, we also account for the costs of the access scheme chosen.

 The SLP costs were already calculated during SLP tree build.

 Grouped accesses?

 Not a grouped access.

 We assume that the cost of a single load-lanes instruction is
 equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
 access is instead being provided by a load-and-permute operation,
 include the cost of the permutes.

     Uses an even and odd extract operations for each needed permute.

 The loads themselves.

     N scalar loads plus gathering them into a vector.

void vect_model_simple_cost	(	stmt_vec_info	stmt_info,
		int	ncopies,
		enum vect_def_type *	dt,
		stmt_vector_for_cost *	prologue_cost_vec,
		stmt_vector_for_cost *	body_cost_vec
	)

Function vect_model_simple_cost.

Models cost for simple operations, i.e. those that only emit ncopies of a single op. Right now, this does not account for multiple insns that could be generated for the single vector op. We will handle that shortly.

 The SLP costs were already calculated during SLP tree build.

 FORNOW: Assuming maximum 2 args per stmts.

 Pass the inside-of-loop statements to the target-specific cost model.

References add_stmt_cost(), BB_VINFO_TARGET_COST_DATA, dump_enabled_p(), dump_printf_loc(), LOOP_VINFO_TARGET_COST_DATA, MSG_NOTE, PURE_SLP_STMT, STMT_VINFO_BB_VINFO, STMT_VINFO_LOOP_VINFO, STMT_VINFO_TYPE, type_promotion_vec_info_type, vec_promote_demote, vect_body, vect_constant_def, vect_external_def, vect_location, vect_pow2(), vect_prologue, and vector_stmt.

void vect_model_store_cost	(	stmt_vec_info	stmt_info,
		int	ncopies,
		bool	store_lanes_p,
		enum vect_def_type	dt,
		slp_tree	slp_node,
		stmt_vector_for_cost *	prologue_cost_vec,
		stmt_vector_for_cost *	body_cost_vec
	)

Function vect_model_store_cost

Models cost for stores. In the case of grouped accesses, one access has the overhead of the grouped access attributed to it.

 The SLP costs were already calculated during SLP tree build.

 Grouped access?

 Not a grouped access.

 We assume that the cost of a single store-lanes instruction is
 equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
 access is instead being provided by a permute-and-store operation,
 include the cost of the permutes.

     Uses a high and low interleave operation for each needed permute.

 Costs of the stores.

References dump_enabled_p(), dump_printf_loc(), exact_log2(), MSG_NOTE, record_stmt_cost(), vec_perm, vect_body, and vect_location.

void vect_pattern_recog	(	loop_vec_info	,
		bb_vec_info
	)

void vect_permute_store_chain	(	vec< tree >	dr_chain,
		unsigned int	length,
		gimple	stmt,
		gimple_stmt_iterator *	gsi,
		vec< tree > *	result_chain
	)

Function vect_permute_store_chain.

Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be a power of 2, generate interleave_high/low stmts to reorder the data correctly for the stores. Return the final references for stores in RESULT_CHAIN.

E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. The input is 4 vectors each containing 8 elements. We assign a number to each element, the input sequence is:

1st vec: 0 1 2 3 4 5 6 7 2nd vec: 8 9 10 11 12 13 14 15 3rd vec: 16 17 18 19 20 21 22 23 4th vec: 24 25 26 27 28 29 30 31

The output sequence should be:

1st vec: 0 8 16 24 1 9 17 25 2nd vec: 2 10 18 26 3 11 19 27 3rd vec: 4 12 20 28 5 13 21 30 4th vec: 6 14 22 30 7 15 23 31

i.e., we interleave the contents of the four vectors in their order.

We use interleave_high/low instructions to create such output. The input of each interleave_high/low operation is two vectors: 1st vec 2nd vec 0 1 2 3 4 5 6 7 the even elements of the result vector are obtained left-to-right from the high/low elements of the first vector. The odd elements of the result are obtained left-to-right from the high/low elements of the second vector. The output of interleave_high will be: 0 4 1 5 and of interleave_low: 2 6 3 7

The permutation is done in log LENGTH stages. In each stage interleave_high and interleave_low stmts are created for each pair of vectors in DR_CHAIN, where the first argument is taken from the first half of DR_CHAIN and the second argument from it's second half. In our example,

I1: interleave_high (1st vec, 3rd vec) I2: interleave_low (1st vec, 3rd vec) I3: interleave_high (2nd vec, 4th vec) I4: interleave_low (2nd vec, 4th vec)

The output for the first stage is:

I1: 0 16 1 17 2 18 3 19 I2: 4 20 5 21 6 22 7 23 I3: 8 24 9 25 10 26 11 27 I4: 12 28 13 29 14 30 15 31

The output of the second stage, i.e. the final result is:

I1: 0 8 16 24 1 9 17 25 I2: 2 10 18 26 3 11 19 27 I3: 4 12 20 28 5 13 21 30 I4: 6 14 22 30 7 15 23 31.

Create interleaving stmt: high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>

         Create interleaving stmt:
         low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
                                             nelt*3/2+1, ...}>

References build2, build_int_cst(), copy_ssa_name(), DR_REF, gcc_assert, gimple_assign_lhs(), gimple_assign_set_lhs(), gimple_build_assign, gimple_build_assign_with_ops(), gsi_insert_before(), gsi_insert_on_edge_immediate(), GSI_SAME_STMT, HOST_WIDE_INT, make_ssa_name(), NULL, NULL_TREE, reference_alias_ptr_type(), TREE_TYPE, TYPE_ALIGN_UNIT, vect_create_data_ref_ptr(), and vect_create_destination_var().

static int vect_pow2 ( )

inlinestatic

Return pow2 (X).

Referenced by vect_model_simple_cost().

bool vect_prune_runtime_alias_test_list ( loop_vec_info )

void vect_record_grouped_load_vectors	(	gimple	,
		vec< tree >
	)

void vect_remove_stores ( gimple )

bool vect_schedule_slp	(	loop_vec_info	,
		bb_vec_info
	)

tree vect_setup_realignment	(	gimple	stmt,
		gimple_stmt_iterator *	gsi,
		tree *	realignment_token,
		enum dr_alignment_support	alignment_support_scheme,
		tree	init_addr,
		struct loop **	at_loop
	)

Function vect_setup_realignment

This function is called when vectorizing an unaligned load using the dr_explicit_realign[_optimized] scheme. This function generates the following code at the loop prolog:

p = initial_addr; x msq_init = *(floor(p)); # prolog load realignment_token = call target_builtin; loop: x msq = phi (msq_init, —)

The stmts marked with x are generated only for the case of dr_explicit_realign_optimized.

The code above sets up a new (vector) pointer, pointing to the first location accessed by STMT, and a "floor-aligned" load using that pointer. It also generates code to compute the "realignment-token" (if the relevant target hook was defined), and creates a phi-node at the loop-header bb whose arguments are the result of the prolog-load (created by this function) and the result of a load that takes place in the loop (to be created by the caller to this function).

For the case of dr_explicit_realign_optimized: The caller to this function uses the phi-result (msq) to create the realignment code inside the loop, and sets up the missing phi argument, as follows: loop: msq = phi (msq_init, lsq) lsq = *(floor(p')); # load in loop result = realign_load (msq, lsq, realignment_token);

For the case of dr_explicit_realign: loop: msq = *(floor(p)); # load in loop p' = p + (VS-1); lsq = *(floor(p')); # load in loop result = realign_load (msq, lsq, realignment_token);

Input: STMT - (scalar) load stmt to be vectorized. This load accesses a memory location that may be unaligned. BSI - place where new code is to be inserted. ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes is used.

Output: REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load target hook, if defined. Return value - the result of the loop-header phi node.

 We need to generate three things:
 1. the misalignment computation
 2. the extra vector load (for the optimized realignment scheme).
 3. the phi node for the two vectors from which the realignment is
  done (for the optimized realignment scheme).

 1. Determine where to generate the misalignment computation.

 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
 calculation will be generated by this function, outside the loop (in the
 preheader).  Otherwise, INIT_ADDR had already been computed for us by the
 caller, inside the loop.

 Background: If the misalignment remains fixed throughout the iterations of
 the loop, then both realignment schemes are applicable, and also the
 misalignment computation can be done outside LOOP.  This is because we are
 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
 are a multiple of VS (the Vector Size), and therefore the misalignment in
 different vectorized LOOP iterations is always the same.
 The problem arises only if the memory access is in an inner-loop nested
 inside LOOP, which is now being vectorized using outer-loop vectorization.
 This is the only case when the misalignment of the memory access may not
 remain fixed throughout the iterations of the inner-loop (as explained in
 detail in vect_supportable_dr_alignment).  In this case, not only is the
 optimized realignment scheme not applicable, but also the misalignment
 computation (and generation of the realignment token that is passed to
 REALIGN_LOAD) have to be done inside the loop.

 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
 or not, which in turn determines if the misalignment is computed inside
 the inner-loop, or outside LOOP.

 2. Determine where to generate the extra vector load.

 For the optimized realignment scheme, instead of generating two vector
 loads in each iteration, we generate a single extra vector load in the
 preheader of the loop, and in each iteration reuse the result of the
 vector load from the previous iteration.  In case the memory access is in
 an inner-loop nested inside LOOP, which is now being vectorized using
 outer-loop vectorization, we need to determine whether this initial vector
 load should be generated at the preheader of the inner-loop, or can be
 generated at the preheader of LOOP.  If the memory access has no evolution
 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
 to be generated inside LOOP (in the preheader of the inner-loop).

 3. For the case of the optimized realignment, create the first vector
  load at the loop preheader.

     Create msq_init = *(floor(p1)) in the loop preheader

 4. Create realignment token using a target builtin, if available.
  It is done either inside the containing loop, or before LOOP (as
  determined above).

     Compute INIT_ADDR - the initial addressed accessed by this memref.

         Generate the INIT_ADDR computation outside LOOP.

         Generate the misalignment computation outside LOOP.

     The result of the CALL_EXPR to this builtin is determined from
     the value of the parameter and no global variables are touched
     which makes the builtin a "const" function.  Requiring the
     builtin to have the "const" attribute makes it unnecessary
     to call mark_call_clobbered.

 5. Create msq = phi <msq_init, lsq> in loop

References dump_enabled_p(), dump_printf_loc(), MSG_MISSED_OPTIMIZATION, and vect_location.

bb_vec_info vect_slp_analyze_bb ( basic_block )

bool vect_slp_analyze_data_ref_dependences ( bb_vec_info )

void vect_slp_transform_bb ( basic_block )

bool vect_store_lanes_supported	(	tree	,
		unsigned	HOST_WIDE_INT
	)

enum dr_alignment_support vect_supportable_dr_alignment	(	struct data_reference *	dr,
		bool	check_aligned_accesses
	)

Return whether the data reference DR is supported with respect to its alignment. If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even it is aligned, i.e., check if it is possible to vectorize it with different alignment.

 Possibly unaligned access.   


 We can choose between using the implicit realignment scheme (generating
 a misaligned_move stmt) and the explicit realignment scheme (generating
 aligned loads with a REALIGN_LOAD).  There are two variants to the
 explicit realignment scheme: optimized, and unoptimized.
 We can optimize the realignment only if the step between consecutive
 vector loads is equal to the vector size.  Since the vector memory
 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
 is guaranteed that the misalignment amount remains the same throughout the
 execution of the vectorized loop.  Therefore, we can create the
 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
 at the loop preheader.

 However, in the case of outer-loop vectorization, when vectorizing a
 memory access in the inner-loop nested within the LOOP that is now being
 vectorized, while it is guaranteed that the misalignment of the
 vectorized memory access will remain the same in different outer-loop
 iterations, it is *not* guaranteed that is will remain the same throughout
 the execution of the inner-loop.  This is because the inner-loop advances
 with the original scalar step (and not in steps of VS).  If the inner-loop
 step happens to be a multiple of VS, then the misalignment remains fixed
 and we can use the optimized realignment scheme.  For example:

  for (i=0; i<N; i++)
    for (j=0; j<M; j++)
      s += a[i+j];

 When vectorizing the i-loop in the above example, the step between
 consecutive vector loads is 1, and so the misalignment does not remain
 fixed across the execution of the inner-loop, and the realignment cannot
 be optimized (as illustrated in the following pseudo vectorized loop):

  for (i=0; i<N; i+=4)
    for (j=0; j<M; j++){
      vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}

when j is {0,1,2,3,4,5,6,7,...} respectively. (assuming that we start from an aligned address). }

We therefore have to use the unoptimized realignment scheme:

for (i=0; i<N; i+=4) for (j=k; j<M; j+=4) vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming that the misalignment of the initial address is 0).

 The loop can then be vectorized as follows:

  for (k=0; k<4; k++){
    rt = get_realignment_token (&vp[k]);
    for (i=0; i<N; i+=4){
      v1 = vp[i+k];
      for (j=k; j<M; j+=4){
        v2 = vp[i+j+VS-1];
        va = REALIGN_LOAD <v1,v2,rt>;
        vs += va;
        v1 = v2;
      }
    }
}

       Can't software pipeline the loads, but can at least do them.

 Unsupported.

Referenced by vect_build_slp_tree_1(), and vect_update_misalignment_for_peel().

bool vect_supportable_shift	(	enum	tree_code,
		tree
	)

void vect_transform_grouped_load	(	gimple	stmt,
		vec< tree >	dr_chain,
		int	size,
		gimple_stmt_iterator *	gsi
	)

Function vect_transform_grouped_load.

Given a chain of input interleaved data-refs (in DR_CHAIN), build statements to perform their permutation and ascribe the result vectorized statements to the scalar statements.

DR_CHAIN contains input data-refs that are a part of the interleaving. RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted vectors, that are ready for vector computation.

References targetm.

void vect_transform_loop ( loop_vec_info )

Drive for loop transformation stage.

bool vect_transform_slp_perm_load	(	slp_tree	node,
		vec< tree >	dr_chain,
		gimple_stmt_iterator *	gsi,
		int	vf,
		slp_instance	slp_node_instance,
		bool	analyze_only
	)

Generate vector permute statements from a list of loads in DR_CHAIN. If ANALYZE_ONLY is TRUE, only check that it is possible to create valid permute statements for the SLP node NODE of the SLP instance SLP_NODE_INSTANCE.

 The generic VEC_PERM_EXPR code always uses an integral type of the
 same size as the vector element being permuted.

 The number of vector stmts to generate based only on SLP_NODE_INSTANCE
 unrolling factor.

 Number of copies is determined by the final vectorization factor
 relatively to SLP_NODE_INSTANCE unrolling factor.

 Generate permutation masks for every NODE. Number of masks for each NODE
 is equal to GROUP_SIZE.
 E.g., we have a group of three nodes with three loads from the same
 location in each node, and the vector size is 4. I.e., we have a
 a0b0c0a1b1c1... sequence and we need to create the following vectors:
 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
 ...

 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
 The last mask is illegal since we assume two operands for permute
 operation, and the mask element values can't be outside that range.
 Hence, the last mask must be converted into {2,5,5,5}.
 For the first two permutations we need the first and the second input
 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
 we need the second and the third vectors: {b1,c1,a2,b2} and
 {c2,a3,b3,c3}.

References SLP_TREE_NUMBER_OF_VEC_STMTS, and SLP_TREE_VEC_STMTS.

bool vect_transform_stmt	(	gimple	stmt,
		gimple_stmt_iterator *	gsi,
		bool *	grouped_store,
		slp_tree	slp_node,
		slp_instance	slp_node_instance
	)

Function vect_transform_stmt.

Create a vectorized stmt to replace STMT, and insert it at BSI.

    In case of interleaving, the whole chain is vectorized when the
    last store in the chain is reached.  Store stmts before the last
    one are skipped, and there vec_stmt_info shouldn't be freed
    meanwhile.

 Handle inner-loop stmts whose DEF is used in the loop-nest that
 is being vectorized, but outside the immediately enclosing loop.   


     Find the relevant loop-exit phi-node, and reord the vec_stmt there
    (to be used when vectorizing outer-loop stmts that use the DEF of
    STMT).

 Handle stmts whose DEF is used outside the loop-nest that is
 being vectorized.

void vect_update_slp_costs_according_to_vf ( loop_vec_info )

bool vect_verify_datarefs_alignment	(	loop_vec_info	,
		bb_vec_info
	)

bool vectorizable_condition	(	gimple	stmt,
		gimple_stmt_iterator *	gsi,
		gimple *	vec_stmt,
		tree	reduc_def,
		int	reduc_index,
		slp_tree	slp_node
	)

vectorizable_condition.

Check if STMT is conditional modify expression that can be vectorized. If VEC_STMT is also passed, vectorize the STMT: create a vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it at GSI.

When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in else caluse if it is 2).

Return FALSE if not a vectorizable STMT, TRUE otherwise.

 FORNOW: not yet supported.

 Is vectorizable conditional operation?

 The result of a vector comparison should be signed type.

 Transform.

 Handle def.

 Handle cond expr.

     Arguments are ready.  Create the new vector stmt.

Referenced by vect_analyze_stmt().

tree vectorizable_function	(	gimple	,
		tree	,
		tree
	)

bool vectorizable_induction	(	gimple	phi,
		gimple_stmt_iterator *	gsi,
		gimple *	vec_stmt
	)

Function vectorizable_induction

Check if PHI performs an induction computation that can be vectorized. If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized phi to replace it, put it in VEC_STMT, and add it to the same basic block. Return FALSE if not a vectorizable STMT, TRUE otherwise.

 FORNOW. These restrictions should be relaxed.

 FORNOW: SLP not supported.

 Transform.

Referenced by vect_analyze_stmt().

bool vectorizable_live_operation	(	gimple	stmt,
		gimple_stmt_iterator *	gsi,
		gimple *	vec_stmt
	)

Function vectorizable_live_operation.

STMT computes a value that is used outside the loop. Check if it can be supported.

 FORNOW. CHECKME.

 FORNOW: support only if all uses are invariant.  This means
 that the scalar operations can remain in place, unvectorized.
 The original last scalar value that they compute will be used.

 No transformation is required for the cases we currently support.

References dump_gimple_stmt(), dump_printf(), dump_printf_loc(), MSG_NOTE, TDF_SLIM, and vect_location.

bool vectorizable_reduction	(	gimple	stmt,
		gimple_stmt_iterator *	gsi,
		gimple *	vec_stmt,
		slp_tree	slp_node
	)

Function vectorizable_reduction.

Check if STMT performs a reduction operation that can be vectorized. If VEC_STMT is also passed, vectorize the STMT: create a vectorized stmt to replace it, put it in VEC_STMT, and insert it at GSI. Return FALSE if not a vectorizable STMT, TRUE otherwise.

This function also handles reduction idioms (patterns) that have been recognized in advance during vect_pattern_recog. In this case, STMT may be of this form: X = pattern_expr (arg0, arg1, ..., X) and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original sequence that had been detected and replaced by the pattern-stmt (STMT).

In some cases of reduction patterns, the type of the reduction variable X is different than the type of the other arguments of STMT. In such cases, the vectype that is used when transforming STMT into a vector stmt is different than the vectype that is used to determine the vectorization factor, because it consists of a different number of elements than the actual number of elements that are being operated upon in parallel.

For example, consider an accumulation of shorts into an int accumulator. On some targets it's possible to vectorize this pattern operating on 8 shorts at a time (hence, the vectype for purposes of determining the vectorization factor should be V8HI); on the other hand, the vectype that is used to create the vector form is actually V4SI (the type of the result).

Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that indicates what is the actual level of parallelism (V8HI in the example), so that the right vectorization factor would be derived. This vectype corresponds to the type of arguments to the reduction stmt, and should NOT be used to create the vectorized stmt. The right vectype for the vectorized stmt is obtained from the type of the result X: get_vectype_for_scalar_type (TREE_TYPE (X))

This means that, contrary to "regular" reductions (or "regular" stmts in general), the following equation: STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) does NOT necessarily hold for reduction patterns.

 The default is that the reduction variable is the last in statement.

 In case of reduction chain we switch to the first stmt in the chain, but
 we don't update STMT_INFO, since only the last stmt is marked as reduction
 and has reduction properties.

 1. Is vectorizable reduction?

 Not supportable if the reduction variable is used in the loop, unless
 it's a reduction chain.

 Reductions that are not used even in an enclosing outer-loop,
 are expected to be "live" (used out of the loop).

 Make sure it was already recognized as a reduction computation.

 2. Has this been recognized as a reduction pattern?

 Check if STMT represents a pattern that has been recognized
 in earlier analysis stages.  For stmts that represent a pattern,
 the STMT_VINFO_RELATED_STMT field records the last stmt in
 the original sequence that constitutes the pattern.

 3. Check the operands of the operation.  The first operands are defined
    inside the loop body. The last operand is the reduction variable,
    which is defined by the loop-header-phi.

 Flatten RHS.

 Do not try to vectorize bit-precision reductions.

 All uses but the last are expected to be defined in the loop.
 The last use is the reduction variable.  In case of nested cycle this
 assumption is not true: we use reduc_index to record the index of the
 reduction variable.

     The condition of COND_EXPR is checked in vectorizable_condition().

     For pattern recognized stmts, orig_stmt might be a reduction,
     but some helper statements for the pattern might not, or
     might be COND_EXPRs with reduction uses in the condition.

     We changed STMT to be the first stmt in reduction chain, hence we
     check that in this case the first element in the chain is STMT.

     4. Supportable by target?

         Shifts and rotates are only supported by vectorizable_shifts,
         not vectorizable_reduction.

     4.1. check support for the operation in the loop

     Worthwhile without SIMD support?

 4.2. Check support for the epilog operation.

      If STMT represents a reduction pattern, then the type of the
      reduction variable may be different than the type of the rest
      of the arguments.  For example, consider the case of accumulation
      of shorts into an int accumulator; The original code:
                    S1: int_a = (int) short_a;
      orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

      was replaced with:
                    STMT: int_acc = widen_sum <short_a, int_acc>

      This means that:
      1. The tree-code that is used to create the vector operation in the
         epilog code (that reduces the partial results) is not the
         tree-code of STMT, but is rather the tree-code of the original
         stmt from the pattern that STMT is replacing.  I.e, in the example
         above we want to use 'widen_sum' in the loop, but 'plus' in the
         epilog.
      2. The type (mode) we use to check available target support
         for the vector operation to be created in the *epilog*, is
         determined by the type of the reduction variable (in the example
         above we'd check this: optab_handler (plus_optab, vect_int_mode])).
         However the type (mode) we use to check available target support
         for the vector operation to be created *inside the loop*, is
         determined by the type of the other arguments to STMT (in the
         example we'd check this: optab_handler (widen_sum_optab,
         vect_short_mode)).

      This is contrary to "regular" reductions, in which the types of all
      the arguments are the same as the type of the reduction variable.
      For "regular" reductions we can therefore use the same vector type
      (and also the same tree-code) when generating the epilog code and
      when generating the code inside the loop.

     This is a reduction pattern: get the vectype from the type of the
     reduction variable, and get the tree-code from orig_stmt.

     Regular reduction: use the same vectype and tree-code as used for
     the vector code inside the loop can be used for the epilog code.

 In case of widenning multiplication by a constant, we update the type
 of the constant to be the type of the other operand.  We check that the
 constant fits the type in the pattern recognition pass.

 Transform.

 FORNOW: Multiple types are not supported for condition.

 Create the destination vector

 In case the vectorization factor (VF) is bigger than the number
 of elements that we can fit in a vectype (nunits), we have to generate
 more than one vector stmt - i.e - we need to "unroll" the
 vector stmt by a factor VF/nunits.  For more details see documentation
 in vectorizable_operation.

 If the reduction is used in an outer loop we need to generate
 VF intermediate results, like so (e.g. for ncopies=2):
    r0 = phi (init, r0)
    r1 = phi (init, r1)
    r0 = x0 + r0;
    r1 = x1 + r1;
(i.e. we generate VF results in 2 registers).
In this case we have a separate def-use cycle for each copy, and therefore
for each copy we get the vector def for the reduction variable from the
respective phi node created for this copy.

Otherwise (the reduction is unused in the loop nest), we can combine
together intermediate results, like so (e.g. for ncopies=2):
    r = phi (init, r)
    r = x0 + r;
    r = x1 + r;

(i.e. we generate VF/2 results in a single register). In this case for each copy we get the vector def for the reduction variable from the vectorized reduction operation generated in the previous iteration.

             Create the reduction-phi that defines the reduction
             operand.

         Multiple types are not supported for condition.

     Handle uses.

 Finalize the reduction-phi (set its arguments) and create the
 epilog reduction code.

Referenced by vect_analyze_stmt().

unsigned vectorize_loops ( void )

In tree-vectorizer.c.

Function vectorize_loops.

Entry point to loop vectorization phase.

 Bail out if there are no loops.

  &mdash;&mdash;&mdash;&ndash; Analyze loops. &mdash;&mdash;&mdash;&ndash;

 If some loop was duplicated, it gets bigger number
 than all previously defined loops.  This fact allows us to run
 only over initial loops skipping newly generated ones.

       Now that the loop has been vectorized, allow it to be unrolled
       etc.

  &mdash;&mdash;&mdash;&ndash; Finalize. &mdash;&mdash;&mdash;&ndash;

 Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE} builtins.

 Shrink any "omp array simd" temporary arrays to the
 actual vectorization factors.

     If we vectorized any loop only virtual SSA form needs to be updated.
     ???  Also while we try hard to update loop-closed SSA form we fail
     to properly do this in some corner-cases (see PR56286).

References adjust_simduid_builtins(), and cfun.

Referenced by make_pass_tree_loop_init().

static stmt_vec_info vinfo_for_stmt ( )

inlinestatic

Return a stmt_vec_info corresponding to STMT.

Referenced by check_bool_pattern(), destroy_bb_vec_info(), get_initial_def_for_reduction(), new_loop_vec_info(), vect_analyze_data_ref_dependences(), vect_analyze_slp(), vect_build_slp_tree_1(), vect_can_advance_ivs_p(), vect_create_cond_for_alias_checks(), vect_create_epilog_for_reduction(), vect_create_vectorized_demotion_stmts(), vect_do_peeling_for_loop_bound(), vect_estimate_min_profitable_iters(), vect_find_same_alignment_drs(), vect_finish_stmt_generation(), vect_gen_widened_results_half(), vect_get_new_vect_var(), vect_get_place_in_interleaving_chain(), vect_get_store_cost(), vect_grouped_store_supported(), vect_loop_kill_debug_uses(), vect_mark_relevant(), vect_pattern_recog_1(), vect_peeling_hash_choose_best_peeling(), vect_peeling_hash_insert(), vect_print_slp_tree(), vect_recog_temp_ssa_var(), vect_slp_analyze_data_ref_dependence(), vect_transform_loop(), and vect_update_misalignment_for_peel().

Variable Documentation

unsigned int current_vector_size

In tree-vect-stmts.c.

vec<vec_void_p> stmt_vec_info_vec

Vector mapping GIMPLE stmt to stmt_vec_info.

LOC vect_location

Source location

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/. Loop and basic block vectorizer.

This file contains drivers for the three vectorizers: (1) loop vectorizer (inter-iteration parallelism), (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop vectorizer) (3) BB vectorizer (out-of-loops), aka SLP

The rest of the vectorizer's code is organized as follows:

tree-vect-loop.c - loop specific parts such as reductions, etc. These are used by drivers (1) and (2).
tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by drivers (1) and (2).
tree-vect-slp.c - BB vectorization specific analysis and transformation, used by drivers (2) and (3).
tree-vect-stmts.c - statements analysis and transformation (used by all).
tree-vect-data-refs.c - vectorizer specific data-refs analysis and manipulations (used by all).
tree-vect-patterns.c - vectorizable code patterns detector (used by all)

Here's a poor attempt at illustrating that:

tree-vectorizer.c: loop_vect() loop_aware_slp() slp_vect() | / \ / | / \ / tree-vect-loop.c tree-vect-slp.c | \ \ / / | | \ \/ / | | \ /\ / | | \ / \ / | tree-vect-stmts.c tree-vect-data-refs.c \ / tree-vect-patterns.c Loop or bb location.

Referenced by dr_group_sort_cmp(), get_initial_def_for_reduction(), perm_mask_for_reverse(), vect_analyze_data_ref_accesses(), vect_analyze_loop_1(), vect_analyze_scalar_cycles(), vect_analyze_stmt(), vect_build_slp_tree_1(), vect_can_advance_ivs_p(), vect_compute_data_ref_alignment(), vect_create_cond_for_alias_checks(), vect_create_vectorized_promotion_stmts(), vect_do_peeling_for_loop_bound(), vect_find_same_alignment_drs(), vect_gen_perm_mask(), vect_get_mask_element(), vect_get_slp_defs(), vect_get_store_cost(), vect_get_vec_def_for_operand(), vect_loop_kill_debug_uses(), vect_mark_for_runtime_alias_test(), vect_mark_relevant(), vect_mark_slp_stmts_relevant(), vect_min_worthwhile_factor(), vect_model_reduction_cost(), vect_model_simple_cost(), vect_model_store_cost(), vect_pattern_recog_1(), vect_peeling_hash_choose_best_peeling(), vect_setup_realignment(), vect_slp_analyze_data_ref_dependence(), vect_stmt_relevant_p(), vect_transform_loop(), vect_update_ivs_after_vectorizer(), vect_update_misalignment_for_peel(), and vectorizable_live_operation().

Data Structures

Macros

Typedefs

Enumerations

Functions

Variables

Macro Definition Documentation

Typedef Documentation

Enumeration Type Documentation

Function Documentation

Variable Documentation