foo.c

Line | Hotness | Pass | Source | Function / Inlining Chain
1
// Taken from Adam Nemet's November 2016 LLVM talk
2
3
#if 1
4
void accumulate (int x, int *a)
5
{
6
  *a += x;
567644000 slp
     ^=== vect_slp_analyze_bb ===
     
  • accumulate
  • inlined from compute_sum at foo.c:16:5
567644000 slp
     ^=== vect_slp_analyze_bb ===
     
  • accumulate
  • inlined from compute_sum at foo.c:16:5
567644000 slp
     ^=== vect_slp_analyze_bb ===
       === vect_analyze_data_refs ===
       not vectorized: not enough data-refs in basic block
     
  • accumulate
  • inlined from compute_sum at foo.c:16:5
567644000 slp
     ^=== vect_slp_analyze_bb ===
       === vect_analyze_data_refs ===
         got vectype for stmt: _3 = *_2;vector(4) int
       not vectorized: not enough data-refs in basic block
     
  • accumulate
  • inlined from compute_sum at foo.c:16:5
7
}
8
#else
9
extern void accumulate (int x, int *a);
10
#endif
11
12
int compute_sum (int arr[], int n)
13
{
14
  int sum = 0;
15
  for (int i = 0; i < n; ++i)
955630000 vect

                        
^=== analyzing loop === === analyze_loop_nest === === vect_analyze_loop_form === === get_loop_niters === symbolic number of iterations is (unsigned int) n_8(D) === vect_analyze_data_refs === got vectype for stmt: _4 = *_3;vector(4) int === vect_analyze_scalar_cycles === Analyze phi: i_17 = PHI <0(5), i_10(6)> Access function of PHI: {0, +, 1}_1 step: 1, init: 0 Detected induction Analyze phi: sum_20 = PHI <0(5), _12(6)> Access function of PHI: {0, +, _4}_1 step: _4, init: 0 step unknown Analyze phi: sum_20 = PHI <0(5), _12(6)> detected reduction: _12 = _4 + sum_20; Detected reduction === vect_pattern_recog === vect_is_simple_use: operand _1 def_stmt: _1 = (long unsigned int) i_17; type of def: internal vect_is_simple_use: operand i_17 def_stmt: i_17 = PHI <0(5), i_10(6)> type of def: induction vect_is_simple_use: operand 4 vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal === vect_analyze_data_ref_accesses === === vect_mark_stmts_to_be_vectorized === vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <0(5), _12(6)> type of def: reduction vect_is_simple_use: operand 0 vect_is_simple_use: operand _12 def_stmt: _12 = _4 + sum_20; type of def: reduction === vect_analyze_data_ref_dependences === === vect_determine_vectorization_factor === ==> examining phi: i_17 = PHI <0(5), i_10(6)> ==> examining phi: sum_20 = PHI <0(5), _12(6)> get vectype for scalar type: int vectype: vector(4) int nunits = FIXME: poly_int ==> examining statement: _1 = (long unsigned int) i_17; skip ==> examining statement: _2 = _1 * 4; skip ==> examining statement: _3 = arr_9(D) + _2; skip ==> examining statement: _4 = *_3; get vectype for scalar type: int vectype: vector(4) int 
nunits = FIXME: poly_int ==> examining statement: _12 = _4 + sum_20; get vectype for scalar type: int vectype: vector(4) int get vectype for scalar type: int vectype: vector(4) int nunits = FIXME: poly_int ==> examining statement: i_10 = i_17 + 1; skip ==> examining statement: if (n_8(D) > i_10) skip vectorization factor = FIXME: poly_int === vect_analyze_slp === === vect_make_slp_decision === === vect_analyze_data_refs_alignment === recording new base alignment for arr_9(D) alignment: 4 misalignment: 0 based on: _4 = *_3; vect_compute_data_ref_alignment: can't force alignment of ref: *_3 === vect_prune_runtime_alias_test_list === === vect_enhance_data_refs_alignment === Unknown misalignment, naturally aligned Analyze phi: i_17 = PHI <0(5), i_10(6)> Analyze phi: sum_20 = PHI <0(5), _12(6)> reduc or virtual phi. skip vect_get_data_access_cost: inside_cost = 12, outside_cost = 0. cost model: epilogue peel iters set to vf/2 because loop iterations are unknown vect_get_data_access_cost: inside_cost = 12, outside_cost = 0. cost model: epilogue peel iters set to vf/2 because loop iterations are unknown Vectorizing an unaligned access === vect_analyze_loop_operations === examining phi: i_17 = PHI <0(5), i_10(6)> examining phi: sum_20 = PHI <0(5), _12(6)> vect_is_simple_use: operand *_3 not ssa-name use not simple vect_is_simple_use: operand *_3 not ssa-name use not simple can't use a fully-masked loop because the target doesn't have the appropriate masked load or store. 
vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <0(5), _12(6)> type of def: reduction reduc op not supported by target not using a fully-masked loop cost model: epilogue peel iters set to vf/2 because loop iterations are unknown Cost model analysis: Vector inside of loop cost: 16 Vector prologue cost: 36 Vector epilogue cost: 52 Scalar iteration cost: 16 Scalar outside cost: 32 Vector outside cost: 88 prologue iterations: 0 epilogue iterations: 2 Runtime profitability threshold = 5 Static estimate profitability threshold = 9 epilog loop required Analyze phi: i_17 = PHI <0(5), i_10(6)> Analyze phi: sum_20 = PHI <0(5), _12(6)> reduc or virtual phi. skip loop vectorized === vec_transform_loop === Profitability threshold is 5 loop iterations Analyze phi: i_17 = PHI <i_10(6), 0(9)> Analyze phi: sum_20 = PHI <_12(6), 0(9)> reduc or virtual phi. skip vect_update_ivs_after_vectorizer: phi: i_17 = PHI <i_10(6), 0(9)> vect_update_ivs_after_vectorizer: phi: sum_20 = PHI <_12(6), 0(9)> reduc or virtual phi. skip. 
------>vectorizing phi: i_17 = PHI <i_10(6), 0(16)> ------>vectorizing phi: sum_20 = PHI <_12(6), 0(16)> transform phi ------>vectorizing phi: vect__12.4_33 = PHI <(6), (16)> ------>vectorizing statement: _1 = (long unsigned int) i_17; ------>vectorizing statement: _2 = _1 * 4; ------>vectorizing statement: _3 = arr_9(D) + _2; ------>vectorizing statement: _4 = *_3; transform statement create vector_type-pointer variable to type: vector(4) int vectorizing a pointer ref: *arr_9(D) created arr_9(D) ------>vectorizing statement: _12 = _4 + sum_20; transform statement vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <_12(6), 0(16)> type of def: reduction reduc op not supported by target transform reduction vect_get_vec_def_for_operand: _4 vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal def_stmt = _4 = *_3; vect_get_vec_def_for_operand: sum_20 vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <_12(6), 0(16)> type of def: reduction def_stmt = sum_20 = PHI <_12(6), 0(16)> vect_is_simple_use: operand 0 transform reduction: created def-use cycle: vect__12.4_33 = PHI <vect__12.8_37(6), { 0, 0, 0, 0 }(16)>vect__12.8_37 = vect__4.7_36 + vect__12.4_33; Reduce using vector shifts extract scalar result ------>vectorizing statement: i_10 = i_17 + 1; ------>vectorizing statement: vectp_arr.5_35 = vectp_arr.5_34 + 16; ------>vectorizing statement: if (n_8(D) > i_10) New loop exit condition: if (ivtmp_46 < bnd.1_29) LOOP VECTORIZED
  • compute_sum
57432000 cunroll
  ^loop with 3 iterations completely unrolled (header execution count 57432045)
  
  • compute_sum
118112000 slp
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
    not vectorized: not enough data-refs in basic block
  
  • compute_sum
118112000 slp
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
    not vectorized: not enough data-refs in basic block
  
  • compute_sum
118112000 slp
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      not vectorized: no vectype for stmt: vect__4.7_36 = MEM[(int *)vectp_arr.5_34]; scalar_type: vector(4) int
    not vectorized: not enough data-refs in basic block
  
  • compute_sum
81467500 slp
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      got vectype for stmt: _50 = *_49;vector(4) int
    not vectorized: not enough data-refs in basic block
  
  • compute_sum
72506100 slp
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      got vectype for stmt: _58 = *_57;vector(4) int
    not vectorized: not enough data-refs in basic block
  
  • compute_sum
64530400 slp
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      got vectype for stmt: _5 = *_6;vector(4) int
    not vectorized: not enough data-refs in basic block
  
  • compute_sum
57432000 slp
                         ^=== vect_slp_analyze_bb ===
                           === vect_analyze_data_refs ===
                           not vectorized: not enough data-refs in basic block
                         
  • compute_sum
16
    accumulate (arr[i], &sum);
einline
    ^inlining accumulate into compute_sum
    
  • compute_sum
17
  return sum;
18
}