foo.c


// Taken from Adam Nemet's November 2016 LLVM talk

#if 1
void accumulate (int x, int *a)
{
  *a += x;
     ^=== vect_slp_analyze_bb ===
     
     ^=== vect_slp_analyze_bb ===
     
     ^=== vect_slp_analyze_bb ===
       === vect_analyze_data_refs ===
       not vectorized: not enough data-refs in basic block
     
     ^=== vect_slp_analyze_bb ===
       === vect_analyze_data_refs ===
         got vectype for stmt: _3 = *_2;vector(4) int
       not vectorized: not enough data-refs in basic block
     
}
#else
extern void accumulate (int x, int *a);
#endif

int compute_sum (int arr[], int n)
{
  int sum = 0;
  for (int i = 0; i < n; ++i)
  ^=== analyzing loop ===
    === analyze_loop_nest ===
      === vect_analyze_loop_form ===
        === get_loop_niters ===
      symbolic number of iterations is (unsigned int) n_8(D)
      === vect_analyze_data_refs ===
        got vectype for stmt: _4 = *_3;vector(4) int
      === vect_analyze_scalar_cycles ===
        Analyze phi: i_17 = PHI <0(5), i_10(6)>
        Access function of PHI: {0, +, 1}_1
        step: 1,  init: 0
        Detected induction
        Analyze phi: sum_20 = PHI <0(5), _12(6)>
        Access function of PHI: {0, +, _4}_1
        step: _4,  init: 0
        step unknown
        Analyze phi: sum_20 = PHI <0(5), _12(6)>
        Detected reduction
      === vect_pattern_recog ===
      === vect_analyze_data_ref_accesses ===
      === vect_mark_stmts_to_be_vectorized ===
      === vect_analyze_data_ref_dependences ===
      === vect_determine_vectorization_factor ===
        ==> examining phi: i_17 = PHI <0(5), i_10(6)>
        ==> examining phi: sum_20 = PHI <0(5), _12(6)>
        get vectype for scalar type:  int
        vectype: vector(4) int
        ==> examining statement: _1 = (long unsigned int) i_17;
        skip
        ==> examining statement: _2 = _1 * 4;
        skip
        ==> examining statement: _3 = arr_9(D) + _2;
        skip
        ==> examining statement: _4 = *_3;
        get vectype for scalar type:  int
        vectype: vector(4) int
        ==> examining statement: _12 = _4 + sum_20;
        get vectype for scalar type:  int
        vectype: vector(4) int
        get vectype for scalar type:  int
        vectype: vector(4) int
        ==> examining statement: i_10 = i_17 + 1;
        skip
        ==> examining statement: if (n_8(D) > i_10)
        skip
      === vect_analyze_slp ===
      === vect_make_slp_decision ===
      === vect_analyze_data_refs_alignment ===
        recording new base alignment for arr_9(D)
          alignment:    4
          misalignment: 0
          based on:     _4 = *_3;
        vect_compute_data_ref_alignment:
        can't force alignment of ref: 
      === vect_prune_runtime_alias_test_list ===
      === vect_enhance_data_refs_alignment ===
        Unknown misalignment, naturally aligned
        vect_get_data_access_cost: inside_cost = 12, outside_cost = 0.
        cost model: epilogue peel iters set to vf/2 because loop iterations are unknown
        vect_get_data_access_cost: inside_cost = 12, outside_cost = 0.
        cost model: epilogue peel iters set to vf/2 because loop iterations are unknown
        Vectorizing an unaligned access.
      === vect_analyze_loop_operations ===
        examining phi: i_17 = PHI <0(5), i_10(6)>
        examining phi: sum_20 = PHI <0(5), _12(6)>
        not ssa-name.
        use not simple
        not ssa-name.
        use not simple
        can't use a fully-masked loop because the target doesn't have the appropriate masked load or store.
        reduc op not supported by target
      not using a fully-masked loop
      cost model: epilogue peel iters set to vf/2 because loop iterations are unknown
      Cost model analysis:
        Vector inside of loop cost: 16
        Vector prologue cost: 36
        Vector epilogue cost: 52
        Scalar iteration cost: 16
        Scalar outside cost: 32
        Vector outside cost: 88
        prologue iterations: 0
        epilogue iterations: 2
        Runtime profitability threshold = 5
        Static estimate profitability threshold = 9
      epilog loop required
    loop vectorized
    === vec_transform_loop ===
      Profitability threshold is 5 loop iterations
      ------>vectorizing phi: i_17 = PHI <i_10(6), 0(16)>
      ------>vectorizing phi: sum_20 = PHI <_12(6), 0(16)>
      transform phi
      ------>vectorizing phi: vect__12.4_33 = PHI <(6), (16)>
      ------>vectorizing statement: _1 = (long unsigned int) i_17;
      ------>vectorizing statement: _2 = _1 * 4;
      ------>vectorizing statement: _3 = arr_9(D) + _2;
      ------>vectorizing statement: _4 = *_3;
      transform statement
      create vector_type-pointer variable to type: vector(4) int  vectorizing a pointer ref: *arr_9(D)
      created 
      ------>vectorizing statement: _12 = _4 + sum_20;
      transform statement
      reduc op not supported by target
      transform reduction
      transform reduction: created def-use cycle: vect__12.4_33 = PHI <vect__12.8_37(6), { 0, 0, 0, 0 }(16)>vect__12.8_37 = vect__4.7_36 + vect__12.4_33;
      Reduce using vector shifts
      extract scalar result
      ------>vectorizing statement: i_10 = i_17 + 1;
      ------>vectorizing statement: vectp_arr.5_35 = vectp_arr.5_34 + 16;
      ------>vectorizing statement: if (n_8(D) > i_10)
      LOOP VECTORIZED
  
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
    not vectorized: not enough data-refs in basic block
  
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
    not vectorized: not enough data-refs in basic block
  
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      not vectorized: no vectype for stmt: vect__4.7_36 = MEM[(int *)vectp_arr.5_34]; scalar_type: vector(4) int
    not vectorized: not enough data-refs in basic block
  
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      got vectype for stmt: _50 = *_49;vector(4) int
    not vectorized: not enough data-refs in basic block
  
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      got vectype for stmt: _58 = *_57;vector(4) int
    not vectorized: not enough data-refs in basic block
  
  ^=== vect_slp_analyze_bb ===
    === vect_analyze_data_refs ===
      got vectype for stmt: _5 = *_6;vector(4) int
    not vectorized: not enough data-refs in basic block
  
                         ^=== vect_slp_analyze_bb ===
                           === vect_analyze_data_refs ===
                           not vectorized: not enough data-refs in basic block
                         
    accumulate (arr[i], &sum);
    ^inlining accumulate into compute_sum
    
  return sum;
}

Line	Hotness	Pass	Source
1			// Taken from Adam Nemet's November 2016 LLVM talk
2
3			#if 1
4			void accumulate (int x, int *a)
5			{
6			*a += x;
	567644000	slp	^=== vect_slp_analyze_bb ===
	567644000	slp	^=== vect_slp_analyze_bb ===
	567644000	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === not vectorized: not enough data-refs in basic block
	567644000	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === got vectype for stmt: _3 = *_2;vector(4) int not vectorized: not enough data-refs in basic block
7			}
8			#else
9			extern void accumulate (int x, int *a);
10			#endif
11
12			int compute_sum (int arr[], int n)
13			{
14			int sum = 0;
15			for (int i = 0; i < n; ++i)
	955630000	vect	^=== analyzing loop === === analyze_loop_nest === === vect_analyze_loop_form === === get_loop_niters === symbolic number of iterations is (unsigned int) n_8(D) === vect_analyze_data_refs === got vectype for stmt: _4 = *_3;vector(4) int === vect_analyze_scalar_cycles === Analyze phi: i_17 = PHI <0(5), i_10(6)> Access function of PHI: {0, +, 1}_1 step: 1, init: 0 Detected induction Analyze phi: sum_20 = PHI <0(5), _12(6)> Access function of PHI: {0, +, _4}_1 step: _4, init: 0 step unknown Analyze phi: sum_20 = PHI <0(5), _12(6)> Detected reduction === vect_pattern_recog === === vect_analyze_data_ref_accesses === === vect_mark_stmts_to_be_vectorized === === vect_analyze_data_ref_dependences === === vect_determine_vectorization_factor === ==> examining phi: i_17 = PHI <0(5), i_10(6)> ==> examining phi: sum_20 = PHI <0(5), _12(6)> get vectype for scalar type: int vectype: vector(4) int ==> examining statement: _1 = (long unsigned int) i_17; skip ==> examining statement: _2 = _1 * 4; skip ==> examining statement: _3 = arr_9(D) + _2; skip ==> examining statement: _4 = *_3; get vectype for scalar type: int vectype: vector(4) int ==> examining statement: _12 = _4 + sum_20; get vectype for scalar type: int vectype: vector(4) int get vectype for scalar type: int vectype: vector(4) int ==> examining statement: i_10 = i_17 + 1; skip ==> examining statement: if (n_8(D) > i_10) skip === vect_analyze_slp === === vect_make_slp_decision === === vect_analyze_data_refs_alignment === recording new base alignment for arr_9(D) alignment: 4 misalignment: 0 based on: _4 = *_3; vect_compute_data_ref_alignment: can't force alignment of ref: === vect_prune_runtime_alias_test_list === === vect_enhance_data_refs_alignment === Unknown misalignment, naturally aligned vect_get_data_access_cost: inside_cost = 12, outside_cost = 0. cost model: epilogue peel iters set to vf/2 because loop iterations are unknown vect_get_data_access_cost: inside_cost = 12, outside_cost = 0. 
cost model: epilogue peel iters set to vf/2 because loop iterations are unknown Vectorizing an unaligned access. === vect_analyze_loop_operations === examining phi: i_17 = PHI <0(5), i_10(6)> examining phi: sum_20 = PHI <0(5), _12(6)> not ssa-name. use not simple not ssa-name. use not simple can't use a fully-masked loop because the target doesn't have the appropriate masked load or store. reduc op not supported by target not using a fully-masked loop cost model: epilogue peel iters set to vf/2 because loop iterations are unknown Cost model analysis: Vector inside of loop cost: 16 Vector prologue cost: 36 Vector epilogue cost: 52 Scalar iteration cost: 16 Scalar outside cost: 32 Vector outside cost: 88 prologue iterations: 0 epilogue iterations: 2 Runtime profitability threshold = 5 Static estimate profitability threshold = 9 epilog loop required loop vectorized === vec_transform_loop === Profitability threshold is 5 loop iterations ------>vectorizing phi: i_17 = PHI <i_10(6), 0(16)> ------>vectorizing phi: sum_20 = PHI <_12(6), 0(16)> transform phi ------>vectorizing phi: vect__12.4_33 = PHI <(6), (16)> ------>vectorizing statement: _1 = (long unsigned int) i_17; ------>vectorizing statement: _2 = _1 * 4; ------>vectorizing statement: _3 = arr_9(D) + _2; ------>vectorizing statement: _4 = *_3; transform statement create vector_type-pointer variable to type: vector(4) int vectorizing a pointer ref: *arr_9(D) created ------>vectorizing statement: _12 = _4 + sum_20; transform statement reduc op not supported by target transform reduction transform reduction: created def-use cycle: vect__12.4_33 = PHI <vect__12.8_37(6), { 0, 0, 0, 0 }(16)>vect__12.8_37 = vect__4.7_36 + vect__12.4_33; Reduce using vector shifts extract scalar result ------>vectorizing statement: i_10 = i_17 + 1; ------>vectorizing statement: vectp_arr.5_35 = vectp_arr.5_34 + 16; ------>vectorizing statement: if (n_8(D) > i_10) LOOP VECTORIZED
	118112000	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === not vectorized: not enough data-refs in basic block
	118112000	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === not vectorized: not enough data-refs in basic block
	118112000	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === not vectorized: no vectype for stmt: vect__4.7_36 = MEM[(int *)vectp_arr.5_34]; scalar_type: vector(4) int not vectorized: not enough data-refs in basic block
	81467500	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === got vectype for stmt: _50 = *_49;vector(4) int not vectorized: not enough data-refs in basic block
	72506100	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === got vectype for stmt: _58 = *_57;vector(4) int not vectorized: not enough data-refs in basic block
	64530400	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === got vectype for stmt: _5 = *_6;vector(4) int not vectorized: not enough data-refs in basic block
	57432000	slp	^=== vect_slp_analyze_bb === === vect_analyze_data_refs === not vectorized: not enough data-refs in basic block
16			accumulate (arr[i], &sum);
		einline	^inlining accumulate into compute_sum
17			return sum;
18			}