presentation-inlined.c

Line | Hotness | Pass | Source | Function / Inlining Chain
1
// Taken from Adam Nemet's November 2016 LLVM talk
2
3
#include "shared.h"
4
5
void accumulate (int x, int *a)
6
{
7
  *a += x;
8
}
9
10
int compute_sum_with_inlining (int arr[], int n)
11
{
12
  int sum = 0;
13
  for (int i = 0; i < n; ++i)
100.00 cunrolli
  ^loop 1 iterates at most 2147483647 times
  
  • compute_sum_with_inlining
99.61 ivcanon
  ^loop 1 iterates at most 2147483646 times
  
  • compute_sum_with_inlining
99.61 vect

                        
^=== analyzing loop === === analyze_loop_nest === === vect_analyze_loop_form === === get_loop_niters === symbolic number of iterations is (unsigned int) n_8(D) === vect_analyze_data_refs === got vectype for stmt: _4 = *_3;vector(4) int === vect_analyze_scalar_cycles === Analyze phi: i_17 = PHI <0(5), i_10(6)> Access function of PHI: {0, +, 1}_1 step: 1, init: 0 Detected induction Analyze phi: sum_20 = PHI <0(5), _12(6)> Access function of PHI: {0, +, _4}_1 step: _4, init: 0 step unknown Analyze phi: sum_20 = PHI <0(5), _12(6)> detected reduction: _12 = _4 + sum_20; Detected reduction === vect_pattern_recog === vect_is_simple_use: operand _1 def_stmt: _1 = (long unsigned int) i_17; type of def: internal vect_is_simple_use: operand i_17 def_stmt: i_17 = PHI <0(5), i_10(6)> type of def: induction vect_is_simple_use: operand 4 vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal === vect_analyze_data_ref_accesses === === vect_mark_stmts_to_be_vectorized === vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <0(5), _12(6)> type of def: reduction vect_is_simple_use: operand 0 vect_is_simple_use: operand _12 def_stmt: _12 = _4 + sum_20; type of def: reduction === vect_analyze_data_ref_dependences === === vect_determine_vectorization_factor === ==> examining phi: i_17 = PHI <0(5), i_10(6)> ==> examining phi: sum_20 = PHI <0(5), _12(6)> get vectype for scalar type: int vectype: vector(4) int nunits = FIXME: poly_int ==> examining statement: # DEBUG sum => sum_20 skip ==> examining statement: # DEBUG i => i_17 skip ==> examining statement: # DEBUG BEGIN_STMT skip ==> examining statement: _1 = (long unsigned int) i_17; skip ==> examining statement: _2 = _1 * 4; 
skip ==> examining statement: _3 = arr_9(D) + _2; skip ==> examining statement: _4 = *_3; get vectype for scalar type: int vectype: vector(4) int nunits = FIXME: poly_int ==> examining statement: # DEBUG x => _4 skip ==> examining statement: # DEBUG a => &sum skip ==> examining statement: # DEBUG BEGIN_STMT skip ==> examining statement: _12 = _4 + sum_20; get vectype for scalar type: int vectype: vector(4) int get vectype for scalar type: int vectype: vector(4) int nunits = FIXME: poly_int ==> examining statement: # DEBUG sum => _12 skip ==> examining statement: # DEBUG x => NULL skip ==> examining statement: # DEBUG a => NULL skip ==> examining statement: i_10 = i_17 + 1; skip ==> examining statement: # DEBUG i => i_10 skip ==> examining statement: # DEBUG sum => _12 skip ==> examining statement: # DEBUG i => i_10 skip ==> examining statement: if (n_8(D) > i_10) skip vectorization factor = FIXME: poly_int === vect_analyze_slp === === vect_make_slp_decision === === vect_analyze_data_refs_alignment === recording new base alignment for arr_9(D) alignment: 4 misalignment: 0 based on: _4 = *_3; vect_compute_data_ref_alignment: can't force alignment of ref: *_3 === vect_prune_runtime_alias_test_list === === vect_enhance_data_refs_alignment === Unknown misalignment, naturally aligned Analyze phi: i_17 = PHI <0(5), i_10(6)> Analyze phi: sum_20 = PHI <0(5), _12(6)> reduc or virtual phi. skip vect_get_data_access_cost: inside_cost = 12, outside_cost = 0. cost model: epilogue peel iters set to vf/2 because loop iterations are unknown vect_get_data_access_cost: inside_cost = 12, outside_cost = 0. 
cost model: epilogue peel iters set to vf/2 because loop iterations are unknown Vectorizing an unaligned access === vect_analyze_loop_operations === examining phi: i_17 = PHI <0(5), i_10(6)> examining phi: sum_20 = PHI <0(5), _12(6)> vect_is_simple_use: operand *_3 not ssa-name use not simple vect_is_simple_use: operand *_3 not ssa-name use not simple can't use a fully-masked loop because the target doesn't have the appropriate masked load or store. vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <0(5), _12(6)> type of def: reduction reduc op not supported by target not using a fully-masked loop cost model: epilogue peel iters set to vf/2 because loop iterations are unknown Cost model analysis: Vector inside of loop cost: 16 Vector prologue cost: 36 Vector epilogue cost: 52 Scalar iteration cost: 16 Scalar outside cost: 32 Vector outside cost: 88 prologue iterations: 0 epilogue iterations: 2 Runtime profitability threshold = 5 Static estimate profitability threshold = 9 epilog loop required Analyze phi: i_17 = PHI <0(5), i_10(6)> Analyze phi: sum_20 = PHI <0(5), _12(6)> reduc or virtual phi. skip loop vectorized === vec_transform_loop === Profitability threshold is 5 loop iterations Analyze phi: i_17 = PHI <i_10(6), 0(9)> Analyze phi: sum_20 = PHI <_12(6), 0(9)> reduc or virtual phi. skip vect_update_ivs_after_vectorizer: phi: i_17 = PHI <i_10(6), 0(9)> vect_update_ivs_after_vectorizer: phi: sum_20 = PHI <_12(6), 0(9)> reduc or virtual phi. skip. 
------>vectorizing phi: i_17 = PHI <i_10(6), 0(16)> ------>vectorizing phi: sum_20 = PHI <_12(6), 0(16)> transform phi ------>vectorizing phi: vect__12.4_33 = PHI <(6), (16)> ------>vectorizing statement: # DEBUG sum => sum_20 ------>vectorizing statement: # DEBUG i => i_17 ------>vectorizing statement: # DEBUG BEGIN_STMT ------>vectorizing statement: _1 = (long unsigned int) i_17; ------>vectorizing statement: _2 = _1 * 4; ------>vectorizing statement: _3 = arr_9(D) + _2; ------>vectorizing statement: _4 = *_3; transform statement create vector_type-pointer variable to type: vector(4) int vectorizing a pointer ref: *arr_9(D) created arr_9(D) ------>vectorizing statement: # DEBUG x => _4 ------>vectorizing statement: # DEBUG a => &sum ------>vectorizing statement: # DEBUG BEGIN_STMT ------>vectorizing statement: _12 = _4 + sum_20; transform statement vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <_12(6), 0(16)> type of def: reduction reduc op not supported by target transform reduction vect_get_vec_def_for_operand: _4 vect_is_simple_use: operand _4 def_stmt: _4 = *_3; type of def: internal def_stmt = _4 = *_3; vect_get_vec_def_for_operand: sum_20 vect_is_simple_use: operand sum_20 def_stmt: sum_20 = PHI <_12(6), 0(16)> type of def: reduction def_stmt = sum_20 = PHI <_12(6), 0(16)> vect_is_simple_use: operand 0 transform reduction: created def-use cycle: vect__12.4_33 = PHI <vect__12.8_37(6), { 0, 0, 0, 0 }(16)>vect__12.8_37 = vect__4.7_36 + vect__12.4_33; Reduce using vector shifts extract scalar result ------>vectorizing statement: # DEBUG sum => _12 ------>vectorizing statement: # DEBUG x => NULL ------>vectorizing statement: # DEBUG a => NULL ------>vectorizing statement: i_10 = i_17 + 1; ------>vectorizing statement: # DEBUG i => i_10 ------>vectorizing statement: # DEBUG sum => _12 ------>vectorizing statement: # DEBUG i => i_10 ------>vectorizing statement: vectp_arr.5_35 = 
vectp_arr.5_34 + 16; ------>vectorizing statement: if (n_8(D) > i_10) New loop exit condition: if (ivtmp_46 < bnd.1_29) LOOP VECTORIZED
  • compute_sum_with_inlining
vect
  ^vectorized 1 loops in function
  
  • compute_sum_with_inlining
14
    accumulate (arr[i], &sum);
einline
    ^inlining accumulate into compute_sum_with_inlining
    
  • compute_sum_with_inlining
einline
    ^Inlining accumulate/2 to compute_sum_with_inlining/1 with frequency 1.00
    
  • compute_sum_with_inlining
15
  return sum;
16
}