GCC Middle and Back End API Reference
loop-unroll.c File Reference

Data Structures

struct  iv_to_split
struct  var_to_expand
struct  iv_split_hasher
struct  var_expand_hasher
struct  opt_info

Functions

static void decide_unrolling_and_peeling (int)
static void peel_loops_completely (int)
static void decide_peel_simple (struct loop *, int)
static void decide_peel_once_rolling (struct loop *, int)
static void decide_peel_completely (struct loop *, int)
static void decide_unroll_stupid (struct loop *, int)
static void decide_unroll_constant_iterations (struct loop *, int)
static void decide_unroll_runtime_iterations (struct loop *, int)
static void peel_loop_simple (struct loop *)
static void peel_loop_completely (struct loop *)
static void unroll_loop_stupid (struct loop *)
static void unroll_loop_constant_iterations (struct loop *)
static void unroll_loop_runtime_iterations (struct loop *)
static struct opt_info * analyze_insns_in_loop (struct loop *)
static void opt_info_start_duplication (struct opt_info *)
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool)
static void free_opt_info (struct opt_info *)
static struct var_to_expand * analyze_insn_to_expand_var (struct loop *, rtx)
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *)
static struct iv_to_split * analyze_iv_to_split_insn (rtx)
static void expand_var_during_unrolling (struct var_to_expand *, rtx)
static void insert_var_expansion_initialization (struct var_to_expand *, basic_block)
static void combine_var_copies_in_loop_exit (struct var_to_expand *, basic_block)
static rtx get_expansion (struct var_to_expand *)
static void report_unroll_peel ()
void unroll_and_peel_loops ()
static bool loop_exit_at_end_p ()
static void peel_loops_completely ()
static void decide_unrolling_and_peeling ()
static void decide_peel_once_rolling ()
static void decide_peel_completely ()
static void peel_loop_completely ()
static void decide_unroll_constant_iterations ()
static void unroll_loop_constant_iterations ()
static void decide_unroll_runtime_iterations ()
basic_block split_edge_and_insert ()
static void unroll_loop_runtime_iterations ()
static void decide_peel_simple ()
static void peel_loop_simple ()
static void decide_unroll_stupid ()
static void unroll_loop_stupid ()
static void reset_debug_uses_in_loop ()
static struct var_to_expand * analyze_insn_to_expand_var ()
static struct iv_to_split * analyze_iv_to_split_insn ()
static struct opt_info * analyze_insns_in_loop ()
static void opt_info_start_duplication ()
static unsigned determine_split_iv_delta ()
static rtx * get_ivts_expr ()
static void allocate_basic_variable ()
static void insert_base_initialization ()
static void split_iv ()
static rtx get_expansion ()
static void expand_var_during_unrolling ()
static void combine_var_copies_in_loop_exit ()
static void maybe_strip_eq_note_for_split_iv ()
static void free_opt_info ()

Function Documentation

static void allocate_basic_variable ( )
static
   Allocate basic variable for the induction variable chain.  
static struct var_to_expand* analyze_insn_to_expand_var ( struct loop ,
rtx   
)
static, read
static struct var_to_expand* analyze_insn_to_expand_var ( )
static, read
@verbatim 

Determine whether INSN contains an accumulator which can be expanded into separate copies, one for each copy of the LOOP body.

for (i = 0 ; i < n; i++) sum += a[i];

==>

sum += a[i] .... i = i+1; sum1 += a[i] .... i = i+1 sum2 += a[i]; ....

Return NULL if INSN contains no opportunity for expansion of accumulator. Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant information and return a pointer to it.

         In the case of FMA, we're also changing the rounding.  
     Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we can not
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  
     Find the accumulator use within the operation.  
         We only support accumulation via FMA in the ADD position.  
         The method of expansion that we are using; which includes the
         initialization of the expansions with zero and the summation of
         the expansions at the end of the computation will yield wrong
         results for (x = something - x) thus avoid using it in that case.  
     It must not otherwise be used.  
     It must be used in exactly one insn.  
       Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  
     Record the accumulator to expand.  

References iv_to_split::base_var, biv_p(), rtx_iv::extend_mode, iv_to_split::insn, iv_analyze_result(), iv_to_split::loc, rtx_iv::mode, iv_to_split::n_loc, iv_to_split::next, iv_to_split::orig_var, iv_to_split::step, and rtx_iv::step.

static struct opt_info* analyze_insns_in_loop ( struct loop )
static, read
static struct opt_info* analyze_insns_in_loop ( )
static, read
   Determines which of insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  
     Record the loop exit bb and loop preheader before the unrolling.  
static struct iv_to_split* analyze_iv_to_split_insn ( rtx  )
static, read
static struct iv_to_split* analyze_iv_to_split_insn ( )
static, read
   Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  
     For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  
     This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  
     Record the insn to split.  

References hash_table< Descriptor, Allocator >::find_slot(), opt_info::insns_with_var_to_expand, var_to_expand::next, and opt_info::var_to_expand_tail.

static void apply_opt_in_copies ( struct opt_info opt_info,
unsigned  n_copies,
bool  unrolling,
bool  rewrite_original_loop 
)
static
   Apply loop optimizations in loop copies using the
   data which was gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  
     Sanity check -- we need to put initialization in the original loop
     body.  
     Allocate the basic variables (i0).  
         bb->aux holds position in copy sequence initialized by
         duplicate_loop_to_header_edge.  
             Apply splitting iv optimization.  
             Apply variable expansion optimization.  
     Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  
     Rewrite also the original loop body.  Find them as originals of the blocks
     in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  

Referenced by decide_peel_simple().

static void combine_var_copies_in_loop_exit ( struct var_to_expand ,
basic_block   
)
static
static void combine_var_copies_in_loop_exit ( )
static
   Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  
         Note that we only accumulate FMA via the ADD operand.  
static void decide_peel_completely ( struct loop ,
int   
)
static
static void decide_peel_completely ( )
static
   Decide whether the LOOP is suitable for complete peeling.  
     Skip non-innermost loops.  
     Do not peel cold areas.  
     Can the loop be manipulated?  
     npeel = number of iterations to peel.  
     Is the loop small enough?  
     Check for simple loops.  
     Check number of iterations.  
     Success.  
static void decide_peel_once_rolling ( struct loop ,
int   
)
static

Referenced by loop_exit_at_end_p().

static void decide_peel_once_rolling ( )
static
   Decide whether the LOOP is once rolling and suitable for complete
   peeling.  
     Is the loop small enough?  
     Check for simple loops.  
     Check number of iterations.  
     Success.  
static void decide_peel_simple ( struct loop ,
int   
)
static
static void decide_peel_simple ( )
static
   Decide whether to simply peel LOOP and how much.  
         We were not asked to, just return back silently.  
     npeel = number of iterations to peel.  
     Skip big loops.  
     Do not simply peel loops with branches inside -- it increases number
     of mispredicts.  
     The exception is when we do have a profile and therefore have a good
     chance to peel the number of iterations the loop will iterate in practice.
     TODO: this heuristic needs tuning; while for complete unrolling
     the branch inside loop mostly eliminates any improvements, for
     peeling it is not the case.  Also a function call inside loop is
     also branch from branch prediction POV (and probably better reason
     to not unroll/peel).  
     If we have realistic estimate on number of iterations, use it.  
     If we have small enough bound on iterations, we can still peel (completely
     unroll).  
         For now we have no good heuristics to decide whether loop peeling
         will be effective, so disable it.  
     Success.  

References analyze_insns_in_loop(), apply_opt_in_copies(), bitmap_clear(), niter_desc::const_iter, duplicate_loop_to_header_edge(), free(), free_opt_info(), free_simple_loop_desc(), get_simple_loop_desc(), loop_preheader_edge(), loop::lpt_decision, niter_desc::niter, niter_desc::niter_expr, niter_desc::noloop_assumptions, opt_info_start_duplication(), sbitmap_alloc(), niter_desc::simple_p, and lpt_decision::times.

static void decide_unroll_constant_iterations ( struct loop ,
int   
)
static
static void decide_unroll_constant_iterations ( )
static
   Decide whether to unroll LOOP iterating constant number of times
   and how much.  
         We were not asked to, just return back silently.  
     nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  
     Skip big loops.  
     Check for simple loops.  
     Check number of iterations.  
     Check whether the loop rolls enough to consider.  
     Consult also loop bounds and profile; in the case the loop has more
     than one exit it may well loop less than determined maximal number
     of iterations.  
     Success; now compute number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  
static void decide_unroll_runtime_iterations ( struct loop ,
int   
)
static
static void decide_unroll_runtime_iterations ( )
static
   Decide whether to unroll LOOP iterating runtime computable number of times
   and how much.  
         We were not asked to, just return back silently.  
     nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  
     Skip big loops.  
     Check for simple loops.  
     Check simpleness.  
     Check whether the loop rolls.  
     Success; now force nunroll to be power of 2, as we are unable to
     cope with overflows in computation of number of iterations.  
static void decide_unroll_stupid ( struct loop ,
int   
)
static
static void decide_unroll_stupid ( )
static
   Decide whether to unroll LOOP stupidly and how much.  
         We were not asked to, just return back silently.  
     nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  
     Skip big loops.  
     Check for simple loops.  
     Check simpleness.  
     Do not unroll loops with branches inside -- it increases number
     of mispredicts. 
     TODO: this heuristic needs tuning; call inside the loop body
     is also relatively good reason to not unroll.  
     Check whether the loop rolls.  
     Success.  Now force nunroll to be power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  
static void decide_unrolling_and_peeling ( int  )
static
static void decide_unrolling_and_peeling ( )
static
   Decide whether to unroll or peel loops (depending on FLAGS) and how much.  
     Scan the loops, inner ones first.  
         Do not peel cold areas.  
         Can the loop be manipulated?  
         Skip non-innermost loops.  
         Try transformations one by one in decreasing order of
         priority.  

References dump_file.

static unsigned determine_split_iv_delta ( )
static
   Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  
         If we are unrolling, initialization is done in the original loop
         body (number 0).  
         If we are peeling, the copy in which the initialization occurs has
         number 1.  The original loop (number 0) is the last.  

Referenced by insert_var_expansion_initialization().

static void expand_var_during_unrolling ( struct var_to_expand ,
rtx   
)
static
static void expand_var_during_unrolling ( )
static
   Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  
     Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  

References find_reg_equal_equiv_note(), iv_to_split::next, iv_to_split::orig_var, reg_mentioned_p(), and remove_note().

static void free_opt_info ( struct opt_info )
static

Referenced by decide_peel_simple().

static void free_opt_info ( )
static
   Release OPT_INFO.  
static rtx get_expansion ( struct var_to_expand )
static
static rtx get_expansion ( )
static
   Return one expansion of the accumulator recorded in struct VE.  
static rtx* get_ivts_expr ( )
static
   Locate in EXPR the expression corresponding to the location recorded
   in IVTS, and return a pointer to the RTX for this location.  

References gen_reg_rtx(), and var_to_expand::reg.

static void insert_base_initialization ( )
static
   Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  

Referenced by insert_var_expansion_initialization().

static void insert_var_expansion_initialization ( struct var_to_expand ve,
basic_block  place 
)
static
   Initialize the variable expansions in loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero; the
   final result of sum should be -zero thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  
         Note that we only accumulate FMA via the ADD operand.  

References basic_block_def::aux, determine_split_iv_delta(), hash_table< Descriptor, Allocator >::find(), get_bb_original(), insert_base_initialization(), opt_info::insns_to_split, opt_info::insns_with_var_to_expand, hash_table< Descriptor, Allocator >::is_created(), maybe_strip_eq_note_for_split_iv(), and split_iv().

static bool loop_exit_at_end_p ( )
static
static void maybe_strip_eq_note_for_split_iv ( )
static
   Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  

Referenced by insert_var_expansion_initialization().

static void opt_info_start_duplication ( struct opt_info )
static

Referenced by decide_peel_simple().

static void opt_info_start_duplication ( )
static
   Called just before loop duplication.  Records start of duplicated area
   to OPT_INFO.  
static void peel_loop_completely ( struct loop )
static
static void peel_loop_completely ( )
static
   Peel all iterations of LOOP, remove exit edges and cancel the loop
   completely.  The transformation done:

   for (i = 0; i < 4; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   body; i++;
   body; i++;
         Remove the exit edges.  
     Now remove the unreachable part of the last iteration and cancel
     the loop.  
static void peel_loop_simple ( struct loop )
static

Referenced by unroll_and_peel_loops().

static void peel_loop_simple ( )
static
   Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   if (!cond) goto end;
   body;
   if (!cond) goto end;
   body;
   if (!cond) goto end;
   body;
   while (cond)
     body;
   end: ;
             We cannot just update niter_expr, as its value might be clobbered
             inside loop.  We could handle this by counting the number into
             temporary just like we do in runtime unrolling, but it does not
             seem worthwhile.  
static void peel_loops_completely ( int  )
static
static void peel_loops_completely ( )
static
   Depending on FLAGS, check whether to peel loops completely and do so.  
     Scan the loops, the inner ones first.  
bool referenced_in_one_insn_in_loop_p ( struct loop loop,
rtx  reg,
int *  debug_uses 
)
static
   Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  

References rtx_equal_p().

static void report_unroll_peel ( )
static
   Emit a message summarizing the unroll or peel that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  
     In the special case where the loop never iterated, emit
     a different message so that we don't report an unroll by 0.
     This matches the equivalent message emitted during tree unrolling.  

References dump_printf_loc().

static void reset_debug_uses_in_loop ( )
static
   Reset the DEBUG_USES debug insns in LOOP that reference REG.  

References rtx_referenced_p().

basic_block split_edge_and_insert ( )
   Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  
     ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the TODO_verify_flow at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

      * If do_compare_rtx_and_jump needs several branches to do comparison
        in a mode that needs comparison by parts, we cannot analyze the
        number of iterations of the loop, and we never get to unrolling it.

      * The code in expand_divmod that was suspected to cause creation of
        branching code seems to be only accessed for signed division.  The
        divisions used by # of iterations analysis are always unsigned.
        Problems might arise on architectures that emit branching code
        for some operations that may appear in the unroller (especially
        for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  

References analyze_insns_in_loop(), CDI_DOMINATORS, copy_rtx(), emit_move_insn(), expand_simple_binop(), flow_bb_inside_loop_p(), force_operand(), free(), gen_int_mode(), gen_reg_rtx(), get_dominated_by(), get_insns(), get_loop_body(), get_simple_loop_desc(), loop_exit_at_end_p(), loop::lpt_decision, niter_desc::mode, niter_desc::niter, niter_desc::niter_expr, loop::num_nodes, OPTAB_LIB_WIDEN, start_sequence(), and lpt_decision::times.

static void split_iv ( )
static
   Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  
     Construct base + DELTA * step.  
     Figure out where to do the replacement.  
     If we can make the replacement right away, we're done.  
     Otherwise, force EXPR into a register and try again.  
     The last chance.  Try recreating the assignment in insn
     completely from scratch.  

References emit_insn_after(), emit_move_insn(), end_sequence(), get_insns(), var_to_expand::op, var_to_expand::reg, simplify_gen_unary(), start_sequence(), and var_to_expand::var_expansions.

Referenced by insert_var_expansion_initialization().

void unroll_and_peel_loops ( )
   Unroll and/or peel (depending on FLAGS) LOOPS.  
     First perform complete loop peeling (it is almost surely a win,
     and affects parameters for further decision a lot).  
     Now decide rest of unrolling and peeling.  
     Scan the loops, inner ones first.  
         And perform the appropriate transformations.  
             Already done.  

References lpt_decision::decision, loop::lpt_decision, LPT_NONE, LPT_PEEL_COMPLETELY, LPT_PEEL_SIMPLE, LPT_UNROLL_CONSTANT, LPT_UNROLL_RUNTIME, LPT_UNROLL_STUPID, peel_loop_simple(), unroll_loop_constant_iterations(), unroll_loop_runtime_iterations(), and unroll_loop_stupid().

static void unroll_loop_constant_iterations ( struct loop )
static

Referenced by unroll_and_peel_loops().

static void unroll_loop_constant_iterations ( )
static
   Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
     Should not get here (such loop should be peeled instead).  
         The exit is not at the end of the loop; leave exit test
         in the first copy, so that the loops that start with test
         of exit condition have continuous body after unrolling.  
         Peel exit_mod iterations.  
         Leave exit test in last copy, for the same reason as above if
         the loop tests the condition at the end of loop body.  
         We know that niter >= max_unroll + 2; so we do not need to care of
         case when we would exit before reaching the loop.  So just peel
         exit_mod + 1 iterations.  
     Now unroll the loop.  
         Find a new in and out edge; they are in the last copy we have made.  
     Remove the edges.  
static void unroll_loop_runtime_iterations ( struct loop )
static

Referenced by unroll_and_peel_loops().

static void unroll_loop_runtime_iterations ( )
static
   Unroll LOOP for which we are able to count number of iterations in runtime
   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
   extra care for case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
     Remember blocks whose dominators will have to be updated.  
         Leave exit in first copy (for explanation why see comment in
         unroll_loop_constant_iterations).  
         Leave exit in last copy (for explanation why see comment in
         unroll_loop_constant_iterations).  
     Get expression for number of iterations.  
     Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  
     Precondition the loop.  
     Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
     of iterations is reliable).  Also record the place of (possible) extra
     zero check.  
     Record the place where switch will be built for preconditioning.  
         Peel the copy.  
         Create item for switch.  
         We rely on the fact that the compare and jump cannot be optimized out,
         and hence the cfg we create is correct.  
         Add branch for zero iterations.  
     Recount dominators for outer blocks.  
     And unroll loop.  
         Find a new in and out edge; they are in the last copy we have
         made.  
     Remove the edges.  
     We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  
static void unroll_loop_stupid ( struct loop )
static

Referenced by unroll_and_peel_loops().

static void unroll_loop_stupid ( )
static
   Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
         We indeed may get here provided that there are nontrivial assumptions
         for a loop to be really simple.  We could update the counts, but the
         problem is that we are unable to decide which exit will be taken
         (not really true in case the number of iterations is constant,
         but no one will do anything with this information, so we do not
         worry about it).