1 /* Try to unroll loops, and split induction variables.
2 Copyright (C) 1992, 93, 94, 95, 97, 98, 1999 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Try to unroll a loop, and split induction variables.
24 Loops for which the number of iterations can be calculated exactly are
25 handled specially. If the number of iterations times the insn_count is
26 less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
27 Otherwise, we try to unroll the loop a number of times modulo the number
28 of iterations, so that only one exit test will be needed. It is unrolled
29 a number of times approximately equal to MAX_UNROLLED_INSNS divided by
32 Otherwise, if the number of iterations can be calculated exactly at
33 run time, and the loop is always entered at the top, then we try to
34 precondition the loop. That is, at run time, calculate how many times
35 the loop will execute, and then execute the loop body a few times so
36 that the remaining iterations will be some multiple of 4 (or 2 if the
37 loop is large). Then fall through to a loop unrolled 4 (or 2) times,
38 with only one exit test needed at the end of the loop.
40 Otherwise, if the number of iterations can not be calculated exactly,
41 not even at run time, then we still unroll the loop a number of times
42 approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
43 but there must be an exit test after each copy of the loop body.
45 For each induction variable, which is dead outside the loop (replaceable)
46 or for which we can easily calculate the final value, if we can easily
47 calculate its value at each place where it is set as a function of the
48 current loop unroll count and the variable's value at loop entry, then
49 the induction variable is split into `N' different variables, one for
50 each copy of the loop body. One variable is live across the backward
51 branch, and the others are all calculated as a function of this variable.
52 This helps eliminate data dependencies, and leads to further opportunities
55 /* Possible improvements follow: */
57 /* ??? Add an extra pass somewhere to determine whether unrolling will
58 give any benefit. E.g. after generating all unrolled insns, compute the
59 cost of all insns and compare against cost of insns in rolled loop.
61 - On traditional architectures, unrolling a non-constant bound loop
62 is a win if there is a giv whose only use is in memory addresses, the
63 memory addresses can be split, and hence giv increments can be
65 - It is also a win if the loop is executed many times, and preconditioning
66 can be performed for the loop.
67 Add code to check for these and similar cases. */
69 /* ??? Improve control of which loops get unrolled. Could use profiling
70 info to only unroll the most commonly executed loops. Perhaps have
71 a user specifyable option to control the amount of code expansion,
72 or the percent of loops to consider for unrolling. Etc. */
74 /* ??? Look at the register copies inside the loop to see if they form a
75 simple permutation. If so, iterate the permutation until it gets back to
76 the start state. This is how many times we should unroll the loop, for
77 best results, because then all register copies can be eliminated.
78 For example, the lisp nreverse function should be unrolled 3 times
87 ??? The number of times to unroll the loop may also be based on data
88 references in the loop. For example, if we have a loop that references
89 x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
91 /* ??? Add some simple linear equation solving capability so that we can
92 determine the number of loop iterations for more complex loops.
93 For example, consider this loop from gdb
94 #define SWAP_TARGET_AND_HOST(buffer,len)
97 char *p = (char *) buffer;
98 char *q = ((char *) buffer) + len - 1;
99 int iterations = (len + 1) >> 1;
101 for (p; p < q; p++, q--;)
109 start value = p = &buffer + current_iteration
110 end value = q = &buffer + len - 1 - current_iteration
111 Given the loop exit test of "p < q", then there must be "q - p" iterations,
112 set equal to zero and solve for number of iterations:
113 q - p = len - 1 - 2*current_iteration = 0
114 current_iteration = (len - 1) / 2
115 Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
116 iterations of this loop. */
118 /* ??? Currently, no labels are marked as loop invariant when doing loop
119 unrolling. This is because an insn inside the loop, that loads the address
120 of a label inside the loop into a register, could be moved outside the loop
121 by the invariant code motion pass if labels were invariant. If the loop
122 is subsequently unrolled, the code will be wrong because each unrolled
123 body of the loop will use the same address, whereas each actually needs a
124 different address. A case where this happens is when a loop containing
125 a switch statement is unrolled.
127 It would be better to let labels be considered invariant. When we
128 unroll loops here, check to see if any insns using a label local to the
129 loop were moved before the loop. If so, then correct the problem, by
130 moving the insn back into the loop, or perhaps replicate the insn before
131 the loop, one copy for each time the loop is unrolled. */
133 /* The prime factors looked for when trying to unroll a loop by some
134 number which is modulo the total number of iterations. Just checking
135 for these 4 prime factors will find at least one factor for 75% of
136 all numbers theoretically. Practically speaking, this will succeed
137 almost all of the time since loops are generally a multiple of 2
140 #define NUM_FACTORS 4
142 struct _factor { int factor, count; } factors[NUM_FACTORS]
143 = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
145 /* Describes the different types of loop unrolling performed. */
147 enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };
152 #include "insn-config.h"
153 #include "integrate.h"
161 /* This controls which loops are unrolled, and by how much we unroll
164 #ifndef MAX_UNROLLED_INSNS
165 #define MAX_UNROLLED_INSNS 100
168 /* Indexed by register number, if non-zero, then it contains a pointer
169 to a struct induction for a DEST_REG giv which has been combined with
170 one of more address givs. This is needed because whenever such a DEST_REG
171 giv is modified, we must modify the value of all split address givs
172 that were combined with this DEST_REG giv. */
174 static struct induction **addr_combined_regs;
176 /* Indexed by register number, if this is a splittable induction variable,
177 then this will hold the current value of the register, which depends on the
180 static rtx *splittable_regs;
182 /* Indexed by register number, if this is a splittable induction variable,
183 this indicates if it was made from a derived giv. */
184 static char *derived_regs;
186 /* Indexed by register number, if this is a splittable induction variable,
187 then this will hold the number of instructions in the loop that modify
188 the induction variable. Used to ensure that only the last insn modifying
189 a split iv will update the original iv of the dest. */
191 static int *splittable_regs_updates;
193 /* Forward declarations. */
195 static void init_reg_map PROTO((struct inline_remap *, int));
196 static rtx calculate_giv_inc PROTO((rtx, rtx, int));
197 static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
198 static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
199 static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
200 enum unroll_types, rtx, rtx, rtx, rtx));
201 static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
202 static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int,
203 unsigned HOST_WIDE_INT));
204 static int find_splittable_givs PROTO((struct iv_class *, enum unroll_types,
205 rtx, rtx, rtx, int));
206 static int reg_dead_after_loop PROTO((rtx, rtx, rtx));
207 static rtx fold_rtx_mult_add PROTO((rtx, rtx, rtx, enum machine_mode));
208 static int verify_addresses PROTO((struct induction *, rtx, int));
209 static rtx remap_split_bivs PROTO((rtx));
211 /* Try to unroll one loop and split induction variables in the loop.
213 The loop is described by the arguments LOOP_END, INSN_COUNT, and
214 LOOP_START. END_INSERT_BEFORE indicates where insns should be added
215 which need to be executed when the loop falls through. STRENGTH_REDUCTION_P
216 indicates whether information generated in the strength reduction pass
219 This function is intended to be called from within `strength_reduce'
223 unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
224 loop_info, strength_reduce_p)
228 rtx end_insert_before;
229 struct loop_info *loop_info;
230 int strength_reduce_p;
233 int unroll_number = 1;
234 rtx copy_start, copy_end;
235 rtx insn, sequence, pattern, tem;
236 int max_labelno, max_insnno;
238 struct inline_remap *map;
241 int max_local_regnum;
246 int splitting_not_safe = 0;
247 enum unroll_types unroll_type;
248 int loop_preconditioned = 0;
250 /* This points to the last real insn in the loop, which should be either
251 a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
255 /* Don't bother unrolling huge loops. Since the minimum factor is
256 two, loops greater than one half of MAX_UNROLLED_INSNS will never
258 if (insn_count > MAX_UNROLLED_INSNS / 2)
260 if (loop_dump_stream)
261 fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
265 /* When emitting debugger info, we can't unroll loops with unequal numbers
266 of block_beg and block_end notes, because that would unbalance the block
267 structure of the function. This can happen as a result of the
268 "if (foo) bar; else break;" optimization in jump.c. */
269 /* ??? Gcc has a general policy that -g is never supposed to change the code
270 that the compiler emits, so we must disable this optimization always,
271 even if debug info is not being output. This is rare, so this should
272 not be a significant performance problem. */
274 if (1 /* write_symbols != NO_DEBUG */)
276 int block_begins = 0;
279 for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
281 if (GET_CODE (insn) == NOTE)
283 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
285 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
290 if (block_begins != block_ends)
292 if (loop_dump_stream)
293 fprintf (loop_dump_stream,
294 "Unrolling failure: Unbalanced block notes.\n");
299 /* Determine type of unroll to perform. Depends on the number of iterations
300 and the size of the loop. */
302 /* If there is no strength reduce info, then set
303 loop_info->n_iterations to zero. This can happen if
304 strength_reduce can't find any bivs in the loop. A value of zero
305 indicates that the number of iterations could not be calculated. */
307 if (! strength_reduce_p)
308 loop_info->n_iterations = 0;
310 if (loop_dump_stream && loop_info->n_iterations > 0)
312 fputs ("Loop unrolling: ", loop_dump_stream);
313 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
314 loop_info->n_iterations);
315 fputs (" iterations.\n", loop_dump_stream);
318 /* Find and save a pointer to the last nonnote insn in the loop. */
320 last_loop_insn = prev_nonnote_insn (loop_end);
322 /* Calculate how many times to unroll the loop. Indicate whether or
323 not the loop is being completely unrolled. */
325 if (loop_info->n_iterations == 1)
327 /* If number of iterations is exactly 1, then eliminate the compare and
328 branch at the end of the loop since they will never be taken.
329 Then return, since no other action is needed here. */
331 /* If the last instruction is not a BARRIER or a JUMP_INSN, then
332 don't do anything. */
334 if (GET_CODE (last_loop_insn) == BARRIER)
336 /* Delete the jump insn. This will delete the barrier also. */
337 delete_insn (PREV_INSN (last_loop_insn));
339 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
342 /* The immediately preceding insn is a compare which must be
344 delete_insn (last_loop_insn);
345 delete_insn (PREV_INSN (last_loop_insn));
347 /* The immediately preceding insn may not be the compare, so don't
349 delete_insn (last_loop_insn);
354 else if (loop_info->n_iterations > 0
355 && loop_info->n_iterations * insn_count < MAX_UNROLLED_INSNS)
357 unroll_number = loop_info->n_iterations;
358 unroll_type = UNROLL_COMPLETELY;
360 else if (loop_info->n_iterations > 0)
362 /* Try to factor the number of iterations. Don't bother with the
363 general case, only using 2, 3, 5, and 7 will get 75% of all
364 numbers theoretically, and almost all in practice. */
366 for (i = 0; i < NUM_FACTORS; i++)
367 factors[i].count = 0;
369 temp = loop_info->n_iterations;
370 for (i = NUM_FACTORS - 1; i >= 0; i--)
371 while (temp % factors[i].factor == 0)
374 temp = temp / factors[i].factor;
377 /* Start with the larger factors first so that we generally
378 get lots of unrolling. */
382 for (i = 3; i >= 0; i--)
383 while (factors[i].count--)
385 if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
387 unroll_number *= factors[i].factor;
388 temp *= factors[i].factor;
394 /* If we couldn't find any factors, then unroll as in the normal
396 if (unroll_number == 1)
398 if (loop_dump_stream)
399 fprintf (loop_dump_stream,
400 "Loop unrolling: No factors found.\n");
403 unroll_type = UNROLL_MODULO;
407 /* Default case, calculate number of times to unroll loop based on its
409 if (unroll_number == 1)
411 if (8 * insn_count < MAX_UNROLLED_INSNS)
413 else if (4 * insn_count < MAX_UNROLLED_INSNS)
418 unroll_type = UNROLL_NAIVE;
421 /* Now we know how many times to unroll the loop. */
423 if (loop_dump_stream)
424 fprintf (loop_dump_stream,
425 "Unrolling loop %d times.\n", unroll_number);
428 if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
430 /* Loops of these types can start with jump down to the exit condition
431 in rare circumstances.
433 Consider a pair of nested loops where the inner loop is part
434 of the exit code for the outer loop.
436 In this case jump.c will not duplicate the exit test for the outer
437 loop, so it will start with a jump to the exit code.
439 Then consider if the inner loop turns out to iterate once and
440 only once. We will end up deleting the jumps associated with
441 the inner loop. However, the loop notes are not removed from
442 the instruction stream.
444 And finally assume that we can compute the number of iterations
447 In this case unroll may want to unroll the outer loop even though
448 it starts with a jump to the outer loop's exit code.
450 We could try to optimize this case, but it hardly seems worth it.
451 Just return without unrolling the loop in such cases. */
454 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
455 insn = NEXT_INSN (insn);
456 if (GET_CODE (insn) == JUMP_INSN)
460 if (unroll_type == UNROLL_COMPLETELY)
462 /* Completely unrolling the loop: Delete the compare and branch at
463 the end (the last two instructions). This delete must done at the
464 very end of loop unrolling, to avoid problems with calls to
465 back_branch_in_range_p, which is called by find_splittable_regs.
466 All increments of splittable bivs/givs are changed to load constant
469 copy_start = loop_start;
471 /* Set insert_before to the instruction immediately after the JUMP_INSN
472 (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
473 the loop will be correctly handled by copy_loop_body. */
474 insert_before = NEXT_INSN (last_loop_insn);
476 /* Set copy_end to the insn before the jump at the end of the loop. */
477 if (GET_CODE (last_loop_insn) == BARRIER)
478 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
479 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
482 /* The instruction immediately before the JUMP_INSN is a compare
483 instruction which we do not want to copy. */
484 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
486 /* The instruction immediately before the JUMP_INSN may not be the
487 compare, so we must copy it. */
488 copy_end = PREV_INSN (last_loop_insn);
493 /* We currently can't unroll a loop if it doesn't end with a
494 JUMP_INSN. There would need to be a mechanism that recognizes
495 this case, and then inserts a jump after each loop body, which
496 jumps to after the last loop body. */
497 if (loop_dump_stream)
498 fprintf (loop_dump_stream,
499 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
503 else if (unroll_type == UNROLL_MODULO)
505 /* Partially unrolling the loop: The compare and branch at the end
506 (the last two instructions) must remain. Don't copy the compare
507 and branch instructions at the end of the loop. Insert the unrolled
508 code immediately before the compare/branch at the end so that the
509 code will fall through to them as before. */
511 copy_start = loop_start;
513 /* Set insert_before to the jump insn at the end of the loop.
514 Set copy_end to before the jump insn at the end of the loop. */
515 if (GET_CODE (last_loop_insn) == BARRIER)
517 insert_before = PREV_INSN (last_loop_insn);
518 copy_end = PREV_INSN (insert_before);
520 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
523 /* The instruction immediately before the JUMP_INSN is a compare
524 instruction which we do not want to copy or delete. */
525 insert_before = PREV_INSN (last_loop_insn);
526 copy_end = PREV_INSN (insert_before);
528 /* The instruction immediately before the JUMP_INSN may not be the
529 compare, so we must copy it. */
530 insert_before = last_loop_insn;
531 copy_end = PREV_INSN (last_loop_insn);
536 /* We currently can't unroll a loop if it doesn't end with a
537 JUMP_INSN. There would need to be a mechanism that recognizes
538 this case, and then inserts a jump after each loop body, which
539 jumps to after the last loop body. */
540 if (loop_dump_stream)
541 fprintf (loop_dump_stream,
542 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
548 /* Normal case: Must copy the compare and branch instructions at the
551 if (GET_CODE (last_loop_insn) == BARRIER)
553 /* Loop ends with an unconditional jump and a barrier.
554 Handle this like above, don't copy jump and barrier.
555 This is not strictly necessary, but doing so prevents generating
556 unconditional jumps to an immediately following label.
558 This will be corrected below if the target of this jump is
559 not the start_label. */
561 insert_before = PREV_INSN (last_loop_insn);
562 copy_end = PREV_INSN (insert_before);
564 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
566 /* Set insert_before to immediately after the JUMP_INSN, so that
567 NOTEs at the end of the loop will be correctly handled by
569 insert_before = NEXT_INSN (last_loop_insn);
570 copy_end = last_loop_insn;
574 /* We currently can't unroll a loop if it doesn't end with a
575 JUMP_INSN. There would need to be a mechanism that recognizes
576 this case, and then inserts a jump after each loop body, which
577 jumps to after the last loop body. */
578 if (loop_dump_stream)
579 fprintf (loop_dump_stream,
580 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
584 /* If copying exit test branches because they can not be eliminated,
585 then must convert the fall through case of the branch to a jump past
586 the end of the loop. Create a label to emit after the loop and save
587 it for later use. Do not use the label after the loop, if any, since
588 it might be used by insns outside the loop, or there might be insns
589 added before it later by final_[bg]iv_value which must be after
590 the real exit label. */
591 exit_label = gen_label_rtx ();
594 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
595 insn = NEXT_INSN (insn);
597 if (GET_CODE (insn) == JUMP_INSN)
599 /* The loop starts with a jump down to the exit condition test.
600 Start copying the loop after the barrier following this
602 copy_start = NEXT_INSN (insn);
604 /* Splitting induction variables doesn't work when the loop is
605 entered via a jump to the bottom, because then we end up doing
606 a comparison against a new register for a split variable, but
607 we did not execute the set insn for the new register because
608 it was skipped over. */
609 splitting_not_safe = 1;
610 if (loop_dump_stream)
611 fprintf (loop_dump_stream,
612 "Splitting not safe, because loop not entered at top.\n");
615 copy_start = loop_start;
618 /* This should always be the first label in the loop. */
619 start_label = NEXT_INSN (copy_start);
620 /* There may be a line number note and/or a loop continue note here. */
621 while (GET_CODE (start_label) == NOTE)
622 start_label = NEXT_INSN (start_label);
623 if (GET_CODE (start_label) != CODE_LABEL)
625 /* This can happen as a result of jump threading. If the first insns in
626 the loop test the same condition as the loop's backward jump, or the
627 opposite condition, then the backward jump will be modified to point
628 to elsewhere, and the loop's start label is deleted.
630 This case currently can not be handled by the loop unrolling code. */
632 if (loop_dump_stream)
633 fprintf (loop_dump_stream,
634 "Unrolling failure: unknown insns between BEG note and loop label.\n");
637 if (LABEL_NAME (start_label))
639 /* The jump optimization pass must have combined the original start label
640 with a named label for a goto. We can't unroll this case because
641 jumps which go to the named label must be handled differently than
642 jumps to the loop start, and it is impossible to differentiate them
644 if (loop_dump_stream)
645 fprintf (loop_dump_stream,
646 "Unrolling failure: loop start label is gone\n");
650 if (unroll_type == UNROLL_NAIVE
651 && GET_CODE (last_loop_insn) == BARRIER
652 && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
654 /* In this case, we must copy the jump and barrier, because they will
655 not be converted to jumps to an immediately following label. */
657 insert_before = NEXT_INSN (last_loop_insn);
658 copy_end = last_loop_insn;
661 if (unroll_type == UNROLL_NAIVE
662 && GET_CODE (last_loop_insn) == JUMP_INSN
663 && start_label != JUMP_LABEL (last_loop_insn))
665 /* ??? The loop ends with a conditional branch that does not branch back
666 to the loop start label. In this case, we must emit an unconditional
667 branch to the loop exit after emitting the final branch.
668 copy_loop_body does not have support for this currently, so we
669 give up. It doesn't seem worthwhile to unroll anyways since
670 unrolling would increase the number of branch instructions
672 if (loop_dump_stream)
673 fprintf (loop_dump_stream,
674 "Unrolling failure: final conditional branch not to loop start\n");
678 /* Allocate a translation table for the labels and insn numbers.
679 They will be filled in as we copy the insns in the loop. */
681 max_labelno = max_label_num ();
682 max_insnno = get_max_uid ();
684 map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
686 map->integrating = 0;
687 map->const_equiv_varray = 0;
689 /* Allocate the label map. */
693 map->label_map = (rtx *) alloca (max_labelno * sizeof (rtx));
695 local_label = (char *) alloca (max_labelno);
696 bzero (local_label, max_labelno);
701 /* Search the loop and mark all local labels, i.e. the ones which have to
702 be distinct labels when copied. For all labels which might be
703 non-local, set their label_map entries to point to themselves.
704 If they happen to be local their label_map entries will be overwritten
705 before the loop body is copied. The label_map entries for local labels
706 will be set to a different value each time the loop body is copied. */
708 for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
712 if (GET_CODE (insn) == CODE_LABEL)
713 local_label[CODE_LABEL_NUMBER (insn)] = 1;
714 else if (GET_CODE (insn) == JUMP_INSN)
716 if (JUMP_LABEL (insn))
717 set_label_in_map (map,
718 CODE_LABEL_NUMBER (JUMP_LABEL (insn)),
720 else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
721 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
723 rtx pat = PATTERN (insn);
724 int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
725 int len = XVECLEN (pat, diff_vec_p);
728 for (i = 0; i < len; i++)
730 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
731 set_label_in_map (map,
732 CODE_LABEL_NUMBER (label),
737 else if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)))
738 set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)),
742 /* Allocate space for the insn map. */
744 map->insn_map = (rtx *) alloca (max_insnno * sizeof (rtx));
746 /* Set this to zero, to indicate that we are doing loop unrolling,
747 not function inlining. */
748 map->inline_target = 0;
750 /* The register and constant maps depend on the number of registers
751 present, so the final maps can't be created until after
752 find_splittable_regs is called. However, they are needed for
753 preconditioning, so we create temporary maps when preconditioning
756 /* The preconditioning code may allocate two new pseudo registers. */
757 maxregnum = max_reg_num ();
759 /* local_regno is only valid for regnos < max_local_regnum. */
760 max_local_regnum = maxregnum;
762 /* Allocate and zero out the splittable_regs and addr_combined_regs
763 arrays. These must be zeroed here because they will be used if
764 loop preconditioning is performed, and must be zero for that case.
766 It is safe to do this here, since the extra registers created by the
767 preconditioning code and find_splittable_regs will never be used
768 to access the splittable_regs[] and addr_combined_regs[] arrays. */
770 splittable_regs = (rtx *) alloca (maxregnum * sizeof (rtx));
771 bzero ((char *) splittable_regs, maxregnum * sizeof (rtx));
772 derived_regs = alloca (maxregnum);
773 bzero (derived_regs, maxregnum);
774 splittable_regs_updates = (int *) alloca (maxregnum * sizeof (int));
775 bzero ((char *) splittable_regs_updates, maxregnum * sizeof (int));
777 = (struct induction **) alloca (maxregnum * sizeof (struct induction *));
778 bzero ((char *) addr_combined_regs, maxregnum * sizeof (struct induction *));
779 local_regno = (char *) alloca (maxregnum);
780 bzero (local_regno, maxregnum);
782 /* Mark all local registers, i.e. the ones which are referenced only
784 if (INSN_UID (copy_end) < max_uid_for_loop)
786 int copy_start_luid = INSN_LUID (copy_start);
787 int copy_end_luid = INSN_LUID (copy_end);
789 /* If a register is used in the jump insn, we must not duplicate it
790 since it will also be used outside the loop. */
791 if (GET_CODE (copy_end) == JUMP_INSN)
794 /* If we have a target that uses cc0, then we also must not duplicate
795 the insn that sets cc0 before the jump insn. */
797 if (GET_CODE (copy_end) == JUMP_INSN)
801 /* If copy_start points to the NOTE that starts the loop, then we must
802 use the next luid, because invariant pseudo-regs moved out of the loop
803 have their lifetimes modified to start here, but they are not safe
805 if (copy_start == loop_start)
808 /* If a pseudo's lifetime is entirely contained within this loop, then we
809 can use a different pseudo in each unrolled copy of the loop. This
810 results in better code. */
811 /* We must limit the generic test to max_reg_before_loop, because only
812 these pseudo registers have valid regno_first_uid info. */
813 for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; ++j)
814 if (REGNO_FIRST_UID (j) > 0 && REGNO_FIRST_UID (j) <= max_uid_for_loop
815 && uid_luid[REGNO_FIRST_UID (j)] >= copy_start_luid
816 && REGNO_LAST_UID (j) > 0 && REGNO_LAST_UID (j) <= max_uid_for_loop
817 && uid_luid[REGNO_LAST_UID (j)] <= copy_end_luid)
819 /* However, we must also check for loop-carried dependencies.
820 If the value the pseudo has at the end of iteration X is
821 used by iteration X+1, then we can not use a different pseudo
822 for each unrolled copy of the loop. */
823 /* A pseudo is safe if regno_first_uid is a set, and this
824 set dominates all instructions from regno_first_uid to
826 /* ??? This check is simplistic. We would get better code if
827 this check was more sophisticated. */
828 if (set_dominates_use (j, REGNO_FIRST_UID (j), REGNO_LAST_UID (j),
829 copy_start, copy_end))
832 if (loop_dump_stream)
835 fprintf (loop_dump_stream, "Marked reg %d as local\n", j);
837 fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
841 /* Givs that have been created from multiple biv increments always have
843 for (j = first_increment_giv; j <= last_increment_giv; j++)
846 if (loop_dump_stream)
847 fprintf (loop_dump_stream, "Marked reg %d as local\n", j);
851 /* If this loop requires exit tests when unrolled, check to see if we
852 can precondition the loop so as to make the exit tests unnecessary.
853 Just like variable splitting, this is not safe if the loop is entered
854 via a jump to the bottom. Also, can not do this if no strength
855 reduce info, because precondition_loop_p uses this info. */
857 /* Must copy the loop body for preconditioning before the following
858 find_splittable_regs call since that will emit insns which need to
859 be after the preconditioned loop copies, but immediately before the
860 unrolled loop copies. */
862 /* Also, it is not safe to split induction variables for the preconditioned
863 copies of the loop body. If we split induction variables, then the code
864 assumes that each induction variable can be represented as a function
865 of its initial value and the loop iteration number. This is not true
866 in this case, because the last preconditioned copy of the loop body
867 could be any iteration from the first up to the `unroll_number-1'th,
868 depending on the initial value of the iteration variable. Therefore
869 we can not split induction variables here, because we can not calculate
870 their value. Hence, this code must occur before find_splittable_regs
873 if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
875 rtx initial_value, final_value, increment;
876 enum machine_mode mode;
878 if (precondition_loop_p (loop_start, loop_info,
879 &initial_value, &final_value, &increment,
884 int abs_inc, neg_inc;
886 map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
888 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum,
890 global_const_equiv_varray = map->const_equiv_varray;
892 init_reg_map (map, maxregnum);
894 /* Limit loop unrolling to 4, since this will make 7 copies of
896 if (unroll_number > 4)
899 /* Save the absolute value of the increment, and also whether or
900 not it is negative. */
902 abs_inc = INTVAL (increment);
911 /* Calculate the difference between the final and initial values.
912 Final value may be a (plus (reg x) (const_int 1)) rtx.
913 Let the following cse pass simplify this if initial value is
916 We must copy the final and initial values here to avoid
917 improperly shared rtl. */
919 diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
920 copy_rtx (initial_value), NULL_RTX, 0,
923 /* Now calculate (diff % (unroll * abs (increment))) by using an
925 diff = expand_binop (GET_MODE (diff), and_optab, diff,
926 GEN_INT (unroll_number * abs_inc - 1),
927 NULL_RTX, 0, OPTAB_LIB_WIDEN);
929 /* Now emit a sequence of branches to jump to the proper precond
932 labels = (rtx *) alloca (sizeof (rtx) * unroll_number);
933 for (i = 0; i < unroll_number; i++)
934 labels[i] = gen_label_rtx ();
936 /* Check for the case where the initial value is greater than or
937 equal to the final value. In that case, we want to execute
938 exactly one loop iteration. The code below will fail for this
939 case. This check does not apply if the loop has a NE
940 comparison at the end. */
942 if (loop_info->comparison_code != NE)
944 emit_cmp_and_jump_insns (initial_value, final_value,
946 NULL_RTX, mode, 0, 0, labels[1]);
947 JUMP_LABEL (get_last_insn ()) = labels[1];
948 LABEL_NUSES (labels[1])++;
951 /* Assuming the unroll_number is 4, and the increment is 2, then
952 for a negative increment: for a positive increment:
953 diff = 0,1 precond 0 diff = 0,7 precond 0
954 diff = 2,3 precond 3 diff = 1,2 precond 1
955 diff = 4,5 precond 2 diff = 3,4 precond 2
956 diff = 6,7 precond 1 diff = 5,6 precond 3 */
958 /* We only need to emit (unroll_number - 1) branches here, the
959 last case just falls through to the following code. */
961 /* ??? This would give better code if we emitted a tree of branches
962 instead of the current linear list of branches. */
964 for (i = 0; i < unroll_number - 1; i++)
967 enum rtx_code cmp_code;
969 /* For negative increments, must invert the constant compared
970 against, except when comparing against zero. */
978 cmp_const = unroll_number - i;
987 emit_cmp_and_jump_insns (diff, GEN_INT (abs_inc * cmp_const),
988 cmp_code, NULL_RTX, mode, 0, 0,
990 JUMP_LABEL (get_last_insn ()) = labels[i];
991 LABEL_NUSES (labels[i])++;
994 /* If the increment is greater than one, then we need another branch,
995 to handle other cases equivalent to 0. */
997 /* ??? This should be merged into the code above somehow to help
998 simplify the code here, and reduce the number of branches emitted.
999 For the negative increment case, the branch here could easily
1000 be merged with the `0' case branch above. For the positive
1001 increment case, it is not clear how this can be simplified. */
1006 enum rtx_code cmp_code;
1010 cmp_const = abs_inc - 1;
1015 cmp_const = abs_inc * (unroll_number - 1) + 1;
1019 emit_cmp_and_jump_insns (diff, GEN_INT (cmp_const), cmp_code,
1020 NULL_RTX, mode, 0, 0, labels[0]);
1021 JUMP_LABEL (get_last_insn ()) = labels[0];
1022 LABEL_NUSES (labels[0])++;
1025 sequence = gen_sequence ();
1027 emit_insn_before (sequence, loop_start);
1029 /* Only the last copy of the loop body here needs the exit
1030 test, so set copy_end to exclude the compare/branch here,
1031 and then reset it inside the loop when get to the last
1034 if (GET_CODE (last_loop_insn) == BARRIER)
1035 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1036 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
1039 /* The immediately preceding insn is a compare which we do not
1041 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1043 /* The immediately preceding insn may not be a compare, so we
1045 copy_end = PREV_INSN (last_loop_insn);
1051 for (i = 1; i < unroll_number; i++)
1053 emit_label_after (labels[unroll_number - i],
1054 PREV_INSN (loop_start));
1056 bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1057 bzero ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0),
1058 (VARRAY_SIZE (map->const_equiv_varray)
1059 * sizeof (struct const_equiv_data)));
1062 for (j = 0; j < max_labelno; j++)
1064 set_label_in_map (map, j, gen_label_rtx ());
1066 for (j = FIRST_PSEUDO_REGISTER; j < max_local_regnum; j++)
1069 map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1070 record_base_value (REGNO (map->reg_map[j]),
1071 regno_reg_rtx[j], 0);
1073 /* The last copy needs the compare/branch insns at the end,
1074 so reset copy_end here if the loop ends with a conditional
1077 if (i == unroll_number - 1)
1079 if (GET_CODE (last_loop_insn) == BARRIER)
1080 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1082 copy_end = last_loop_insn;
1085 /* None of the copies are the `last_iteration', so just
1086 pass zero for that parameter. */
1087 copy_loop_body (copy_start, copy_end, map, exit_label, 0,
1088 unroll_type, start_label, loop_end,
1089 loop_start, copy_end);
1091 emit_label_after (labels[0], PREV_INSN (loop_start));
1093 if (GET_CODE (last_loop_insn) == BARRIER)
1095 insert_before = PREV_INSN (last_loop_insn);
1096 copy_end = PREV_INSN (insert_before);
1101 /* The immediately preceding insn is a compare which we do not
1103 insert_before = PREV_INSN (last_loop_insn);
1104 copy_end = PREV_INSN (insert_before);
1106 /* The immediately preceding insn may not be a compare, so we
1108 insert_before = last_loop_insn;
1109 copy_end = PREV_INSN (last_loop_insn);
1113 /* Set unroll type to MODULO now. */
1114 unroll_type = UNROLL_MODULO;
1115 loop_preconditioned = 1;
1119 /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1120 the loop unless all loops are being unrolled. */
1121 if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1123 if (loop_dump_stream)
1124 fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
1128 /* At this point, we are guaranteed to unroll the loop. */
1130 /* Keep track of the unroll factor for the loop. */
1131 if (unroll_type == UNROLL_COMPLETELY)
1132 loop_info->unroll_number = -1;
1134 loop_info->unroll_number = unroll_number;
1137 /* For each biv and giv, determine whether it can be safely split into
1138 a different variable for each unrolled copy of the loop body.
1139 We precalculate and save this info here, since computing it is
1142 Do this before deleting any instructions from the loop, so that
1143 back_branch_in_range_p will work correctly. */
1145 if (splitting_not_safe)
1148 temp = find_splittable_regs (unroll_type, loop_start, loop_end,
1149 end_insert_before, unroll_number,
1150 loop_info->n_iterations);
1152 /* find_splittable_regs may have created some new registers, so must
1153 reallocate the reg_map with the new larger size, and must realloc
1154 the constant maps also. */
1156 maxregnum = max_reg_num ();
1157 map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
1159 init_reg_map (map, maxregnum);
1161 if (map->const_equiv_varray == 0)
1162 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray,
1163 maxregnum + temp * unroll_number * 2,
1165 global_const_equiv_varray = map->const_equiv_varray;
1167 /* Search the list of bivs and givs to find ones which need to be remapped
1168 when split, and set their reg_map entry appropriately. */
1170 for (bl = loop_iv_list; bl; bl = bl->next)
1172 if (REGNO (bl->biv->src_reg) != bl->regno)
1173 map->reg_map[bl->regno] = bl->biv->src_reg;
1175 /* Currently, non-reduced/final-value givs are never split. */
1176 for (v = bl->giv; v; v = v->next_iv)
1177 if (REGNO (v->src_reg) != bl->regno)
1178 map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1182 /* Use our current register alignment and pointer flags. */
1183 map->regno_pointer_flag = regno_pointer_flag;
1184 map->regno_pointer_align = regno_pointer_align;
1186 /* If the loop is being partially unrolled, and the iteration variables
1187 are being split, and are being renamed for the split, then must fix up
1188 the compare/jump instruction at the end of the loop to refer to the new
1189 registers. This compare isn't copied, so the registers used in it
1190 will never be replaced if it isn't done here. */
1192 if (unroll_type == UNROLL_MODULO)
1194 insn = NEXT_INSN (copy_end);
1195 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1196 PATTERN (insn) = remap_split_bivs (PATTERN (insn));
1199 /* For unroll_number times, make a copy of each instruction
1200 between copy_start and copy_end, and insert these new instructions
1201 before the end of the loop. */
1203 for (i = 0; i < unroll_number; i++)
1205 bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1206 bzero ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0),
1207 VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data));
1210 for (j = 0; j < max_labelno; j++)
1212 set_label_in_map (map, j, gen_label_rtx ());
1214 for (j = FIRST_PSEUDO_REGISTER; j < max_local_regnum; j++)
1217 map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1218 record_base_value (REGNO (map->reg_map[j]),
1219 regno_reg_rtx[j], 0);
1222 /* If loop starts with a branch to the test, then fix it so that
1223 it points to the test of the first unrolled copy of the loop. */
1224 if (i == 0 && loop_start != copy_start)
1226 insn = PREV_INSN (copy_start);
1227 pattern = PATTERN (insn);
1229 tem = get_label_from_map (map,
1231 (XEXP (SET_SRC (pattern), 0)));
1232 SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem);
1234 /* Set the jump label so that it can be used by later loop unrolling
1236 JUMP_LABEL (insn) = tem;
1237 LABEL_NUSES (tem)++;
1240 copy_loop_body (copy_start, copy_end, map, exit_label,
1241 i == unroll_number - 1, unroll_type, start_label,
1242 loop_end, insert_before, insert_before);
1245 /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1246 insn to be deleted. This prevents any runaway delete_insn call from
1247 more insns that it should, as it always stops at a CODE_LABEL. */
1249 /* Delete the compare and branch at the end of the loop if completely
1250 unrolling the loop. Deleting the backward branch at the end also
1251 deletes the code label at the start of the loop. This is done at
1252 the very end to avoid problems with back_branch_in_range_p. */
1254 if (unroll_type == UNROLL_COMPLETELY)
1255 safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1257 safety_label = emit_label_after (gen_label_rtx (), copy_end);
1259 /* Delete all of the original loop instructions. Don't delete the
1260 LOOP_BEG note, or the first code label in the loop. */
1262 insn = NEXT_INSN (copy_start);
1263 while (insn != safety_label)
1265 /* ??? Don't delete named code labels. They will be deleted when the
1266 jump that references them is deleted. Otherwise, we end up deleting
1267 them twice, which causes them to completely disappear instead of turn
1268 into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in
1269 dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files
1270 to handle deleted labels instead. Or perhaps fix DECL_RTL of the
1271 associated LABEL_DECL to point to one of the new label instances. */
1272 /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */
1273 if (insn != start_label
1274 && ! (GET_CODE (insn) == CODE_LABEL && LABEL_NAME (insn))
1275 && ! (GET_CODE (insn) == NOTE
1276 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))
1277 insn = delete_insn (insn);
1279 insn = NEXT_INSN (insn);
1282 /* Can now delete the 'safety' label emitted to protect us from runaway
1283 delete_insn calls. */
1284 if (INSN_DELETED_P (safety_label))
1286 delete_insn (safety_label);
1288 /* If exit_label exists, emit it after the loop. Doing the emit here
1289 forces it to have a higher INSN_UID than any insn in the unrolled loop.
1290 This is needed so that mostly_true_jump in reorg.c will treat jumps
1291 to this loop end label correctly, i.e. predict that they are usually
1294 emit_label_after (exit_label, loop_end);
1297 if (map && map->const_equiv_varray)
1298 VARRAY_FREE (map->const_equiv_varray);
1301 /* Return true if the loop can be safely, and profitably, preconditioned
1302 so that the unrolled copies of the loop body don't need exit tests.
1304 This only works if final_value, initial_value and increment can be
1305 determined, and if increment is a constant power of 2.
1306 If increment is not a power of 2, then the preconditioning modulo
1307 operation would require a real modulo instead of a boolean AND, and this
1308 is not considered `profitable'. */
1310 /* ??? If the loop is known to be executed very many times, or the machine
1311 has a very cheap divide instruction, then preconditioning is a win even
1312 when the increment is not a power of 2. Use RTX_COST to compute
1313 whether divide is cheap.
1314 ??? A divide by constant doesn't actually need a divide, look at
1315 expand_divmod. The reduced cost of this optimized modulo is not
1316 reflected in RTX_COST. */
1319 precondition_loop_p (loop_start, loop_info,
1320 initial_value, final_value, increment, mode)
1322 struct loop_info *loop_info;
1323 rtx *initial_value, *final_value, *increment;
1324 enum machine_mode *mode;
1327 if (loop_info->n_iterations > 0)
1329 *initial_value = const0_rtx;
1330 *increment = const1_rtx;
1331 *final_value = GEN_INT (loop_info->n_iterations);
1334 if (loop_dump_stream)
1336 fputs ("Preconditioning: Success, number of iterations known, ",
1338 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
1339 loop_info->n_iterations);
1340 fputs (".\n", loop_dump_stream);
1345 if (loop_info->initial_value == 0)
1347 if (loop_dump_stream)
1348 fprintf (loop_dump_stream,
1349 "Preconditioning: Could not find initial value.\n");
1352 else if (loop_info->increment == 0)
1354 if (loop_dump_stream)
1355 fprintf (loop_dump_stream,
1356 "Preconditioning: Could not find increment value.\n");
1359 else if (GET_CODE (loop_info->increment) != CONST_INT)
1361 if (loop_dump_stream)
1362 fprintf (loop_dump_stream,
1363 "Preconditioning: Increment not a constant.\n");
1366 else if ((exact_log2 (INTVAL (loop_info->increment)) < 0)
1367 && (exact_log2 (- INTVAL (loop_info->increment)) < 0))
1369 if (loop_dump_stream)
1370 fprintf (loop_dump_stream,
1371 "Preconditioning: Increment not a constant power of 2.\n");
1375 /* Unsigned_compare and compare_dir can be ignored here, since they do
1376 not matter for preconditioning. */
1378 if (loop_info->final_value == 0)
1380 if (loop_dump_stream)
1381 fprintf (loop_dump_stream,
1382 "Preconditioning: EQ comparison loop.\n");
1386 /* Must ensure that final_value is invariant, so call invariant_p to
1387 check. Before doing so, must check regno against max_reg_before_loop
1388 to make sure that the register is in the range covered by invariant_p.
1389 If it isn't, then it is most likely a biv/giv which by definition are
1391 if ((GET_CODE (loop_info->final_value) == REG
1392 && REGNO (loop_info->final_value) >= max_reg_before_loop)
1393 || (GET_CODE (loop_info->final_value) == PLUS
1394 && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop)
1395 || ! invariant_p (loop_info->final_value))
1397 if (loop_dump_stream)
1398 fprintf (loop_dump_stream,
1399 "Preconditioning: Final value not invariant.\n");
1403 /* Fail for floating point values, since the caller of this function
1404 does not have code to deal with them. */
1405 if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT
1406 || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT)
1408 if (loop_dump_stream)
1409 fprintf (loop_dump_stream,
1410 "Preconditioning: Floating point final or initial value.\n");
1414 /* Fail if loop_info->iteration_var is not live before loop_start,
1415 since we need to test its value in the preconditioning code. */
1417 if (uid_luid[REGNO_FIRST_UID (REGNO (loop_info->iteration_var))]
1418 > INSN_LUID (loop_start))
1420 if (loop_dump_stream)
1421 fprintf (loop_dump_stream,
1422 "Preconditioning: Iteration var not live before loop start.\n");
1426 /* Note that iteration_info biases the initial value for GIV iterators
1427 such as "while (i-- > 0)" so that we can calculate the number of
1428 iterations just like for BIV iterators.
1430 Also note that the absolute values of initial_value and
1431 final_value are unimportant as only their difference is used for
1432 calculating the number of loop iterations. */
1433 *initial_value = loop_info->initial_value;
1434 *increment = loop_info->increment;
1435 *final_value = loop_info->final_value;
1437 /* Decide what mode to do these calculations in. Choose the larger
1438 of final_value's mode and initial_value's mode, or a full-word if
1439 both are constants. */
1440 *mode = GET_MODE (*final_value);
1441 if (*mode == VOIDmode)
1443 *mode = GET_MODE (*initial_value);
1444 if (*mode == VOIDmode)
1447 else if (*mode != GET_MODE (*initial_value)
1448 && (GET_MODE_SIZE (*mode)
1449 < GET_MODE_SIZE (GET_MODE (*initial_value))))
1450 *mode = GET_MODE (*initial_value);
1453 if (loop_dump_stream)
1454 fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1459 /* All pseudo-registers must be mapped to themselves. Two hard registers
1460 must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1461 REGNUM, to avoid function-inlining specific conversions of these
1462 registers. All other hard regs can not be mapped because they may be
1467 init_reg_map (map, maxregnum)
1468 struct inline_remap *map;
1473 for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1474 map->reg_map[i] = regno_reg_rtx[i];
1475 /* Just clear the rest of the entries. */
1476 for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1477 map->reg_map[i] = 0;
1479 map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1480 = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1481 map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1482 = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1485 /* Strength-reduction will often emit code for optimized biv/givs which
1486 calculates their value in a temporary register, and then copies the result
1487 to the iv. This procedure reconstructs the pattern computing the iv;
1488 verifying that all operands are of the proper form.
1490 PATTERN must be the result of single_set.
1491 The return value is the amount that the giv is incremented by. */
1494 calculate_giv_inc (pattern, src_insn, regno)
1495 rtx pattern, src_insn;
1499 rtx increment_total = 0;
1503 /* Verify that we have an increment insn here. First check for a plus
1504 as the set source. */
1505 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1507 /* SR sometimes computes the new giv value in a temp, then copies it
1509 src_insn = PREV_INSN (src_insn);
1510 pattern = PATTERN (src_insn);
1511 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1514 /* The last insn emitted is not needed, so delete it to avoid confusing
1515 the second cse pass. This insn sets the giv unnecessarily. */
1516 delete_insn (get_last_insn ());
1519 /* Verify that we have a constant as the second operand of the plus. */
1520 increment = XEXP (SET_SRC (pattern), 1);
1521 if (GET_CODE (increment) != CONST_INT)
1523 /* SR sometimes puts the constant in a register, especially if it is
1524 too big to be an add immed operand. */
1525 src_insn = PREV_INSN (src_insn);
1526 increment = SET_SRC (PATTERN (src_insn));
1528 /* SR may have used LO_SUM to compute the constant if it is too large
1529 for a load immed operand. In this case, the constant is in operand
1530 one of the LO_SUM rtx. */
1531 if (GET_CODE (increment) == LO_SUM)
1532 increment = XEXP (increment, 1);
1534 /* Some ports store large constants in memory and add a REG_EQUAL
1535 note to the store insn. */
1536 else if (GET_CODE (increment) == MEM)
1538 rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
1540 increment = XEXP (note, 0);
1543 else if (GET_CODE (increment) == IOR
1544 || GET_CODE (increment) == ASHIFT
1545 || GET_CODE (increment) == PLUS)
1547 /* The rs6000 port loads some constants with IOR.
1548 The alpha port loads some constants with ASHIFT and PLUS. */
1549 rtx second_part = XEXP (increment, 1);
1550 enum rtx_code code = GET_CODE (increment);
1552 src_insn = PREV_INSN (src_insn);
1553 increment = SET_SRC (PATTERN (src_insn));
1554 /* Don't need the last insn anymore. */
1555 delete_insn (get_last_insn ());
1557 if (GET_CODE (second_part) != CONST_INT
1558 || GET_CODE (increment) != CONST_INT)
1562 increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
1563 else if (code == PLUS)
1564 increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
1566 increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1569 if (GET_CODE (increment) != CONST_INT)
1572 /* The insn loading the constant into a register is no longer needed,
1574 delete_insn (get_last_insn ());
1577 if (increment_total)
1578 increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1580 increment_total = increment;
1582 /* Check that the source register is the same as the register we expected
1583 to see as the source. If not, something is seriously wrong. */
1584 if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1585 || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1587 /* Some machines (e.g. the romp), may emit two add instructions for
1588 certain constants, so lets try looking for another add immediately
1589 before this one if we have only seen one add insn so far. */
1595 src_insn = PREV_INSN (src_insn);
1596 pattern = PATTERN (src_insn);
1598 delete_insn (get_last_insn ());
1606 return increment_total;
1609 /* Copy REG_NOTES, except for insn references, because not all insn_map
1610 entries are valid yet. We do need to copy registers now though, because
1611 the reg_map entries can change during copying. */
1614 initial_reg_note_copy (notes, map)
1616 struct inline_remap *map;
1623 copy = rtx_alloc (GET_CODE (notes));
1624 PUT_MODE (copy, GET_MODE (notes));
1626 if (GET_CODE (notes) == EXPR_LIST)
1627 XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map);
1628 else if (GET_CODE (notes) == INSN_LIST)
1629 /* Don't substitute for these yet. */
1630 XEXP (copy, 0) = XEXP (notes, 0);
1634 XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1639 /* Fixup insn references in copied REG_NOTES. */
1642 final_reg_note_copy (notes, map)
1644 struct inline_remap *map;
1648 for (note = notes; note; note = XEXP (note, 1))
1649 if (GET_CODE (note) == INSN_LIST)
1650 XEXP (note, 0) = map->insn_map[INSN_UID (XEXP (note, 0))];
1653 /* Copy each instruction in the loop, substituting from map as appropriate.
1654 This is very similar to a loop in expand_inline_function. */
1657 copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
1658 unroll_type, start_label, loop_end, insert_before,
1660 rtx copy_start, copy_end;
1661 struct inline_remap *map;
1664 enum unroll_types unroll_type;
1665 rtx start_label, loop_end, insert_before, copy_notes_from;
1669 int dest_reg_was_split, i;
1673 rtx final_label = 0;
1674 rtx giv_inc, giv_dest_reg, giv_src_reg;
1676 /* If this isn't the last iteration, then map any references to the
1677 start_label to final_label. Final label will then be emitted immediately
1678 after the end of this loop body if it was ever used.
1680 If this is the last iteration, then map references to the start_label
1682 if (! last_iteration)
1684 final_label = gen_label_rtx ();
1685 set_label_in_map (map, CODE_LABEL_NUMBER (start_label),
1689 set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);
1693 /* Emit a NOTE_INSN_DELETED to force at least two insns onto the sequence.
1694 Else gen_sequence could return a raw pattern for a jump which we pass
1695 off to emit_insn_before (instead of emit_jump_insn_before) which causes
1696 a variety of losing behaviors later. */
1697 emit_note (0, NOTE_INSN_DELETED);
1702 insn = NEXT_INSN (insn);
1704 map->orig_asm_operands_vector = 0;
1706 switch (GET_CODE (insn))
1709 pattern = PATTERN (insn);
1713 /* Check to see if this is a giv that has been combined with
1714 some split address givs. (Combined in the sense that
1715 `combine_givs' in loop.c has put two givs in the same register.)
1716 In this case, we must search all givs based on the same biv to
1717 find the address givs. Then split the address givs.
1718 Do this before splitting the giv, since that may map the
1719 SET_DEST to a new register. */
1721 if ((set = single_set (insn))
1722 && GET_CODE (SET_DEST (set)) == REG
1723 && addr_combined_regs[REGNO (SET_DEST (set))])
1725 struct iv_class *bl;
1726 struct induction *v, *tv;
1727 int regno = REGNO (SET_DEST (set));
1729 v = addr_combined_regs[REGNO (SET_DEST (set))];
1730 bl = reg_biv_class[REGNO (v->src_reg)];
1732 /* Although the giv_inc amount is not needed here, we must call
1733 calculate_giv_inc here since it might try to delete the
1734 last insn emitted. If we wait until later to call it,
1735 we might accidentally delete insns generated immediately
1736 below by emit_unrolled_add. */
1738 if (! derived_regs[regno])
1739 giv_inc = calculate_giv_inc (set, insn, regno);
1741 /* Now find all address giv's that were combined with this
1743 for (tv = bl->giv; tv; tv = tv->next_iv)
1744 if (tv->giv_type == DEST_ADDR && tv->same == v)
1748 /* If this DEST_ADDR giv was not split, then ignore it. */
1749 if (*tv->location != tv->dest_reg)
1752 /* Scale this_giv_inc if the multiplicative factors of
1753 the two givs are different. */
1754 this_giv_inc = INTVAL (giv_inc);
1755 if (tv->mult_val != v->mult_val)
1756 this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1757 * INTVAL (tv->mult_val));
1759 tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1760 *tv->location = tv->dest_reg;
1762 if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1764 /* Must emit an insn to increment the split address
1765 giv. Add in the const_adjust field in case there
1766 was a constant eliminated from the address. */
1767 rtx value, dest_reg;
1769 /* tv->dest_reg will be either a bare register,
1770 or else a register plus a constant. */
1771 if (GET_CODE (tv->dest_reg) == REG)
1772 dest_reg = tv->dest_reg;
1774 dest_reg = XEXP (tv->dest_reg, 0);
1776 /* Check for shared address givs, and avoid
1777 incrementing the shared pseudo reg more than
1779 if (! tv->same_insn && ! tv->shared)
1781 /* tv->dest_reg may actually be a (PLUS (REG)
1782 (CONST)) here, so we must call plus_constant
1783 to add the const_adjust amount before calling
1784 emit_unrolled_add below. */
1785 value = plus_constant (tv->dest_reg,
1788 /* The constant could be too large for an add
1789 immediate, so can't directly emit an insn
1791 emit_unrolled_add (dest_reg, XEXP (value, 0),
1795 /* Reset the giv to be just the register again, in case
1796 it is used after the set we have just emitted.
1797 We must subtract the const_adjust factor added in
1799 tv->dest_reg = plus_constant (dest_reg,
1800 - tv->const_adjust);
1801 *tv->location = tv->dest_reg;
1806 /* If this is a setting of a splittable variable, then determine
1807 how to split the variable, create a new set based on this split,
1808 and set up the reg_map so that later uses of the variable will
1809 use the new split variable. */
1811 dest_reg_was_split = 0;
1813 if ((set = single_set (insn))
1814 && GET_CODE (SET_DEST (set)) == REG
1815 && splittable_regs[REGNO (SET_DEST (set))])
1817 int regno = REGNO (SET_DEST (set));
1820 dest_reg_was_split = 1;
1822 giv_dest_reg = SET_DEST (set);
1823 if (derived_regs[regno])
1825 /* ??? This relies on SET_SRC (SET) to be of
1826 the form (plus (reg) (const_int)), and thus
1827 forces recombine_givs to restrict the kind
1828 of giv derivations it does before unrolling. */
1829 giv_src_reg = XEXP (SET_SRC (set), 0);
1830 giv_inc = XEXP (SET_SRC (set), 1);
1834 giv_src_reg = giv_dest_reg;
1835 /* Compute the increment value for the giv, if it wasn't
1836 already computed above. */
1838 giv_inc = calculate_giv_inc (set, insn, regno);
1840 src_regno = REGNO (giv_src_reg);
1842 if (unroll_type == UNROLL_COMPLETELY)
1844 /* Completely unrolling the loop. Set the induction
1845 variable to a known constant value. */
1847 /* The value in splittable_regs may be an invariant
1848 value, so we must use plus_constant here. */
1849 splittable_regs[regno]
1850 = plus_constant (splittable_regs[src_regno],
1853 if (GET_CODE (splittable_regs[regno]) == PLUS)
1855 giv_src_reg = XEXP (splittable_regs[regno], 0);
1856 giv_inc = XEXP (splittable_regs[regno], 1);
1860 /* The splittable_regs value must be a REG or a
1861 CONST_INT, so put the entire value in the giv_src_reg
1863 giv_src_reg = splittable_regs[regno];
1864 giv_inc = const0_rtx;
1869 /* Partially unrolling loop. Create a new pseudo
1870 register for the iteration variable, and set it to
1871 be a constant plus the original register. Except
1872 on the last iteration, when the result has to
1873 go back into the original iteration var register. */
1875 /* Handle bivs which must be mapped to a new register
1876 when split. This happens for bivs which need their
1877 final value set before loop entry. The new register
1878 for the biv was stored in the biv's first struct
1879 induction entry by find_splittable_regs. */
1881 if (regno < max_reg_before_loop
1882 && REG_IV_TYPE (regno) == BASIC_INDUCT)
1884 giv_src_reg = reg_biv_class[regno]->biv->src_reg;
1885 giv_dest_reg = giv_src_reg;
1889 /* If non-reduced/final-value givs were split, then
1890 this would have to remap those givs also. See
1891 find_splittable_regs. */
1894 splittable_regs[regno]
1895 = GEN_INT (INTVAL (giv_inc)
1896 + INTVAL (splittable_regs[src_regno]));
1897 giv_inc = splittable_regs[regno];
1899 /* Now split the induction variable by changing the dest
1900 of this insn to a new register, and setting its
1901 reg_map entry to point to this new register.
1903 If this is the last iteration, and this is the last insn
1904 that will update the iv, then reuse the original dest,
1905 to ensure that the iv will have the proper value when
1906 the loop exits or repeats.
1908 Using splittable_regs_updates here like this is safe,
1909 because it can only be greater than one if all
1910 instructions modifying the iv are always executed in
1913 if (! last_iteration
1914 || (splittable_regs_updates[regno]-- != 1))
1916 tem = gen_reg_rtx (GET_MODE (giv_src_reg));
1918 map->reg_map[regno] = tem;
1919 record_base_value (REGNO (tem),
1920 giv_inc == const0_rtx
1922 : gen_rtx_PLUS (GET_MODE (giv_src_reg),
1923 giv_src_reg, giv_inc),
1927 map->reg_map[regno] = giv_src_reg;
1930 /* The constant being added could be too large for an add
1931 immediate, so can't directly emit an insn here. */
1932 emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
1933 copy = get_last_insn ();
1934 pattern = PATTERN (copy);
1938 pattern = copy_rtx_and_substitute (pattern, map);
1939 copy = emit_insn (pattern);
1941 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1944 /* If this insn is setting CC0, it may need to look at
1945 the insn that uses CC0 to see what type of insn it is.
1946 In that case, the call to recog via validate_change will
1947 fail. So don't substitute constants here. Instead,
1948 do it when we emit the following insn.
1950 For example, see the pyr.md file. That machine has signed and
1951 unsigned compares. The compare patterns must check the
1952 following branch insn to see which what kind of compare to
1955 If the previous insn set CC0, substitute constants on it as
1957 if (sets_cc0_p (PATTERN (copy)) != 0)
1962 try_constants (cc0_insn, map);
1964 try_constants (copy, map);
1967 try_constants (copy, map);
1970 /* Make split induction variable constants `permanent' since we
1971 know there are no backward branches across iteration variable
1972 settings which would invalidate this. */
1973 if (dest_reg_was_split)
1975 int regno = REGNO (SET_DEST (pattern));
1977 if (regno < VARRAY_SIZE (map->const_equiv_varray)
1978 && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age
1980 VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1;
1985 pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1986 copy = emit_jump_insn (pattern);
1987 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1989 if (JUMP_LABEL (insn) == start_label && insn == copy_end
1990 && ! last_iteration)
1992 /* This is a branch to the beginning of the loop; this is the
1993 last insn being copied; and this is not the last iteration.
1994 In this case, we want to change the original fall through
1995 case to be a branch past the end of the loop, and the
1996 original jump label case to fall_through. */
1998 if (invert_exp (pattern, copy))
2000 if (! redirect_exp (&pattern,
2001 get_label_from_map (map,
2003 (JUMP_LABEL (insn))),
2010 rtx lab = gen_label_rtx ();
2011 /* Can't do it by reversing the jump (probably because we
2012 couldn't reverse the conditions), so emit a new
2013 jump_insn after COPY, and redirect the jump around
2015 jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
2016 jmp = emit_barrier_after (jmp);
2017 emit_label_after (lab, jmp);
2018 LABEL_NUSES (lab) = 0;
2019 if (! redirect_exp (&pattern,
2020 get_label_from_map (map,
2022 (JUMP_LABEL (insn))),
2030 try_constants (cc0_insn, map);
2033 try_constants (copy, map);
2035 /* Set the jump label of COPY correctly to avoid problems with
2036 later passes of unroll_loop, if INSN had jump label set. */
2037 if (JUMP_LABEL (insn))
2041 /* Can't use the label_map for every insn, since this may be
2042 the backward branch, and hence the label was not mapped. */
2043 if ((set = single_set (copy)))
2045 tem = SET_SRC (set);
2046 if (GET_CODE (tem) == LABEL_REF)
2047 label = XEXP (tem, 0);
2048 else if (GET_CODE (tem) == IF_THEN_ELSE)
2050 if (XEXP (tem, 1) != pc_rtx)
2051 label = XEXP (XEXP (tem, 1), 0);
2053 label = XEXP (XEXP (tem, 2), 0);
2057 if (label && GET_CODE (label) == CODE_LABEL)
2058 JUMP_LABEL (copy) = label;
2061 /* An unrecognizable jump insn, probably the entry jump
2062 for a switch statement. This label must have been mapped,
2063 so just use the label_map to get the new jump label. */
2065 = get_label_from_map (map,
2066 CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
2069 /* If this is a non-local jump, then must increase the label
2070 use count so that the label will not be deleted when the
2071 original jump is deleted. */
2072 LABEL_NUSES (JUMP_LABEL (copy))++;
2074 else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
2075 || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
2077 rtx pat = PATTERN (copy);
2078 int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
2079 int len = XVECLEN (pat, diff_vec_p);
2082 for (i = 0; i < len; i++)
2083 LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
2086 /* If this used to be a conditional jump insn but whose branch
2087 direction is now known, we must do something special. */
2088 if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
2091 /* The previous insn set cc0 for us. So delete it. */
2092 delete_insn (PREV_INSN (copy));
2095 /* If this is now a no-op, delete it. */
2096 if (map->last_pc_value == pc_rtx)
2098 /* Don't let delete_insn delete the label referenced here,
2099 because we might possibly need it later for some other
2100 instruction in the loop. */
2101 if (JUMP_LABEL (copy))
2102 LABEL_NUSES (JUMP_LABEL (copy))++;
2104 if (JUMP_LABEL (copy))
2105 LABEL_NUSES (JUMP_LABEL (copy))--;
2109 /* Otherwise, this is unconditional jump so we must put a
2110 BARRIER after it. We could do some dead code elimination
2111 here, but jump.c will do it just as well. */
2117 pattern = copy_rtx_and_substitute (PATTERN (insn), map);
2118 copy = emit_call_insn (pattern);
2119 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2121 /* Because the USAGE information potentially contains objects other
2122 than hard registers, we need to copy it. */
2123 CALL_INSN_FUNCTION_USAGE (copy)
2124 = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map);
2128 try_constants (cc0_insn, map);
2131 try_constants (copy, map);
2133 /* Be lazy and assume CALL_INSNs clobber all hard registers. */
2134 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2135 VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0;
2139 /* If this is the loop start label, then we don't need to emit a
2140 copy of this label since no one will use it. */
2142 if (insn != start_label)
2144 copy = emit_label (get_label_from_map (map,
2145 CODE_LABEL_NUMBER (insn)));
2151 copy = emit_barrier ();
2155 /* VTOP and CONT notes are valid only before the loop exit test.
2156 If placed anywhere else, loop may generate bad code. */
2157 /* BASIC_BLOCK notes exist to stabilize basic block structures with
2158 the associated rtl. We do not want to share the structure in
2161 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2162 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
2163 && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
2164 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
2165 || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
2166 copy = emit_note (NOTE_SOURCE_FILE (insn),
2167 NOTE_LINE_NUMBER (insn));
2177 map->insn_map[INSN_UID (insn)] = copy;
2179 while (insn != copy_end);
2181 /* Now finish coping the REG_NOTES. */
2185 insn = NEXT_INSN (insn);
2186 if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
2187 || GET_CODE (insn) == CALL_INSN)
2188 && map->insn_map[INSN_UID (insn)])
2189 final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2191 while (insn != copy_end);
2193 /* There may be notes between copy_notes_from and loop_end. Emit a copy of
2194 each of these notes here, since there may be some important ones, such as
2195 NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last
2196 iteration, because the original notes won't be deleted.
2198 We can't use insert_before here, because when from preconditioning,
2199 insert_before points before the loop. We can't use copy_end, because
2200 there may be insns already inserted after it (which we don't want to
2201 copy) when not from preconditioning code. */
2203 if (! last_iteration)
2205 for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2207 /* VTOP notes are valid only before the loop exit test.
2208 If placed anywhere else, loop may generate bad code.
2209 There is no need to test for NOTE_INSN_LOOP_CONT notes
2210 here, since COPY_NOTES_FROM will be at most one or two (for cc0)
2211 instructions before the last insn in the loop, and if the
2212 end test is that short, there will be a VTOP note between
2213 the CONT note and the test. */
2214 if (GET_CODE (insn) == NOTE
2215 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2216 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
2217 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP)
2218 emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2222 if (final_label && LABEL_NUSES (final_label) > 0)
2223 emit_label (final_label);
2225 tem = gen_sequence ();
2227 emit_insn_before (tem, insert_before);
2230 /* Emit an insn, using the expand_binop to ensure that a valid insn is
2231 emitted. This will correctly handle the case where the increment value
2232 won't fit in the immediate field of a PLUS insns. */
2235 emit_unrolled_add (dest_reg, src_reg, increment)
2236 rtx dest_reg, src_reg, increment;
2240 result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment,
2241 dest_reg, 0, OPTAB_LIB_WIDEN);
2243 if (dest_reg != result)
2244 emit_move_insn (dest_reg, result);
2247 /* Searches the insns between INSN and LOOP_END. Returns 1 if there
2248 is a backward branch in that range that branches to somewhere between
2249 LOOP_START and INSN. Returns 0 otherwise. */
2251 /* ??? This is quadratic algorithm. Could be rewritten to be linear.
2252 In practice, this is not a problem, because this function is seldom called,
2253 and uses a negligible amount of CPU time on average. */
2256 back_branch_in_range_p (insn, loop_start, loop_end)
2258 rtx loop_start, loop_end;
2260 rtx p, q, target_insn;
2261 rtx orig_loop_end = loop_end;
2263 /* Stop before we get to the backward branch at the end of the loop. */
2264 loop_end = prev_nonnote_insn (loop_end);
2265 if (GET_CODE (loop_end) == BARRIER)
2266 loop_end = PREV_INSN (loop_end);
2268 /* Check in case insn has been deleted, search forward for first non
2269 deleted insn following it. */
2270 while (INSN_DELETED_P (insn))
2271 insn = NEXT_INSN (insn);
2273 /* Check for the case where insn is the last insn in the loop. Deal
2274 with the case where INSN was a deleted loop test insn, in which case
2275 it will now be the NOTE_LOOP_END. */
2276 if (insn == loop_end || insn == orig_loop_end)
2279 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2281 if (GET_CODE (p) == JUMP_INSN)
2283 target_insn = JUMP_LABEL (p);
2285 /* Search from loop_start to insn, to see if one of them is
2286 the target_insn. We can't use INSN_LUID comparisons here,
2287 since insn may not have an LUID entry. */
2288 for (q = loop_start; q != insn; q = NEXT_INSN (q))
2289 if (q == target_insn)
2297 /* Try to generate the simplest rtx for the expression
2298 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2302 fold_rtx_mult_add (mult1, mult2, add1, mode)
2303 rtx mult1, mult2, add1;
2304 enum machine_mode mode;
2309 /* The modes must all be the same. This should always be true. For now,
2310 check to make sure. */
2311 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2312 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2313 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2316 /* Ensure that if at least one of mult1/mult2 are constant, then mult2
2317 will be a constant. */
2318 if (GET_CODE (mult1) == CONST_INT)
2325 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2327 mult_res = gen_rtx_MULT (mode, mult1, mult2);
2329 /* Again, put the constant second. */
2330 if (GET_CODE (add1) == CONST_INT)
2337 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2339 result = gen_rtx_PLUS (mode, add1, mult_res);
2344 /* Searches the list of induction struct's for the biv BL, to try to calculate
2345 the total increment value for one iteration of the loop as a constant.
2347 Returns the increment value as an rtx, simplified as much as possible,
2348 if it can be calculated. Otherwise, returns 0. */
2351 biv_total_increment (bl, loop_start, loop_end)
2352 struct iv_class *bl;
2353 rtx loop_start, loop_end;
2355 struct induction *v;
2358 /* For increment, must check every instruction that sets it. Each
2359 instruction must be executed only once each time through the loop.
2360 To verify this, we check that the insn is always executed, and that
2361 there are no backward branches after the insn that branch to before it.
2362 Also, the insn must have a mult_val of one (to make sure it really is
2365 result = const0_rtx;
2366 for (v = bl->biv; v; v = v->next_iv)
2368 if (v->always_computable && v->mult_val == const1_rtx
2369 && ! v->maybe_multiple)
2370 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2378 /* Determine the initial value of the iteration variable, and the amount
2379 that it is incremented each loop. Use the tables constructed by
2380 the strength reduction pass to calculate these values.
2382 Initial_value and/or increment are set to zero if their values could not
2386 iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
2387 rtx iteration_var, *initial_value, *increment;
2388 rtx loop_start, loop_end;
2390 struct iv_class *bl;
2392 struct induction *v;
2395 /* Clear the result values, in case no answer can be found. */
2399 /* The iteration variable can be either a giv or a biv. Check to see
2400 which it is, and compute the variable's initial value, and increment
2401 value if possible. */
2403 /* If this is a new register, can't handle it since we don't have any
2404 reg_iv_type entry for it. */
2405 if ((unsigned) REGNO (iteration_var) >= reg_iv_type->num_elements)
2407 if (loop_dump_stream)
2408 fprintf (loop_dump_stream,
2409 "Loop unrolling: No reg_iv_type entry for iteration var.\n");
2413 /* Reject iteration variables larger than the host wide int size, since they
2414 could result in a number of iterations greater than the range of our
2415 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */
2416 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
2417 > HOST_BITS_PER_WIDE_INT))
2419 if (loop_dump_stream)
2420 fprintf (loop_dump_stream,
2421 "Loop unrolling: Iteration var rejected because mode too large.\n");
2424 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
2426 if (loop_dump_stream)
2427 fprintf (loop_dump_stream,
2428 "Loop unrolling: Iteration var not an integer.\n");
2431 else if (REG_IV_TYPE (REGNO (iteration_var)) == BASIC_INDUCT)
2433 /* When reg_iv_type / reg_iv_info is resized for biv increments
2434 that are turned into givs, reg_biv_class is not resized.
2435 So check here that we don't make an out-of-bounds access. */
2436 if (REGNO (iteration_var) >= max_reg_before_loop)
2439 /* Grab initial value, only useful if it is a constant. */
2440 bl = reg_biv_class[REGNO (iteration_var)];
2441 *initial_value = bl->initial_value;
2443 *increment = biv_total_increment (bl, loop_start, loop_end);
2445 else if (REG_IV_TYPE (REGNO (iteration_var)) == GENERAL_INDUCT)
2447 HOST_WIDE_INT offset = 0;
2448 struct induction *v = REG_IV_INFO (REGNO (iteration_var));
2450 if (REGNO (v->src_reg) >= max_reg_before_loop)
2453 bl = reg_biv_class[REGNO (v->src_reg)];
2455 /* Increment value is mult_val times the increment value of the biv. */
2457 *increment = biv_total_increment (bl, loop_start, loop_end);
2460 struct induction *biv_inc;
2463 = fold_rtx_mult_add (v->mult_val, *increment, const0_rtx, v->mode);
2464 /* The caller assumes that one full increment has occured at the
2465 first loop test. But that's not true when the biv is incremented
2466 after the giv is set (which is the usual case), e.g.:
2467 i = 6; do {;} while (i++ < 9) .
2468 Therefore, we bias the initial value by subtracting the amount of
2469 the increment that occurs between the giv set and the giv test. */
2470 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
2472 if (loop_insn_first_p (v->insn, biv_inc->insn))
2473 offset -= INTVAL (biv_inc->add_val);
2475 offset *= INTVAL (v->mult_val);
2477 if (loop_dump_stream)
2478 fprintf (loop_dump_stream,
2479 "Loop unrolling: Giv iterator, initial value bias %ld.\n",
2481 /* Initial value is mult_val times the biv's initial value plus
2482 add_val. Only useful if it is a constant. */
2484 = fold_rtx_mult_add (v->mult_val,
2485 plus_constant (bl->initial_value, offset),
2486 v->add_val, v->mode);
2490 if (loop_dump_stream)
2491 fprintf (loop_dump_stream,
2492 "Loop unrolling: Not basic or general induction var.\n");
2498 /* For each biv and giv, determine whether it can be safely split into
2499 a different variable for each unrolled copy of the loop body. If it
2500 is safe to split, then indicate that by saving some useful info
2501 in the splittable_regs array.
2503 If the loop is being completely unrolled, then splittable_regs will hold
2504 the current value of the induction variable while the loop is unrolled.
2505 It must be set to the initial value of the induction variable here.
2506 Otherwise, splittable_regs will hold the difference between the current
2507 value of the induction variable and the value the induction variable had
2508 at the top of the loop. It must be set to the value 0 here.
2510 Returns the total number of instructions that set registers that are
2513 /* ?? If the loop is only unrolled twice, then most of the restrictions to
2514 constant values are unnecessary, since we can easily calculate increment
2515 values in this case even if nothing is constant. The increment value
2516 should not involve a multiply however. */
2518 /* ?? Even if the biv/giv increment values aren't constant, it may still
2519 be beneficial to split the variable if the loop is only unrolled a few
2520 times, since multiplies by small integers (1,2,3,4) are very cheap. */
2523 find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
2524 unroll_number, n_iterations)
2525 enum unroll_types unroll_type;
2526 rtx loop_start, loop_end;
2527 rtx end_insert_before;
2529 unsigned HOST_WIDE_INT n_iterations;
2531 struct iv_class *bl;
2532 struct induction *v;
2534 rtx biv_final_value;
2538 for (bl = loop_iv_list; bl; bl = bl->next)
2540 /* Biv_total_increment must return a constant value,
2541 otherwise we can not calculate the split values. */
2543 increment = biv_total_increment (bl, loop_start, loop_end);
2544 if (! increment || GET_CODE (increment) != CONST_INT)
2547 /* The loop must be unrolled completely, or else have a known number
2548 of iterations and only one exit, or else the biv must be dead
2549 outside the loop, or else the final value must be known. Otherwise,
2550 it is unsafe to split the biv since it may not have the proper
2551 value on loop exit. */
2553 /* loop_number_exit_count is non-zero if the loop has an exit other than
2554 a fall through at the end. */
2557 biv_final_value = 0;
2558 if (unroll_type != UNROLL_COMPLETELY
2559 && (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2560 || unroll_type == UNROLL_NAIVE)
2561 && (uid_luid[REGNO_LAST_UID (bl->regno)] >= INSN_LUID (loop_end)
2563 || INSN_UID (bl->init_insn) >= max_uid_for_loop
2564 || (uid_luid[REGNO_FIRST_UID (bl->regno)]
2565 < INSN_LUID (bl->init_insn))
2566 || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2567 && ! (biv_final_value = final_biv_value (bl, loop_start, loop_end,
2571 /* If any of the insns setting the BIV don't do so with a simple
2572 PLUS, we don't know how to split it. */
2573 for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2574 if ((tem = single_set (v->insn)) == 0
2575 || GET_CODE (SET_DEST (tem)) != REG
2576 || REGNO (SET_DEST (tem)) != bl->regno
2577 || GET_CODE (SET_SRC (tem)) != PLUS)
2580 /* If final value is non-zero, then must emit an instruction which sets
2581 the value of the biv to the proper value. This is done after
2582 handling all of the givs, since some of them may need to use the
2583 biv's value in their initialization code. */
2585 /* This biv is splittable. If completely unrolling the loop, save
2586 the biv's initial value. Otherwise, save the constant zero. */
2588 if (biv_splittable == 1)
2590 if (unroll_type == UNROLL_COMPLETELY)
2592 /* If the initial value of the biv is itself (i.e. it is too
2593 complicated for strength_reduce to compute), or is a hard
2594 register, or it isn't invariant, then we must create a new
2595 pseudo reg to hold the initial value of the biv. */
2597 if (GET_CODE (bl->initial_value) == REG
2598 && (REGNO (bl->initial_value) == bl->regno
2599 || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
2600 || ! invariant_p (bl->initial_value)))
2602 rtx tem = gen_reg_rtx (bl->biv->mode);
2604 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2605 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2608 if (loop_dump_stream)
2609 fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
2610 bl->regno, REGNO (tem));
2612 splittable_regs[bl->regno] = tem;
2615 splittable_regs[bl->regno] = bl->initial_value;
2618 splittable_regs[bl->regno] = const0_rtx;
2620 /* Save the number of instructions that modify the biv, so that
2621 we can treat the last one specially. */
2623 splittable_regs_updates[bl->regno] = bl->biv_count;
2624 result += bl->biv_count;
2626 if (loop_dump_stream)
2627 fprintf (loop_dump_stream,
2628 "Biv %d safe to split.\n", bl->regno);
2631 /* Check every giv that depends on this biv to see whether it is
2632 splittable also. Even if the biv isn't splittable, givs which
2633 depend on it may be splittable if the biv is live outside the
2634 loop, and the givs aren't. */
2636 result += find_splittable_givs (bl, unroll_type, loop_start, loop_end,
2637 increment, unroll_number);
2639 /* If final value is non-zero, then must emit an instruction which sets
2640 the value of the biv to the proper value. This is done after
2641 handling all of the givs, since some of them may need to use the
2642 biv's value in their initialization code. */
2643 if (biv_final_value)
2645 /* If the loop has multiple exits, emit the insns before the
2646 loop to ensure that it will always be executed no matter
2647 how the loop exits. Otherwise emit the insn after the loop,
2648 since this is slightly more efficient. */
2649 if (! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
2650 emit_insn_before (gen_move_insn (bl->biv->src_reg,
2655 /* Create a new register to hold the value of the biv, and then
2656 set the biv to its final value before the loop start. The biv
2657 is set to its final value before loop start to ensure that
2658 this insn will always be executed, no matter how the loop
2660 rtx tem = gen_reg_rtx (bl->biv->mode);
2661 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2663 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2665 emit_insn_before (gen_move_insn (bl->biv->src_reg,
2669 if (loop_dump_stream)
2670 fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2671 REGNO (bl->biv->src_reg), REGNO (tem));
2673 /* Set up the mapping from the original biv register to the new
2675 bl->biv->src_reg = tem;
2682 /* Return 1 if the first and last unrolled copy of the address giv V is valid
2683 for the instruction that is using it. Do not make any changes to that
2687 verify_addresses (v, giv_inc, unroll_number)
2688 struct induction *v;
2693 rtx orig_addr = *v->location;
2694 rtx last_addr = plus_constant (v->dest_reg,
2695 INTVAL (giv_inc) * (unroll_number - 1));
2697 /* First check to see if either address would fail. Handle the fact
2698 that we have may have a match_dup. */
2699 if (! validate_replace_rtx (*v->location, v->dest_reg, v->insn)
2700 || ! validate_replace_rtx (*v->location, last_addr, v->insn))
2703 /* Now put things back the way they were before. This should always
2705 if (! validate_replace_rtx (*v->location, orig_addr, v->insn))
2711 /* For every giv based on the biv BL, check to determine whether it is
2712 splittable. This is a subroutine to find_splittable_regs ().
2714 Return the number of instructions that set splittable registers. */
2717 find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
2719 struct iv_class *bl;
2720 enum unroll_types unroll_type;
2721 rtx loop_start, loop_end;
2725 struct induction *v, *v2;
2730 /* Scan the list of givs, and set the same_insn field when there are
2731 multiple identical givs in the same insn. */
2732 for (v = bl->giv; v; v = v->next_iv)
2733 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2734 if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2738 for (v = bl->giv; v; v = v->next_iv)
2742 /* Only split the giv if it has already been reduced, or if the loop is
2743 being completely unrolled. */
2744 if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2747 /* The giv can be split if the insn that sets the giv is executed once
2748 and only once on every iteration of the loop. */
2749 /* An address giv can always be split. v->insn is just a use not a set,
2750 and hence it does not matter whether it is always executed. All that
2751 matters is that all the biv increments are always executed, and we
2752 won't reach here if they aren't. */
2753 if (v->giv_type != DEST_ADDR
2754 && (! v->always_computable
2755 || back_branch_in_range_p (v->insn, loop_start, loop_end)))
2758 /* The giv increment value must be a constant. */
2759 giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2761 if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
2764 /* The loop must be unrolled completely, or else have a known number of
2765 iterations and only one exit, or else the giv must be dead outside
2766 the loop, or else the final value of the giv must be known.
2767 Otherwise, it is not safe to split the giv since it may not have the
2768 proper value on loop exit. */
2770 /* The used outside loop test will fail for DEST_ADDR givs. They are
2771 never used outside the loop anyways, so it is always safe to split a
2775 if (unroll_type != UNROLL_COMPLETELY
2776 && (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2777 || unroll_type == UNROLL_NAIVE)
2778 && v->giv_type != DEST_ADDR
2779 /* The next part is true if the pseudo is used outside the loop.
2780 We assume that this is true for any pseudo created after loop
2781 starts, because we don't have a reg_n_info entry for them. */
2782 && (REGNO (v->dest_reg) >= max_reg_before_loop
2783 || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
2784 /* Check for the case where the pseudo is set by a shift/add
2785 sequence, in which case the first insn setting the pseudo
2786 is the first insn of the shift/add sequence. */
2787 && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2788 || (REGNO_FIRST_UID (REGNO (v->dest_reg))
2789 != INSN_UID (XEXP (tem, 0)))))
2790 /* Line above always fails if INSN was moved by loop opt. */
2791 || (uid_luid[REGNO_LAST_UID (REGNO (v->dest_reg))]
2792 >= INSN_LUID (loop_end)))
2793 /* Givs made from biv increments are missed by the above test, so
2794 test explicitly for them. */
2795 && (REGNO (v->dest_reg) < first_increment_giv
2796 || REGNO (v->dest_reg) > last_increment_giv)
2797 && ! (final_value = v->final_value))
2801 /* Currently, non-reduced/final-value givs are never split. */
2802 /* Should emit insns after the loop if possible, as the biv final value
2805 /* If the final value is non-zero, and the giv has not been reduced,
2806 then must emit an instruction to set the final value. */
2807 if (final_value && !v->new_reg)
2809 /* Create a new register to hold the value of the giv, and then set
2810 the giv to its final value before the loop start. The giv is set
2811 to its final value before loop start to ensure that this insn
2812 will always be executed, no matter how we exit. */
2813 tem = gen_reg_rtx (v->mode);
2814 emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
2815 emit_insn_before (gen_move_insn (v->dest_reg, final_value),
2818 if (loop_dump_stream)
2819 fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2820 REGNO (v->dest_reg), REGNO (tem));
2826 /* This giv is splittable. If completely unrolling the loop, save the
2827 giv's initial value. Otherwise, save the constant zero for it. */
2829 if (unroll_type == UNROLL_COMPLETELY)
2831 /* It is not safe to use bl->initial_value here, because it may not
2832 be invariant. It is safe to use the initial value stored in
2833 the splittable_regs array if it is set. In rare cases, it won't
2834 be set, so then we do exactly the same thing as
2835 find_splittable_regs does to get a safe value. */
2836 rtx biv_initial_value;
2838 if (splittable_regs[bl->regno])
2839 biv_initial_value = splittable_regs[bl->regno];
2840 else if (GET_CODE (bl->initial_value) != REG
2841 || (REGNO (bl->initial_value) != bl->regno
2842 && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2843 biv_initial_value = bl->initial_value;
2846 rtx tem = gen_reg_rtx (bl->biv->mode);
2848 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2849 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2851 biv_initial_value = tem;
2853 value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2854 v->add_val, v->mode);
2861 /* If a giv was combined with another giv, then we can only split
2862 this giv if the giv it was combined with was reduced. This
2863 is because the value of v->new_reg is meaningless in this
2865 if (v->same && ! v->same->new_reg)
2867 if (loop_dump_stream)
2868 fprintf (loop_dump_stream,
2869 "giv combined with unreduced giv not split.\n");
2872 /* If the giv is an address destination, it could be something other
2873 than a simple register, these have to be treated differently. */
2874 else if (v->giv_type == DEST_REG)
2876 /* If value is not a constant, register, or register plus
2877 constant, then compute its value into a register before
2878 loop start. This prevents invalid rtx sharing, and should
2879 generate better code. We can use bl->initial_value here
2880 instead of splittable_regs[bl->regno] because this code
2881 is going before the loop start. */
2882 if (unroll_type == UNROLL_COMPLETELY
2883 && GET_CODE (value) != CONST_INT
2884 && GET_CODE (value) != REG
2885 && (GET_CODE (value) != PLUS
2886 || GET_CODE (XEXP (value, 0)) != REG
2887 || GET_CODE (XEXP (value, 1)) != CONST_INT))
2889 rtx tem = gen_reg_rtx (v->mode);
2890 record_base_value (REGNO (tem), v->add_val, 0);
2891 emit_iv_add_mult (bl->initial_value, v->mult_val,
2892 v->add_val, tem, loop_start);
2896 splittable_regs[REGNO (v->new_reg)] = value;
2897 derived_regs[REGNO (v->new_reg)] = v->derived_from != 0;
2901 /* Splitting address givs is useful since it will often allow us
2902 to eliminate some increment insns for the base giv as
2905 /* If the addr giv is combined with a dest_reg giv, then all
2906 references to that dest reg will be remapped, which is NOT
2907 what we want for split addr regs. We always create a new
2908 register for the split addr giv, just to be safe. */
2910 /* If we have multiple identical address givs within a
2911 single instruction, then use a single pseudo reg for
2912 both. This is necessary in case one is a match_dup
2915 v->const_adjust = 0;
2919 v->dest_reg = v->same_insn->dest_reg;
2920 if (loop_dump_stream)
2921 fprintf (loop_dump_stream,
2922 "Sharing address givs in insn %d\n",
2923 INSN_UID (v->insn));
2925 /* If multiple address GIVs have been combined with the
2926 same dest_reg GIV, do not create a new register for
2928 else if (unroll_type != UNROLL_COMPLETELY
2929 && v->giv_type == DEST_ADDR
2930 && v->same && v->same->giv_type == DEST_ADDR
2931 && v->same->unrolled
2932 /* combine_givs_p may return true for some cases
2933 where the add and mult values are not equal.
2934 To share a register here, the values must be
2936 && rtx_equal_p (v->same->mult_val, v->mult_val)
2937 && rtx_equal_p (v->same->add_val, v->add_val)
2938 /* If the memory references have different modes,
2939 then the address may not be valid and we must
2940 not share registers. */
2941 && verify_addresses (v, giv_inc, unroll_number))
2943 v->dest_reg = v->same->dest_reg;
2946 else if (unroll_type != UNROLL_COMPLETELY)
2948 /* If not completely unrolling the loop, then create a new
2949 register to hold the split value of the DEST_ADDR giv.
2950 Emit insn to initialize its value before loop start. */
2952 rtx tem = gen_reg_rtx (v->mode);
2953 struct induction *same = v->same;
2954 rtx new_reg = v->new_reg;
2955 record_base_value (REGNO (tem), v->add_val, 0);
2957 if (same && same->derived_from)
2959 /* calculate_giv_inc doesn't work for derived givs.
2960 copy_loop_body works around the problem for the
2961 DEST_REG givs themselves, but it can't handle
2962 DEST_ADDR givs that have been combined with
2963 a derived DEST_REG giv.
2964 So Handle V as if the giv from which V->SAME has
2965 been derived has been combined with V.
2966 recombine_givs only derives givs from givs that
2967 are reduced the ordinary, so we need not worry
2968 about same->derived_from being in turn derived. */
2970 same = same->derived_from;
2971 new_reg = express_from (same, v);
2972 new_reg = replace_rtx (new_reg, same->dest_reg,
2976 /* If the address giv has a constant in its new_reg value,
2977 then this constant can be pulled out and put in value,
2978 instead of being part of the initialization code. */
2980 if (GET_CODE (new_reg) == PLUS
2981 && GET_CODE (XEXP (new_reg, 1)) == CONST_INT)
2984 = plus_constant (tem, INTVAL (XEXP (new_reg, 1)));
2986 /* Only succeed if this will give valid addresses.
2987 Try to validate both the first and the last
2988 address resulting from loop unrolling, if
2989 one fails, then can't do const elim here. */
2990 if (verify_addresses (v, giv_inc, unroll_number))
2992 /* Save the negative of the eliminated const, so
2993 that we can calculate the dest_reg's increment
2995 v->const_adjust = - INTVAL (XEXP (new_reg, 1));
2997 new_reg = XEXP (new_reg, 0);
2998 if (loop_dump_stream)
2999 fprintf (loop_dump_stream,
3000 "Eliminating constant from giv %d\n",
3009 /* If the address hasn't been checked for validity yet, do so
3010 now, and fail completely if either the first or the last
3011 unrolled copy of the address is not a valid address
3012 for the instruction that uses it. */
3013 if (v->dest_reg == tem
3014 && ! verify_addresses (v, giv_inc, unroll_number))
3016 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
3017 if (v2->same_insn == v)
3020 if (loop_dump_stream)
3021 fprintf (loop_dump_stream,
3022 "Invalid address for giv at insn %d\n",
3023 INSN_UID (v->insn));
3027 v->new_reg = new_reg;
3030 /* We set this after the address check, to guarantee that
3031 the register will be initialized. */
3034 /* To initialize the new register, just move the value of
3035 new_reg into it. This is not guaranteed to give a valid
3036 instruction on machines with complex addressing modes.
3037 If we can't recognize it, then delete it and emit insns
3038 to calculate the value from scratch. */
3039 emit_insn_before (gen_rtx_SET (VOIDmode, tem,
3040 copy_rtx (v->new_reg)),
3042 if (recog_memoized (PREV_INSN (loop_start)) < 0)
3046 /* We can't use bl->initial_value to compute the initial
3047 value, because the loop may have been preconditioned.
3048 We must calculate it from NEW_REG. Try using
3049 force_operand instead of emit_iv_add_mult. */
3050 delete_insn (PREV_INSN (loop_start));
3053 ret = force_operand (v->new_reg, tem);
3055 emit_move_insn (tem, ret);
3056 sequence = gen_sequence ();
3058 emit_insn_before (sequence, loop_start);
3060 if (loop_dump_stream)
3061 fprintf (loop_dump_stream,
3062 "Invalid init insn, rewritten.\n");
3067 v->dest_reg = value;
3069 /* Check the resulting address for validity, and fail
3070 if the resulting address would be invalid. */
3071 if (! verify_addresses (v, giv_inc, unroll_number))
3073 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
3074 if (v2->same_insn == v)
3077 if (loop_dump_stream)
3078 fprintf (loop_dump_stream,
3079 "Invalid address for giv at insn %d\n",
3080 INSN_UID (v->insn));
3083 if (v->same && v->same->derived_from)
3085 /* Handle V as if the giv from which V->SAME has
3086 been derived has been combined with V. */
3088 v->same = v->same->derived_from;
3089 v->new_reg = express_from (v->same, v);
3090 v->new_reg = replace_rtx (v->new_reg, v->same->dest_reg,
3096 /* Store the value of dest_reg into the insn. This sharing
3097 will not be a problem as this insn will always be copied
3100 *v->location = v->dest_reg;
3102 /* If this address giv is combined with a dest reg giv, then
3103 save the base giv's induction pointer so that we will be
3104 able to handle this address giv properly. The base giv
3105 itself does not have to be splittable. */
3107 if (v->same && v->same->giv_type == DEST_REG)
3108 addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
3110 if (GET_CODE (v->new_reg) == REG)
3112 /* This giv maybe hasn't been combined with any others.
3113 Make sure that it's giv is marked as splittable here. */
3115 splittable_regs[REGNO (v->new_reg)] = value;
3116 derived_regs[REGNO (v->new_reg)] = v->derived_from != 0;
3118 /* Make it appear to depend upon itself, so that the
3119 giv will be properly split in the main loop above. */
3123 addr_combined_regs[REGNO (v->new_reg)] = v;
3127 if (loop_dump_stream)
3128 fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
3134 /* Currently, unreduced giv's can't be split. This is not too much
3135 of a problem since unreduced giv's are not live across loop
3136 iterations anyways. When unrolling a loop completely though,
3137 it makes sense to reduce&split givs when possible, as this will
3138 result in simpler instructions, and will not require that a reg
3139 be live across loop iterations. */
3141 splittable_regs[REGNO (v->dest_reg)] = value;
3142 fprintf (stderr, "Giv %d at insn %d not reduced\n",
3143 REGNO (v->dest_reg), INSN_UID (v->insn));
3149 /* Unreduced givs are only updated once by definition. Reduced givs
3150 are updated as many times as their biv is. Mark it so if this is
3151 a splittable register. Don't need to do anything for address givs
3152 where this may not be a register. */
3154 if (GET_CODE (v->new_reg) == REG)
3158 count = reg_biv_class[REGNO (v->src_reg)]->biv_count;
3160 if (count > 1 && v->derived_from)
3161 /* In this case, there is one set where the giv insn was and one
3162 set each after each biv increment. (Most are likely dead.) */
3165 splittable_regs_updates[REGNO (v->new_reg)] = count;
3170 if (loop_dump_stream)
3174 if (GET_CODE (v->dest_reg) == CONST_INT)
3176 else if (GET_CODE (v->dest_reg) != REG)
3177 regnum = REGNO (XEXP (v->dest_reg, 0));
3179 regnum = REGNO (v->dest_reg);
3180 fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
3181 regnum, INSN_UID (v->insn));
3188 /* Try to prove that the register is dead after the loop exits. Trace every
3189 loop exit looking for an insn that will always be executed, which sets
3190 the register to some value, and appears before the first use of the register
3191 is found. If successful, then return 1, otherwise return 0. */
3193 /* ?? Could be made more intelligent in the handling of jumps, so that
3194 it can search past if statements and other similar structures. */
3197 reg_dead_after_loop (reg, loop_start, loop_end)
3198 rtx reg, loop_start, loop_end;
3203 int label_count = 0;
3204 int this_loop_num = uid_loop_num[INSN_UID (loop_start)];
3206 /* In addition to checking all exits of this loop, we must also check
3207 all exits of inner nested loops that would exit this loop. We don't
3208 have any way to identify those, so we just give up if there are any
3209 such inner loop exits. */
3211 for (label = loop_number_exit_labels[this_loop_num]; label;
3212 label = LABEL_NEXTREF (label))
3215 if (label_count != loop_number_exit_count[this_loop_num])
3218 /* HACK: Must also search the loop fall through exit, create a label_ref
3219 here which points to the loop_end, and append the loop_number_exit_labels
3221 label = gen_rtx_LABEL_REF (VOIDmode, loop_end);
3222 LABEL_NEXTREF (label) = loop_number_exit_labels[this_loop_num];
3224 for ( ; label; label = LABEL_NEXTREF (label))
3226 /* Succeed if find an insn which sets the biv or if reach end of
3227 function. Fail if find an insn that uses the biv, or if come to
3228 a conditional jump. */
3230 insn = NEXT_INSN (XEXP (label, 0));
3233 code = GET_CODE (insn);
3234 if (GET_RTX_CLASS (code) == 'i')
3238 if (reg_referenced_p (reg, PATTERN (insn)))
3241 set = single_set (insn);
3242 if (set && rtx_equal_p (SET_DEST (set), reg))
3246 if (code == JUMP_INSN)
3248 if (GET_CODE (PATTERN (insn)) == RETURN)
3250 else if (! simplejump_p (insn)
3251 /* Prevent infinite loop following infinite loops. */
3252 || jump_count++ > 20)
3255 insn = JUMP_LABEL (insn);
3258 insn = NEXT_INSN (insn);
3262 /* Success, the register is dead on all loop exits. */
3266 /* Try to calculate the final value of the biv, the value it will have at
3267 the end of the loop. If we can do it, return that value. */
3270 final_biv_value (bl, loop_start, loop_end, n_iterations)
3271 struct iv_class *bl;
3272 rtx loop_start, loop_end;
3273 unsigned HOST_WIDE_INT n_iterations;
3277 /* ??? This only works for MODE_INT biv's. Reject all others for now. */
3279 if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3282 /* The final value for reversed bivs must be calculated differently than
3283 for ordinary bivs. In this case, there is already an insn after the
3284 loop which sets this biv's final value (if necessary), and there are
3285 no other loop exits, so we can return any value. */
3288 if (loop_dump_stream)
3289 fprintf (loop_dump_stream,
3290 "Final biv value for %d, reversed biv.\n", bl->regno);
3295 /* Try to calculate the final value as initial value + (number of iterations
3296 * increment). For this to work, increment must be invariant, the only
3297 exit from the loop must be the fall through at the bottom (otherwise
3298 it may not have its final value when the loop exits), and the initial
3299 value of the biv must be invariant. */
3301 if (n_iterations != 0
3302 && ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
3303 && invariant_p (bl->initial_value))
3305 increment = biv_total_increment (bl, loop_start, loop_end);
3307 if (increment && invariant_p (increment))
3309 /* Can calculate the loop exit value, emit insns after loop
3310 end to calculate this value into a temporary register in
3311 case it is needed later. */
3313 tem = gen_reg_rtx (bl->biv->mode);
3314 record_base_value (REGNO (tem), bl->biv->add_val, 0);
3315 /* Make sure loop_end is not the last insn. */
3316 if (NEXT_INSN (loop_end) == 0)
3317 emit_note_after (NOTE_INSN_DELETED, loop_end);
3318 emit_iv_add_mult (increment, GEN_INT (n_iterations),
3319 bl->initial_value, tem, NEXT_INSN (loop_end));
3321 if (loop_dump_stream)
3322 fprintf (loop_dump_stream,
3323 "Final biv value for %d, calculated.\n", bl->regno);
3329 /* Check to see if the biv is dead at all loop exits. */
3330 if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
3332 if (loop_dump_stream)
3333 fprintf (loop_dump_stream,
3334 "Final biv value for %d, biv dead after loop exit.\n",
3343 /* Try to calculate the final value of the giv, the value it will have at
3344 the end of the loop. If we can do it, return that value. */
3347 final_giv_value (v, loop_start, loop_end, n_iterations)
3348 struct induction *v;
3349 rtx loop_start, loop_end;
3350 unsigned HOST_WIDE_INT n_iterations;
3352 struct iv_class *bl;
3355 rtx insert_before, seq;
3357 bl = reg_biv_class[REGNO (v->src_reg)];
3359 /* The final value for givs which depend on reversed bivs must be calculated
3360 differently than for ordinary givs. In this case, there is already an
3361 insn after the loop which sets this giv's final value (if necessary),
3362 and there are no other loop exits, so we can return any value. */
3365 if (loop_dump_stream)
3366 fprintf (loop_dump_stream,
3367 "Final giv value for %d, depends on reversed biv\n",
3368 REGNO (v->dest_reg));
3372 /* Try to calculate the final value as a function of the biv it depends
3373 upon. The only exit from the loop must be the fall through at the bottom
3374 (otherwise it may not have its final value when the loop exits). */
3376 /* ??? Can calculate the final giv value by subtracting off the
3377 extra biv increments times the giv's mult_val. The loop must have
3378 only one exit for this to work, but the loop iterations does not need
3381 if (n_iterations != 0
3382 && ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
3384 /* ?? It is tempting to use the biv's value here since these insns will
3385 be put after the loop, and hence the biv will have its final value
3386 then. However, this fails if the biv is subsequently eliminated.
3387 Perhaps determine whether biv's are eliminable before trying to
3388 determine whether giv's are replaceable so that we can use the
3389 biv value here if it is not eliminable. */
3391 /* We are emitting code after the end of the loop, so we must make
3392 sure that bl->initial_value is still valid then. It will still
3393 be valid if it is invariant. */
3395 increment = biv_total_increment (bl, loop_start, loop_end);
3397 if (increment && invariant_p (increment)
3398 && invariant_p (bl->initial_value))
3400 /* Can calculate the loop exit value of its biv as
3401 (n_iterations * increment) + initial_value */
3403 /* The loop exit value of the giv is then
3404 (final_biv_value - extra increments) * mult_val + add_val.
3405 The extra increments are any increments to the biv which
3406 occur in the loop after the giv's value is calculated.
3407 We must search from the insn that sets the giv to the end
3408 of the loop to calculate this value. */
3410 insert_before = NEXT_INSN (loop_end);
3412 /* Put the final biv value in tem. */
3413 tem = gen_reg_rtx (bl->biv->mode);
3414 record_base_value (REGNO (tem), bl->biv->add_val, 0);
3415 emit_iv_add_mult (increment, GEN_INT (n_iterations),
3416 bl->initial_value, tem, insert_before);
3418 /* Subtract off extra increments as we find them. */
3419 for (insn = NEXT_INSN (v->insn); insn != loop_end;
3420 insn = NEXT_INSN (insn))
3422 struct induction *biv;
3424 for (biv = bl->biv; biv; biv = biv->next_iv)
3425 if (biv->insn == insn)
3428 tem = expand_binop (GET_MODE (tem), sub_optab, tem,
3429 biv->add_val, NULL_RTX, 0,
3431 seq = gen_sequence ();
3433 emit_insn_before (seq, insert_before);
3437 /* Now calculate the giv's final value. */
3438 emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
3441 if (loop_dump_stream)
3442 fprintf (loop_dump_stream,
3443 "Final giv value for %d, calc from biv's value.\n",
3444 REGNO (v->dest_reg));
3450 /* Replaceable giv's should never reach here. */
3454 /* Check to see if the biv is dead at all loop exits. */
3455 if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
3457 if (loop_dump_stream)
3458 fprintf (loop_dump_stream,
3459 "Final giv value for %d, giv dead after loop exit.\n",
3460 REGNO (v->dest_reg));
3469 /* Look back before LOOP_START for then insn that sets REG and return
3470 the equivalent constant if there is a REG_EQUAL note otherwise just
3471 the SET_SRC of REG. */
3474 loop_find_equiv_value (loop_start, reg)
3482 for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
3484 if (GET_CODE (insn) == CODE_LABEL)
3487 else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
3488 && reg_set_p (reg, insn))
3490 /* We found the last insn before the loop that sets the register.
3491 If it sets the entire register, and has a REG_EQUAL note,
3492 then use the value of the REG_EQUAL note. */
3493 if ((set = single_set (insn))
3494 && (SET_DEST (set) == reg))
3496 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3498 /* Only use the REG_EQUAL note if it is a constant.
3499 Other things, divide in particular, will cause
3500 problems later if we use them. */
3501 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3502 && CONSTANT_P (XEXP (note, 0)))
3503 ret = XEXP (note, 0);
3505 ret = SET_SRC (set);
3514 /* Return a simplified rtx for the expression OP - REG.
3516 REG must appear in OP, and OP must be a register or the sum of a register
3519 Thus, the return value must be const0_rtx or the second term.
3521 The caller is responsible for verifying that REG appears in OP and OP has
3525 subtract_reg_term (op, reg)
3530 if (GET_CODE (op) == PLUS)
3532 if (XEXP (op, 0) == reg)
3533 return XEXP (op, 1);
3534 else if (XEXP (op, 1) == reg)
3535 return XEXP (op, 0);
3537 /* OP does not contain REG as a term. */
3542 /* Find and return register term common to both expressions OP0 and
3543 OP1 or NULL_RTX if no such term exists. Each expression must be a
3544 REG or a PLUS of a REG. */
3547 find_common_reg_term (op0, op1)
3550 if ((GET_CODE (op0) == REG || GET_CODE (op0) == PLUS)
3551 && (GET_CODE (op1) == REG || GET_CODE (op1) == PLUS))
3558 if (GET_CODE (op0) == PLUS)
3559 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
3561 op01 = const0_rtx, op00 = op0;
3563 if (GET_CODE (op1) == PLUS)
3564 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
3566 op11 = const0_rtx, op10 = op1;
3568 /* Find and return common register term if present. */
3569 if (REG_P (op00) && (op00 == op10 || op00 == op11))
3571 else if (REG_P (op01) && (op01 == op10 || op01 == op11))
3575 /* No common register term found. */
3580 /* Calculate the number of loop iterations. Returns the exact number of loop
3581 iterations if it can be calculated, otherwise returns zero. */
3583 unsigned HOST_WIDE_INT
3584 loop_iterations (loop_start, loop_end, loop_info)
3585 rtx loop_start, loop_end;
3586 struct loop_info *loop_info;
3588 rtx comparison, comparison_value;
3589 rtx iteration_var, initial_value, increment, final_value;
3590 enum rtx_code comparison_code;
3591 HOST_WIDE_INT abs_inc;
3592 unsigned HOST_WIDE_INT abs_diff;
3595 int unsigned_p, compare_dir, final_larger;
3600 loop_info->n_iterations = 0;
3601 loop_info->initial_value = 0;
3602 loop_info->initial_equiv_value = 0;
3603 loop_info->comparison_value = 0;
3604 loop_info->final_value = 0;
3605 loop_info->final_equiv_value = 0;
3606 loop_info->increment = 0;
3607 loop_info->iteration_var = 0;
3608 loop_info->unroll_number = 1;
3609 loop_info->vtop = 0;
3611 /* We used to use prev_nonnote_insn here, but that fails because it might
3612 accidentally get the branch for a contained loop if the branch for this
3613 loop was deleted. We can only trust branches immediately before the
3615 last_loop_insn = PREV_INSN (loop_end);
3617 /* ??? We should probably try harder to find the jump insn
3618 at the end of the loop. The following code assumes that
3619 the last loop insn is a jump to the top of the loop. */
3620 if (GET_CODE (last_loop_insn) != JUMP_INSN)
3622 if (loop_dump_stream)
3623 fprintf (loop_dump_stream,
3624 "Loop iterations: No final conditional branch found.\n");
3628 /* If there is a more than a single jump to the top of the loop
3629 we cannot (easily) determine the iteration count. */
3630 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
3632 if (loop_dump_stream)
3633 fprintf (loop_dump_stream,
3634 "Loop iterations: Loop has multiple back edges.\n");
3638 /* Find the iteration variable. If the last insn is a conditional
3639 branch, and the insn before tests a register value, make that the
3640 iteration variable. */
3642 comparison = get_condition_for_loop (last_loop_insn);
3643 if (comparison == 0)
3645 if (loop_dump_stream)
3646 fprintf (loop_dump_stream,
3647 "Loop iterations: No final comparison found.\n");
3651 /* ??? Get_condition may switch position of induction variable and
3652 invariant register when it canonicalizes the comparison. */
3654 comparison_code = GET_CODE (comparison);
3655 iteration_var = XEXP (comparison, 0);
3656 comparison_value = XEXP (comparison, 1);
3658 /* Check if there is a NOTE_INSN_LOOP_VTOP note. If there is,
3659 that means that this is a for or while style loop, with
3660 a loop exit test at the start. Thus, we can assume that
3661 the loop condition was true when the loop was entered.
3663 We start at the end and search backwards for the previous
3664 NOTE. If there is no NOTE_INSN_LOOP_VTOP for this loop,
3665 the search will stop at the NOTE_INSN_LOOP_CONT. */
3668 vtop = PREV_INSN (vtop);
3669 while (GET_CODE (vtop) != NOTE
3670 || NOTE_LINE_NUMBER (vtop) > 0
3671 || NOTE_LINE_NUMBER (vtop) == NOTE_REPEATED_LINE_NUMBER
3672 || NOTE_LINE_NUMBER (vtop) == NOTE_INSN_DELETED);
3673 if (NOTE_LINE_NUMBER (vtop) != NOTE_INSN_LOOP_VTOP)
3675 loop_info->vtop = vtop;
3677 if (GET_CODE (iteration_var) != REG)
3679 if (loop_dump_stream)
3680 fprintf (loop_dump_stream,
3681 "Loop iterations: Comparison not against register.\n");
3685 /* The only new registers that care created before loop iterations are
3686 givs made from biv increments, so this should never occur. */
3688 if ((unsigned) REGNO (iteration_var) >= reg_iv_type->num_elements)
3691 iteration_info (iteration_var, &initial_value, &increment,
3692 loop_start, loop_end);
3693 if (initial_value == 0)
3694 /* iteration_info already printed a message. */
3699 switch (comparison_code)
3714 /* Cannot determine loop iterations with this case. */
3733 /* If the comparison value is an invariant register, then try to find
3734 its value from the insns before the start of the loop. */
3736 final_value = comparison_value;
3737 if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
3739 final_value = loop_find_equiv_value (loop_start, comparison_value);
3740 /* If we don't get an invariant final value, we are better
3741 off with the original register. */
3742 if (!invariant_p (final_value))
3743 final_value = comparison_value;
3746 /* Calculate the approximate final value of the induction variable
3747 (on the last successful iteration). The exact final value
3748 depends on the branch operator, and increment sign. It will be
3749 wrong if the iteration variable is not incremented by one each
3750 time through the loop and (comparison_value + off_by_one -
3751 initial_value) % increment != 0.
3752 ??? Note that the final_value may overflow and thus final_larger
3753 will be bogus. A potentially infinite loop will be classified
3754 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */
3756 final_value = plus_constant (final_value, off_by_one);
3758 /* Save the calculated values describing this loop's bounds, in case
3759 precondition_loop_p will need them later. These values can not be
3760 recalculated inside precondition_loop_p because strength reduction
3761 optimizations may obscure the loop's structure.
3763 These values are only required by precondition_loop_p and insert_bct
3764 whenever the number of iterations cannot be computed at compile time.
3765 Only the difference between final_value and initial_value is
3766 important. Note that final_value is only approximate. */
3767 loop_info->initial_value = initial_value;
3768 loop_info->comparison_value = comparison_value;
3769 loop_info->final_value = plus_constant (comparison_value, off_by_one);
3770 loop_info->increment = increment;
3771 loop_info->iteration_var = iteration_var;
3772 loop_info->comparison_code = comparison_code;
3774 /* Try to determine the iteration count for loops such
3775 as (for i = init; i < init + const; i++). When running the
3776 loop optimization twice, the first pass often converts simple
3777 loops into this form. */
3779 if (REG_P (initial_value))
3785 reg1 = initial_value;
3786 if (GET_CODE (final_value) == PLUS)
3787 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
3789 reg2 = final_value, const2 = const0_rtx;
3791 /* Check for initial_value = reg1, final_value = reg2 + const2,
3792 where reg1 != reg2. */
3793 if (REG_P (reg2) && reg2 != reg1)
3797 /* Find what reg1 is equivalent to. Hopefully it will
3798 either be reg2 or reg2 plus a constant. */
3799 temp = loop_find_equiv_value (loop_start, reg1);
3800 if (find_common_reg_term (temp, reg2))
3801 initial_value = temp;
3804 /* Find what reg2 is equivalent to. Hopefully it will
3805 either be reg1 or reg1 plus a constant. Let's ignore
3806 the latter case for now since it is not so common. */
3807 temp = loop_find_equiv_value (loop_start, reg2);
3808 if (temp == loop_info->iteration_var)
3809 temp = initial_value;
3811 final_value = (const2 == const0_rtx)
3812 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
3815 else if (loop_info->vtop && GET_CODE (reg2) == CONST_INT)
3819 /* When running the loop optimizer twice, check_dbra_loop
3820 further obfuscates reversible loops of the form:
3821 for (i = init; i < init + const; i++). We often end up with
3822 final_value = 0, initial_value = temp, temp = temp2 - init,
3823 where temp2 = init + const. If the loop has a vtop we
3824 can replace initial_value with const. */
3826 temp = loop_find_equiv_value (loop_start, reg1);
3827 if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0)))
3829 rtx temp2 = loop_find_equiv_value (loop_start, XEXP (temp, 0));
3830 if (GET_CODE (temp2) == PLUS
3831 && XEXP (temp2, 0) == XEXP (temp, 1))
3832 initial_value = XEXP (temp2, 1);
3837 /* If have initial_value = reg + const1 and final_value = reg +
3838 const2, then replace initial_value with const1 and final_value
3839 with const2. This should be safe since we are protected by the
3840 initial comparison before entering the loop if we have a vtop.
3841 For example, a + b < a + c is not equivalent to b < c for all a
3842 when using modulo arithmetic.
3844 ??? Without a vtop we could still perform the optimization if we check
3845 the initial and final values carefully. */
3847 && (reg_term = find_common_reg_term (initial_value, final_value)))
3849 initial_value = subtract_reg_term (initial_value, reg_term);
3850 final_value = subtract_reg_term (final_value, reg_term);
3853 loop_info->initial_equiv_value = initial_value;
3854 loop_info->final_equiv_value = final_value;
3856 /* For EQ comparison loops, we don't have a valid final value.
3857 Check this now so that we won't leave an invalid value if we
3858 return early for any other reason. */
3859 if (comparison_code == EQ)
3860 loop_info->final_equiv_value = loop_info->final_value = 0;
3864 if (loop_dump_stream)
3865 fprintf (loop_dump_stream,
3866 "Loop iterations: Increment value can't be calculated.\n");
3870 if (GET_CODE (increment) != CONST_INT)
3872 /* If we have a REG, check to see if REG holds a constant value. */
3873 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
3874 clear if it is worthwhile to try to handle such RTL. */
3875 if (GET_CODE (increment) == REG || GET_CODE (increment) == SUBREG)
3876 increment = loop_find_equiv_value (loop_start, increment);
3878 if (GET_CODE (increment) != CONST_INT)
3880 if (loop_dump_stream)
3882 fprintf (loop_dump_stream,
3883 "Loop iterations: Increment value not constant ");
3884 print_rtl (loop_dump_stream, increment);
3885 fprintf (loop_dump_stream, ".\n");
3889 loop_info->increment = increment;
3892 if (GET_CODE (initial_value) != CONST_INT)
3894 if (loop_dump_stream)
3896 fprintf (loop_dump_stream,
3897 "Loop iterations: Initial value not constant ");
3898 print_rtl (loop_dump_stream, initial_value);
3899 fprintf (loop_dump_stream, ".\n");
3903 else if (comparison_code == EQ)
3905 if (loop_dump_stream)
3906 fprintf (loop_dump_stream,
3907 "Loop iterations: EQ comparison loop.\n");
3910 else if (GET_CODE (final_value) != CONST_INT)
3912 if (loop_dump_stream)
3914 fprintf (loop_dump_stream,
3915 "Loop iterations: Final value not constant ");
3916 print_rtl (loop_dump_stream, final_value);
3917 fprintf (loop_dump_stream, ".\n");
3922 /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
3925 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3926 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3927 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3928 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3930 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3931 - (INTVAL (final_value) < INTVAL (initial_value));
3933 if (INTVAL (increment) > 0)
3935 else if (INTVAL (increment) == 0)
3940 /* There are 27 different cases: compare_dir = -1, 0, 1;
3941 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3942 There are 4 normal cases, 4 reverse cases (where the iteration variable
3943 will overflow before the loop exits), 4 infinite loop cases, and 15
3944 immediate exit (0 or 1 iteration depending on loop type) cases.
3945 Only try to optimize the normal cases. */
3947 /* (compare_dir/final_larger/increment_dir)
3948 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3949 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3950 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3951 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
3953 /* ?? If the meaning of reverse loops (where the iteration variable
3954 will overflow before the loop exits) is undefined, then could
3955 eliminate all of these special checks, and just always assume
3956 the loops are normal/immediate/infinite. Note that this means
3957 the sign of increment_dir does not have to be known. Also,
3958 since it does not really hurt if immediate exit loops or infinite loops
3959 are optimized, then that case could be ignored also, and hence all
3960 loops can be optimized.
3962 According to ANSI Spec, the reverse loop case result is undefined,
3963 because the action on overflow is undefined.
3965 See also the special test for NE loops below. */
3967 if (final_larger == increment_dir && final_larger != 0
3968 && (final_larger == compare_dir || compare_dir == 0))
3973 if (loop_dump_stream)
3974 fprintf (loop_dump_stream,
3975 "Loop iterations: Not normal loop.\n");
3979 /* Calculate the number of iterations, final_value is only an approximation,
3980 so correct for that. Note that abs_diff and n_iterations are
3981 unsigned, because they can be as large as 2^n - 1. */
3983 abs_inc = INTVAL (increment);
3985 abs_diff = INTVAL (final_value) - INTVAL (initial_value);
3986 else if (abs_inc < 0)
3988 abs_diff = INTVAL (initial_value) - INTVAL (final_value);
3994 /* For NE tests, make sure that the iteration variable won't miss
3995 the final value. If abs_diff mod abs_incr is not zero, then the
3996 iteration variable will overflow before the loop exits, and we
3997 can not calculate the number of iterations. */
3998 if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
4001 /* Note that the number of iterations could be calculated using
4002 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
4003 handle potential overflow of the summation. */
4004 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
4005 return loop_info->n_iterations;
4009 /* Replace uses of split bivs with their split pseudo register. This is
4010 for original instructions which remain after loop unrolling without
4014 remap_split_bivs (x)
4017 register enum rtx_code code;
4024 code = GET_CODE (x);
4039 /* If non-reduced/final-value givs were split, then this would also
4040 have to remap those givs also. */
4042 if (REGNO (x) < max_reg_before_loop
4043 && REG_IV_TYPE (REGNO (x)) == BASIC_INDUCT)
4044 return reg_biv_class[REGNO (x)]->biv->src_reg;
4051 fmt = GET_RTX_FORMAT (code);
4052 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4055 XEXP (x, i) = remap_split_bivs (XEXP (x, i));
4059 for (j = 0; j < XVECLEN (x, i); j++)
4060 XVECEXP (x, i, j) = remap_split_bivs (XVECEXP (x, i, j));
4066 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
4067 FIST_UID is always executed if LAST_UID is), then return 1. Otherwise
4068 return 0. COPY_START is where we can start looking for the insns
4069 FIRST_UID and LAST_UID. COPY_END is where we stop looking for these
4072 If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
4073 must dominate LAST_UID.
4075 If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4076 may not dominate LAST_UID.
4078 If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4079 must dominate LAST_UID. */
4082 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
4089 int passed_jump = 0;
4090 rtx p = NEXT_INSN (copy_start);
4092 while (INSN_UID (p) != first_uid)
4094 if (GET_CODE (p) == JUMP_INSN)
4096 /* Could not find FIRST_UID. */
4102 /* Verify that FIRST_UID is an insn that entirely sets REGNO. */
4103 if (GET_RTX_CLASS (GET_CODE (p)) != 'i'
4104 || ! dead_or_set_regno_p (p, regno))
4107 /* FIRST_UID is always executed. */
4108 if (passed_jump == 0)
4111 while (INSN_UID (p) != last_uid)
4113 /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
4114 can not be sure that FIRST_UID dominates LAST_UID. */
4115 if (GET_CODE (p) == CODE_LABEL)
4117 /* Could not find LAST_UID, but we reached the end of the loop, so
4119 else if (p == copy_end)
4124 /* FIRST_UID is always executed if LAST_UID is executed. */