1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
5 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
33 #include "insn-config.h"
38 #include "langhooks.h"
42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
43 unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
46 unsigned HOST_WIDE_INT, rtx);
47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT, rtx, int);
51 static rtx mask_rtx (enum machine_mode, int, int, int);
52 static rtx lshift_value (enum machine_mode, rtx, int, int);
53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT, int);
55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
59 /* Test whether a value is zero of a power of two. */
60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
62 /* Nonzero means divides or modulus operations are relatively cheap for
63 powers of two, so don't use branches; emit the operation instead.
64 Usually, this will mean that the MD file will emit non-branch
67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
70 #ifndef SLOW_UNALIGNED_ACCESS
71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
74 /* For compilers that support multiple targets with different word sizes,
75 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
76 is the H8/300(H) compiler. */
78 #ifndef MAX_BITS_PER_WORD
79 #define MAX_BITS_PER_WORD BITS_PER_WORD
82 /* Reduce conditional compilation elsewhere. */
85 #define CODE_FOR_insv CODE_FOR_nothing
86 #define gen_insv(a,b,c,d) NULL_RTX
90 #define CODE_FOR_extv CODE_FOR_nothing
91 #define gen_extv(a,b,c,d) NULL_RTX
95 #define CODE_FOR_extzv CODE_FOR_nothing
96 #define gen_extzv(a,b,c,d) NULL_RTX
99 /* Cost of various pieces of RTL. Note that some of these are indexed by
100 shift count and some by mode. */
101 static int zero_cost[2];
102 static int add_cost[2][NUM_MACHINE_MODES];
103 static int neg_cost[2][NUM_MACHINE_MODES];
104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
107 static int mul_cost[2][NUM_MACHINE_MODES];
108 static int sdiv_cost[2][NUM_MACHINE_MODES];
109 static int udiv_cost[2][NUM_MACHINE_MODES];
110 static int mul_widen_cost[2][NUM_MACHINE_MODES];
111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
118 struct rtx_def reg; rtunion reg_fld[2];
119 struct rtx_def plus; rtunion plus_fld1;
121 struct rtx_def mult; rtunion mult_fld1;
122 struct rtx_def sdiv; rtunion sdiv_fld1;
123 struct rtx_def udiv; rtunion udiv_fld1;
125 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
126 struct rtx_def smod_32; rtunion smod_32_fld1;
127 struct rtx_def wide_mult; rtunion wide_mult_fld1;
128 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
129 struct rtx_def wide_trunc;
130 struct rtx_def shift; rtunion shift_fld1;
131 struct rtx_def shift_mult; rtunion shift_mult_fld1;
132 struct rtx_def shift_add; rtunion shift_add_fld1;
133 struct rtx_def shift_sub; rtunion shift_sub_fld1;
136 rtx pow2[MAX_BITS_PER_WORD];
137 rtx cint[MAX_BITS_PER_WORD];
139 enum machine_mode mode, wider_mode;
143 for (m = 1; m < MAX_BITS_PER_WORD; m++)
145 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
146 cint[m] = GEN_INT (m);
148 memset (&all, 0, sizeof all);
150 PUT_CODE (&all.reg, REG);
151 /* Avoid using hard regs in ways which may be unsupported. */
152 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
154 PUT_CODE (&all.plus, PLUS);
155 XEXP (&all.plus, 0) = &all.reg;
156 XEXP (&all.plus, 1) = &all.reg;
158 PUT_CODE (&all.neg, NEG);
159 XEXP (&all.neg, 0) = &all.reg;
161 PUT_CODE (&all.mult, MULT);
162 XEXP (&all.mult, 0) = &all.reg;
163 XEXP (&all.mult, 1) = &all.reg;
165 PUT_CODE (&all.sdiv, DIV);
166 XEXP (&all.sdiv, 0) = &all.reg;
167 XEXP (&all.sdiv, 1) = &all.reg;
169 PUT_CODE (&all.udiv, UDIV);
170 XEXP (&all.udiv, 0) = &all.reg;
171 XEXP (&all.udiv, 1) = &all.reg;
173 PUT_CODE (&all.sdiv_32, DIV);
174 XEXP (&all.sdiv_32, 0) = &all.reg;
175 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
177 PUT_CODE (&all.smod_32, MOD);
178 XEXP (&all.smod_32, 0) = &all.reg;
179 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
181 PUT_CODE (&all.zext, ZERO_EXTEND);
182 XEXP (&all.zext, 0) = &all.reg;
184 PUT_CODE (&all.wide_mult, MULT);
185 XEXP (&all.wide_mult, 0) = &all.zext;
186 XEXP (&all.wide_mult, 1) = &all.zext;
188 PUT_CODE (&all.wide_lshr, LSHIFTRT);
189 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
191 PUT_CODE (&all.wide_trunc, TRUNCATE);
192 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
194 PUT_CODE (&all.shift, ASHIFT);
195 XEXP (&all.shift, 0) = &all.reg;
197 PUT_CODE (&all.shift_mult, MULT);
198 XEXP (&all.shift_mult, 0) = &all.reg;
200 PUT_CODE (&all.shift_add, PLUS);
201 XEXP (&all.shift_add, 0) = &all.shift_mult;
202 XEXP (&all.shift_add, 1) = &all.reg;
204 PUT_CODE (&all.shift_sub, MINUS);
205 XEXP (&all.shift_sub, 0) = &all.shift_mult;
206 XEXP (&all.shift_sub, 1) = &all.reg;
208 for (speed = 0; speed < 2; speed++)
210 crtl->maybe_hot_insn_p = speed;
211 zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
213 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
215 mode = GET_MODE_WIDER_MODE (mode))
217 PUT_MODE (&all.reg, mode);
218 PUT_MODE (&all.plus, mode);
219 PUT_MODE (&all.neg, mode);
220 PUT_MODE (&all.mult, mode);
221 PUT_MODE (&all.sdiv, mode);
222 PUT_MODE (&all.udiv, mode);
223 PUT_MODE (&all.sdiv_32, mode);
224 PUT_MODE (&all.smod_32, mode);
225 PUT_MODE (&all.wide_trunc, mode);
226 PUT_MODE (&all.shift, mode);
227 PUT_MODE (&all.shift_mult, mode);
228 PUT_MODE (&all.shift_add, mode);
229 PUT_MODE (&all.shift_sub, mode);
231 add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
232 neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
233 mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
234 sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
235 udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
237 sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
238 <= 2 * add_cost[speed][mode]);
239 smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
240 <= 4 * add_cost[speed][mode]);
242 wider_mode = GET_MODE_WIDER_MODE (mode);
243 if (wider_mode != VOIDmode)
245 PUT_MODE (&all.zext, wider_mode);
246 PUT_MODE (&all.wide_mult, wider_mode);
247 PUT_MODE (&all.wide_lshr, wider_mode);
248 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
250 mul_widen_cost[speed][wider_mode]
251 = rtx_cost (&all.wide_mult, SET, speed);
252 mul_highpart_cost[speed][mode]
253 = rtx_cost (&all.wide_trunc, SET, speed);
256 shift_cost[speed][mode][0] = 0;
257 shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
258 = add_cost[speed][mode];
260 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
261 for (m = 1; m < n; m++)
263 XEXP (&all.shift, 1) = cint[m];
264 XEXP (&all.shift_mult, 1) = pow2[m];
266 shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
267 shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
268 shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
272 default_rtl_profile ();
275 /* Return an rtx representing minus the value of X.
276 MODE is the intended mode of the result,
277 useful if X is a CONST_INT. */
280 negate_rtx (enum machine_mode mode, rtx x)
282 rtx result = simplify_unary_operation (NEG, mode, x, mode);
285 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
290 /* Report on the availability of insv/extv/extzv and the desired mode
291 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
292 is false; else the mode of the specified operand. If OPNO is -1,
293 all the caller cares about is whether the insn is available. */
295 mode_for_extraction (enum extraction_pattern pattern, int opno)
297 const struct insn_data *data;
304 data = &insn_data[CODE_FOR_insv];
307 return MAX_MACHINE_MODE;
312 data = &insn_data[CODE_FOR_extv];
315 return MAX_MACHINE_MODE;
320 data = &insn_data[CODE_FOR_extzv];
323 return MAX_MACHINE_MODE;
332 /* Everyone who uses this function used to follow it with
333 if (result == VOIDmode) result = word_mode; */
334 if (data->operand[opno].mode == VOIDmode)
336 return data->operand[opno].mode;
339 /* Return true if X, of mode MODE, matches the predicate for operand
340 OPNO of instruction ICODE. Allow volatile memories, regardless of
341 the ambient volatile_ok setting. */
344 check_predicate_volatile_ok (enum insn_code icode, int opno,
345 rtx x, enum machine_mode mode)
347 bool save_volatile_ok, result;
349 save_volatile_ok = volatile_ok;
350 result = insn_data[(int) icode].operand[opno].predicate (x, mode);
351 volatile_ok = save_volatile_ok;
355 /* A subroutine of store_bit_field, with the same arguments. Return true
356 if the operation could be implemented.
358 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
359 no other way of implementing the operation. If FALLBACK_P is false,
360 return false instead. */
363 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
364 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
365 rtx value, bool fallback_p)
368 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
369 unsigned HOST_WIDE_INT offset, bitpos;
374 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
376 while (GET_CODE (op0) == SUBREG)
378 /* The following line once was done only if WORDS_BIG_ENDIAN,
379 but I think that is a mistake. WORDS_BIG_ENDIAN is
380 meaningful at a much higher level; when structures are copied
381 between memory and regs, the higher-numbered regs
382 always get higher addresses. */
383 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
384 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
388 /* Paradoxical subregs need special handling on big endian machines. */
389 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
391 int difference = inner_mode_size - outer_mode_size;
393 if (WORDS_BIG_ENDIAN)
394 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
395 if (BYTES_BIG_ENDIAN)
396 byte_offset += difference % UNITS_PER_WORD;
399 byte_offset = SUBREG_BYTE (op0);
401 bitnum += byte_offset * BITS_PER_UNIT;
402 op0 = SUBREG_REG (op0);
405 /* No action is needed if the target is a register and if the field
406 lies completely outside that register. This can occur if the source
407 code contains an out-of-bounds access to a small array. */
408 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
411 /* Use vec_set patterns for inserting parts of vectors whenever
413 if (VECTOR_MODE_P (GET_MODE (op0))
415 && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
417 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
418 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
419 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
421 enum machine_mode outermode = GET_MODE (op0);
422 enum machine_mode innermode = GET_MODE_INNER (outermode);
423 int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
424 int pos = bitnum / GET_MODE_BITSIZE (innermode);
425 rtx rtxpos = GEN_INT (pos);
429 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
430 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
431 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
435 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
436 src = copy_to_mode_reg (mode1, src);
438 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
439 rtxpos = copy_to_mode_reg (mode1, rtxpos);
441 /* We could handle this, but we should always be called with a pseudo
442 for our targets and all insns should take them as outputs. */
443 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
444 && (*insn_data[icode].operand[1].predicate) (src, mode1)
445 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
446 pat = GEN_FCN (icode) (dest, src, rtxpos);
457 /* If the target is a register, overwriting the entire object, or storing
458 a full-word or multi-word field can be done with just a SUBREG.
460 If the target is memory, storing any naturally aligned field can be
461 done with a simple store. For targets that support fast unaligned
462 memory, any naturally sized, unit aligned field can be done directly. */
464 offset = bitnum / unit;
465 bitpos = bitnum % unit;
466 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
467 + (offset * UNITS_PER_WORD);
470 && bitsize == GET_MODE_BITSIZE (fieldmode)
472 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
473 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
474 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
475 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
476 || (offset * BITS_PER_UNIT % bitsize == 0
477 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
480 op0 = adjust_address (op0, fieldmode, offset);
481 else if (GET_MODE (op0) != fieldmode)
482 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
484 emit_move_insn (op0, value);
488 /* Make sure we are playing with integral modes. Pun with subregs
489 if we aren't. This must come after the entire register case above,
490 since that case is valid for any mode. The following cases are only
491 valid for integral modes. */
493 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
494 if (imode != GET_MODE (op0))
497 op0 = adjust_address (op0, imode, 0);
500 gcc_assert (imode != BLKmode);
501 op0 = gen_lowpart (imode, op0);
506 /* We may be accessing data outside the field, which means
507 we can alias adjacent data. */
510 op0 = shallow_copy_rtx (op0);
511 set_mem_alias_set (op0, 0);
512 set_mem_expr (op0, 0);
515 /* If OP0 is a register, BITPOS must count within a word.
516 But as we have it, it counts within whatever size OP0 now has.
517 On a bigendian machine, these are not the same, so convert. */
520 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
521 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
523 /* Storing an lsb-aligned field in a register
524 can be done with a movestrict instruction. */
527 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
528 && bitsize == GET_MODE_BITSIZE (fieldmode)
529 && (optab_handler (movstrict_optab, fieldmode)->insn_code
530 != CODE_FOR_nothing))
532 int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
534 rtx start = get_last_insn ();
536 /* Get appropriate low part of the value being stored. */
537 if (GET_CODE (value) == CONST_INT || REG_P (value))
538 value = gen_lowpart (fieldmode, value);
539 else if (!(GET_CODE (value) == SYMBOL_REF
540 || GET_CODE (value) == LABEL_REF
541 || GET_CODE (value) == CONST))
542 value = convert_to_mode (fieldmode, value, 0);
544 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
545 value = copy_to_mode_reg (fieldmode, value);
547 if (GET_CODE (op0) == SUBREG)
549 /* Else we've got some float mode source being extracted into
550 a different float mode destination -- this combination of
551 subregs results in Severe Tire Damage. */
552 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
553 || GET_MODE_CLASS (fieldmode) == MODE_INT
554 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
555 op0 = SUBREG_REG (op0);
558 insn = (GEN_FCN (icode)
559 (gen_rtx_SUBREG (fieldmode, op0,
560 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
561 + (offset * UNITS_PER_WORD)),
568 delete_insns_since (start);
571 /* Handle fields bigger than a word. */
573 if (bitsize > BITS_PER_WORD)
575 /* Here we transfer the words of the field
576 in the order least significant first.
577 This is because the most significant word is the one which may
579 However, only do that if the value is not BLKmode. */
581 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
582 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
586 /* This is the mode we must force value to, so that there will be enough
587 subwords to extract. Note that fieldmode will often (always?) be
588 VOIDmode, because that is what store_field uses to indicate that this
589 is a bit field, but passing VOIDmode to operand_subword_force
591 fieldmode = GET_MODE (value);
592 if (fieldmode == VOIDmode)
593 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
595 last = get_last_insn ();
596 for (i = 0; i < nwords; i++)
598 /* If I is 0, use the low-order word in both field and target;
599 if I is 1, use the next to lowest word; and so on. */
600 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
601 unsigned int bit_offset = (backwards
602 ? MAX ((int) bitsize - ((int) i + 1)
605 : (int) i * BITS_PER_WORD);
606 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
608 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
609 bitsize - i * BITS_PER_WORD),
610 bitnum + bit_offset, word_mode,
611 value_word, fallback_p))
613 delete_insns_since (last);
620 /* From here on we can assume that the field to be stored in is
621 a full-word (whatever type that is), since it is shorter than a word. */
623 /* OFFSET is the number of words or bytes (UNIT says which)
624 from STR_RTX to the first word or byte containing part of the field. */
629 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
633 /* Since this is a destination (lvalue), we can't copy
634 it to a pseudo. We can remove a SUBREG that does not
635 change the size of the operand. Such a SUBREG may
636 have been added above. */
637 gcc_assert (GET_CODE (op0) == SUBREG
638 && (GET_MODE_SIZE (GET_MODE (op0))
639 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
640 op0 = SUBREG_REG (op0);
642 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
643 op0, (offset * UNITS_PER_WORD));
648 /* If VALUE has a floating-point or complex mode, access it as an
649 integer of the corresponding size. This can occur on a machine
650 with 64 bit registers that uses SFmode for float. It can also
651 occur for unaligned float or complex fields. */
653 if (GET_MODE (value) != VOIDmode
654 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
655 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
657 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
658 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
661 /* Now OFFSET is nonzero only if OP0 is memory
662 and is therefore always measured in bytes. */
665 && GET_MODE (value) != BLKmode
667 && GET_MODE_BITSIZE (op_mode) >= bitsize
668 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
669 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
670 && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
672 && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
674 int xbitpos = bitpos;
677 rtx last = get_last_insn ();
680 /* Add OFFSET into OP0's address. */
682 xop0 = adjust_address (xop0, byte_mode, offset);
684 /* If xop0 is a register, we need it in OP_MODE
685 to make it acceptable to the format of insv. */
686 if (GET_CODE (xop0) == SUBREG)
687 /* We can't just change the mode, because this might clobber op0,
688 and we will need the original value of op0 if insv fails. */
689 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
690 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
691 xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
693 /* On big-endian machines, we count bits from the most significant.
694 If the bit field insn does not, we must invert. */
696 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
697 xbitpos = unit - bitsize - xbitpos;
699 /* We have been counting XBITPOS within UNIT.
700 Count instead within the size of the register. */
701 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
702 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
704 unit = GET_MODE_BITSIZE (op_mode);
706 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
708 if (GET_MODE (value) != op_mode)
710 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
712 /* Optimization: Don't bother really extending VALUE
713 if it has all the bits we will actually use. However,
714 if we must narrow it, be sure we do it correctly. */
716 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
720 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
722 tmp = simplify_gen_subreg (op_mode,
723 force_reg (GET_MODE (value),
725 GET_MODE (value), 0);
729 value1 = gen_lowpart (op_mode, value1);
731 else if (GET_CODE (value) == CONST_INT)
732 value1 = gen_int_mode (INTVAL (value), op_mode);
734 /* Parse phase is supposed to make VALUE's data type
735 match that of the component reference, which is a type
736 at least as wide as the field; so VALUE should have
737 a mode that corresponds to that type. */
738 gcc_assert (CONSTANT_P (value));
741 /* If this machine's insv insists on a register,
742 get VALUE1 into a register. */
743 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
745 value1 = force_reg (op_mode, value1);
747 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
753 delete_insns_since (last);
756 /* If OP0 is a memory, try copying it to a register and seeing if a
757 cheap register alternative is available. */
758 if (HAVE_insv && MEM_P (op0))
760 enum machine_mode bestmode;
762 /* Get the mode to use for inserting into this field. If OP0 is
763 BLKmode, get the smallest mode consistent with the alignment. If
764 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
765 mode. Otherwise, use the smallest mode containing the field. */
767 if (GET_MODE (op0) == BLKmode
768 || (op_mode != MAX_MACHINE_MODE
769 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
770 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
771 (op_mode == MAX_MACHINE_MODE
772 ? VOIDmode : op_mode),
773 MEM_VOLATILE_P (op0));
775 bestmode = GET_MODE (op0);
777 if (bestmode != VOIDmode
778 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
779 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
780 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
782 rtx last, tempreg, xop0;
783 unsigned HOST_WIDE_INT xoffset, xbitpos;
785 last = get_last_insn ();
787 /* Adjust address to point to the containing unit of
788 that mode. Compute the offset as a multiple of this unit,
789 counting in bytes. */
790 unit = GET_MODE_BITSIZE (bestmode);
791 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
792 xbitpos = bitnum % unit;
793 xop0 = adjust_address (op0, bestmode, xoffset);
795 /* Fetch that unit, store the bitfield in it, then store
797 tempreg = copy_to_reg (xop0);
798 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
799 fieldmode, orig_value, false))
801 emit_move_insn (xop0, tempreg);
804 delete_insns_since (last);
811 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
815 /* Generate code to store value from rtx VALUE
816 into a bit-field within structure STR_RTX
817 containing BITSIZE bits starting at bit BITNUM.
818 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
822 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
825 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
829 /* Use shifts and boolean operations to store VALUE
830 into a bit field of width BITSIZE
831 in a memory location specified by OP0 except offset by OFFSET bytes.
832 (OFFSET must be 0 if OP0 is a register.)
833 The field starts at position BITPOS within the byte.
834 (If OP0 is a register, it may be a full word or a narrower mode,
835 but BITPOS still counts within a full word,
836 which is significant on bigendian machines.) */
839 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
840 unsigned HOST_WIDE_INT bitsize,
841 unsigned HOST_WIDE_INT bitpos, rtx value)
843 enum machine_mode mode;
844 unsigned int total_bits = BITS_PER_WORD;
849 /* There is a case not handled here:
850 a structure with a known alignment of just a halfword
851 and a field split across two aligned halfwords within the structure.
852 Or likewise a structure with a known alignment of just a byte
853 and a field split across two bytes.
854 Such cases are not supposed to be able to occur. */
856 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
858 gcc_assert (!offset);
859 /* Special treatment for a bit field split across two registers. */
860 if (bitsize + bitpos > BITS_PER_WORD)
862 store_split_bit_field (op0, bitsize, bitpos, value);
868 /* Get the proper mode to use for this field. We want a mode that
869 includes the entire field. If such a mode would be larger than
870 a word, we won't be doing the extraction the normal way.
871 We don't want a mode bigger than the destination. */
873 mode = GET_MODE (op0);
874 if (GET_MODE_BITSIZE (mode) == 0
875 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
877 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
878 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
880 if (mode == VOIDmode)
882 /* The only way this should occur is if the field spans word
884 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
889 total_bits = GET_MODE_BITSIZE (mode);
891 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
892 be in the range 0 to total_bits-1, and put any excess bytes in
894 if (bitpos >= total_bits)
896 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
897 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
901 /* Get ref to an aligned byte, halfword, or word containing the field.
902 Adjust BITPOS to be position within a word,
903 and OFFSET to be the offset of that word.
904 Then alter OP0 to refer to that word. */
905 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
906 offset -= (offset % (total_bits / BITS_PER_UNIT));
907 op0 = adjust_address (op0, mode, offset);
910 mode = GET_MODE (op0);
912 /* Now MODE is either some integral mode for a MEM as OP0,
913 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
914 The bit field is contained entirely within OP0.
915 BITPOS is the starting bit number within OP0.
916 (OP0's mode may actually be narrower than MODE.) */
918 if (BYTES_BIG_ENDIAN)
919 /* BITPOS is the distance between our msb
920 and that of the containing datum.
921 Convert it to the distance from the lsb. */
922 bitpos = total_bits - bitsize - bitpos;
924 /* Now BITPOS is always the distance between our lsb
927 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
928 we must first convert its mode to MODE. */
930 if (GET_CODE (value) == CONST_INT)
932 HOST_WIDE_INT v = INTVAL (value);
934 if (bitsize < HOST_BITS_PER_WIDE_INT)
935 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
939 else if ((bitsize < HOST_BITS_PER_WIDE_INT
940 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
941 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
944 value = lshift_value (mode, value, bitpos, bitsize);
948 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
949 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
951 if (GET_MODE (value) != mode)
953 if ((REG_P (value) || GET_CODE (value) == SUBREG)
954 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
955 value = gen_lowpart (mode, value);
957 value = convert_to_mode (mode, value, 1);
961 value = expand_binop (mode, and_optab, value,
962 mask_rtx (mode, 0, bitsize, 0),
963 NULL_RTX, 1, OPTAB_LIB_WIDEN);
965 value = expand_shift (LSHIFT_EXPR, mode, value,
966 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
969 /* Now clear the chosen bits in OP0,
970 except that if VALUE is -1 we need not bother. */
971 /* We keep the intermediates in registers to allow CSE to combine
972 consecutive bitfield assignments. */
974 temp = force_reg (mode, op0);
978 temp = expand_binop (mode, and_optab, temp,
979 mask_rtx (mode, bitpos, bitsize, 1),
980 NULL_RTX, 1, OPTAB_LIB_WIDEN);
981 temp = force_reg (mode, temp);
984 /* Now logical-or VALUE into OP0, unless it is zero. */
988 temp = expand_binop (mode, ior_optab, temp, value,
989 NULL_RTX, 1, OPTAB_LIB_WIDEN);
990 temp = force_reg (mode, temp);
995 op0 = copy_rtx (op0);
996 emit_move_insn (op0, temp);
1000 /* Store a bit field that is split across multiple accessible memory objects.
1002 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1003 BITSIZE is the field width; BITPOS the position of its first bit
1005 VALUE is the value to store.
1007 This does not yet handle fields wider than BITS_PER_WORD. */
1010 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1011 unsigned HOST_WIDE_INT bitpos, rtx value)
1014 unsigned int bitsdone = 0;
1016 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1018 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1019 unit = BITS_PER_WORD;
1021 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1023 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1024 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1025 that VALUE might be a floating-point constant. */
1026 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1028 rtx word = gen_lowpart_common (word_mode, value);
1030 if (word && (value != word))
1033 value = gen_lowpart_common (word_mode,
1034 force_reg (GET_MODE (value) != VOIDmode
1036 : word_mode, value));
1039 while (bitsdone < bitsize)
1041 unsigned HOST_WIDE_INT thissize;
1043 unsigned HOST_WIDE_INT thispos;
1044 unsigned HOST_WIDE_INT offset;
1046 offset = (bitpos + bitsdone) / unit;
1047 thispos = (bitpos + bitsdone) % unit;
1049 /* THISSIZE must not overrun a word boundary. Otherwise,
1050 store_fixed_bit_field will call us again, and we will mutually
1052 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1053 thissize = MIN (thissize, unit - thispos);
1055 if (BYTES_BIG_ENDIAN)
1059 /* We must do an endian conversion exactly the same way as it is
1060 done in extract_bit_field, so that the two calls to
1061 extract_fixed_bit_field will have comparable arguments. */
1062 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1063 total_bits = BITS_PER_WORD;
1065 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1067 /* Fetch successively less significant portions. */
1068 if (GET_CODE (value) == CONST_INT)
1069 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1070 >> (bitsize - bitsdone - thissize))
1071 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1073 /* The args are chosen so that the last part includes the
1074 lsb. Give extract_bit_field the value it needs (with
1075 endianness compensation) to fetch the piece we want. */
1076 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1077 total_bits - bitsize + bitsdone,
1082 /* Fetch successively more significant portions. */
1083 if (GET_CODE (value) == CONST_INT)
1084 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1086 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1088 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1089 bitsdone, NULL_RTX, 1);
1092 /* If OP0 is a register, then handle OFFSET here.
1094 When handling multiword bitfields, extract_bit_field may pass
1095 down a word_mode SUBREG of a larger REG for a bitfield that actually
1096 crosses a word boundary. Thus, for a SUBREG, we must find
1097 the current word starting from the base register. */
1098 if (GET_CODE (op0) == SUBREG)
1100 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1101 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1102 GET_MODE (SUBREG_REG (op0)));
1105 else if (REG_P (op0))
1107 word = operand_subword_force (op0, offset, GET_MODE (op0));
1113 /* OFFSET is in UNITs, and UNIT is in bits.
1114 store_fixed_bit_field wants offset in bytes. */
1115 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1117 bitsdone += thissize;
1121 /* A subroutine of extract_bit_field_1 that converts return value X
1122 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1123 to extract_bit_field. */
1126 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1127 enum machine_mode tmode, bool unsignedp)
1129 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1132 /* If the x mode is not a scalar integral, first convert to the
1133 integer mode of that size and then access it as a floating-point
1134 value via a SUBREG. */
1135 if (!SCALAR_INT_MODE_P (tmode))
1137 enum machine_mode smode;
1139 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1140 x = convert_to_mode (smode, x, unsignedp);
1141 x = force_reg (smode, x);
1142 return gen_lowpart (tmode, x);
1145 return convert_to_mode (tmode, x, unsignedp);
1148 /* A subroutine of extract_bit_field, with the same arguments.
1149 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1150 if we can find no other means of implementing the operation.
1151 if FALLBACK_P is false, return NULL instead. */
1154 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1155 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1156 enum machine_mode mode, enum machine_mode tmode,
1160 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1161 unsigned HOST_WIDE_INT offset, bitpos;
1163 enum machine_mode int_mode;
1164 enum machine_mode ext_mode;
1165 enum machine_mode mode1;
1166 enum insn_code icode;
1169 if (tmode == VOIDmode)
1172 while (GET_CODE (op0) == SUBREG)
1174 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1175 op0 = SUBREG_REG (op0);
1178 /* If we have an out-of-bounds access to a register, just return an
1179 uninitialized register of the required mode. This can occur if the
1180 source code contains an out-of-bounds access to a small array. */
1181 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1182 return gen_reg_rtx (tmode);
1185 && mode == GET_MODE (op0)
1187 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1189 /* We're trying to extract a full register from itself. */
1193 /* See if we can get a better vector mode before extracting. */
1194 if (VECTOR_MODE_P (GET_MODE (op0))
1196 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1198 enum machine_mode new_mode;
1199 int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1201 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1202 new_mode = MIN_MODE_VECTOR_FLOAT;
1203 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1204 new_mode = MIN_MODE_VECTOR_FRACT;
1205 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1206 new_mode = MIN_MODE_VECTOR_UFRACT;
1207 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1208 new_mode = MIN_MODE_VECTOR_ACCUM;
1209 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1210 new_mode = MIN_MODE_VECTOR_UACCUM;
1212 new_mode = MIN_MODE_VECTOR_INT;
1214 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1215 if (GET_MODE_NUNITS (new_mode) == nunits
1216 && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1217 && targetm.vector_mode_supported_p (new_mode))
1219 if (new_mode != VOIDmode)
1220 op0 = gen_lowpart (new_mode, op0);
1223 /* Use vec_extract patterns for extracting parts of vectors whenever
1225 if (VECTOR_MODE_P (GET_MODE (op0))
1227 && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1228 != CODE_FOR_nothing)
1229 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1230 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1232 enum machine_mode outermode = GET_MODE (op0);
1233 enum machine_mode innermode = GET_MODE_INNER (outermode);
1234 int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1235 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1236 rtx rtxpos = GEN_INT (pos);
1238 rtx dest = NULL, pat, seq;
1239 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1240 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1241 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1243 if (innermode == tmode || innermode == mode)
1247 dest = gen_reg_rtx (innermode);
1251 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1252 dest = copy_to_mode_reg (mode0, dest);
1254 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1255 src = copy_to_mode_reg (mode1, src);
1257 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1258 rtxpos = copy_to_mode_reg (mode1, rtxpos);
1260 /* We could handle this, but we should always be called with a pseudo
1261 for our targets and all insns should take them as outputs. */
1262 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1263 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1264 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1266 pat = GEN_FCN (icode) (dest, src, rtxpos);
1274 return gen_lowpart (tmode, dest);
1279 /* Make sure we are playing with integral modes. Pun with subregs
1282 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1283 if (imode != GET_MODE (op0))
1286 op0 = adjust_address (op0, imode, 0);
1289 gcc_assert (imode != BLKmode);
1290 op0 = gen_lowpart (imode, op0);
1292 /* If we got a SUBREG, force it into a register since we
1293 aren't going to be able to do another SUBREG on it. */
1294 if (GET_CODE (op0) == SUBREG)
1295 op0 = force_reg (imode, op0);
1300 /* We may be accessing data outside the field, which means
1301 we can alias adjacent data. */
1304 op0 = shallow_copy_rtx (op0);
1305 set_mem_alias_set (op0, 0);
1306 set_mem_expr (op0, 0);
1309 /* Extraction of a full-word or multi-word value from a structure
1310 in a register or aligned memory can be done with just a SUBREG.
1311 A subword value in the least significant part of a register
1312 can also be extracted with a SUBREG. For this, we need the
1313 byte offset of the value in op0. */
1315 bitpos = bitnum % unit;
1316 offset = bitnum / unit;
1317 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1319 /* If OP0 is a register, BITPOS must count within a word.
1320 But as we have it, it counts within whatever size OP0 now has.
1321 On a bigendian machine, these are not the same, so convert. */
1322 if (BYTES_BIG_ENDIAN
1324 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1325 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1327 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1328 If that's wrong, the solution is to test for it and set TARGET to 0
1331 /* Only scalar integer modes can be converted via subregs. There is an
1332 additional problem for FP modes here in that they can have a precision
1333 which is different from the size. mode_for_size uses precision, but
1334 we want a mode based on the size, so we must avoid calling it for FP
1336 mode1 = (SCALAR_INT_MODE_P (tmode)
1337 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1340 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1341 && bitpos % BITS_PER_WORD == 0)
1342 || (mode1 != BLKmode
1343 /* ??? The big endian test here is wrong. This is correct
1344 if the value is in a register, and if mode_for_size is not
1345 the same mode as op0. This causes us to get unnecessarily
1346 inefficient code from the Thumb port when -mbig-endian. */
1347 && (BYTES_BIG_ENDIAN
1348 ? bitpos + bitsize == BITS_PER_WORD
1351 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1352 GET_MODE_BITSIZE (GET_MODE (op0)))
1353 && GET_MODE_SIZE (mode1) != 0
1354 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1356 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1357 || (offset * BITS_PER_UNIT % bitsize == 0
1358 && MEM_ALIGN (op0) % bitsize == 0)))))
1361 op0 = adjust_address (op0, mode1, offset);
1362 else if (mode1 != GET_MODE (op0))
1364 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1367 goto no_subreg_mode_swap;
1371 return convert_to_mode (tmode, op0, unsignedp);
1374 no_subreg_mode_swap:
1376 /* Handle fields bigger than a word. */
1378 if (bitsize > BITS_PER_WORD)
1380 /* Here we transfer the words of the field
1381 in the order least significant first.
1382 This is because the most significant word is the one which may
1383 be less than full. */
1385 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1388 if (target == 0 || !REG_P (target))
1389 target = gen_reg_rtx (mode);
1391 /* Indicate for flow that the entire target reg is being set. */
1392 emit_clobber (target);
1394 for (i = 0; i < nwords; i++)
1396 /* If I is 0, use the low-order word in both field and target;
1397 if I is 1, use the next to lowest word; and so on. */
1398 /* Word number in TARGET to use. */
1399 unsigned int wordnum
1401 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1403 /* Offset from start of field in OP0. */
1404 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1405 ? MAX (0, ((int) bitsize - ((int) i + 1)
1406 * (int) BITS_PER_WORD))
1407 : (int) i * BITS_PER_WORD);
1408 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1410 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1411 bitsize - i * BITS_PER_WORD),
1412 bitnum + bit_offset, 1, target_part, mode,
1415 gcc_assert (target_part);
1417 if (result_part != target_part)
1418 emit_move_insn (target_part, result_part);
1423 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1424 need to be zero'd out. */
1425 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1427 unsigned int i, total_words;
1429 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1430 for (i = nwords; i < total_words; i++)
1432 (operand_subword (target,
1433 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1440 /* Signed bit field: sign-extend with two arithmetic shifts. */
1441 target = expand_shift (LSHIFT_EXPR, mode, target,
1442 build_int_cst (NULL_TREE,
1443 GET_MODE_BITSIZE (mode) - bitsize),
1445 return expand_shift (RSHIFT_EXPR, mode, target,
1446 build_int_cst (NULL_TREE,
1447 GET_MODE_BITSIZE (mode) - bitsize),
1451 /* From here on we know the desired field is smaller than a word. */
1453 /* Check if there is a correspondingly-sized integer field, so we can
1454 safely extract it as one size of integer, if necessary; then
1455 truncate or extend to the size that is wanted; then use SUBREGs or
1456 convert_to_mode to get one of the modes we really wanted. */
1458 int_mode = int_mode_for_mode (tmode);
1459 if (int_mode == BLKmode)
1460 int_mode = int_mode_for_mode (mode);
1461 /* Should probably push op0 out to memory and then do a load. */
1462 gcc_assert (int_mode != BLKmode);
1464 /* OFFSET is the number of words or bytes (UNIT says which)
1465 from STR_RTX to the first word or byte containing part of the field. */
1469 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1472 op0 = copy_to_reg (op0);
1473 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1474 op0, (offset * UNITS_PER_WORD));
1479 /* Now OFFSET is nonzero only for memory operands. */
1480 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1481 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1482 if (ext_mode != MAX_MACHINE_MODE
1484 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1485 /* If op0 is a register, we need it in EXT_MODE to make it
1486 acceptable to the format of ext(z)v. */
1487 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1489 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1490 && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1492 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1493 rtx bitsize_rtx, bitpos_rtx;
1494 rtx last = get_last_insn ();
1496 rtx xtarget = target;
1497 rtx xspec_target = target;
1498 rtx xspec_target_subreg = 0;
1501 /* If op0 is a register, we need it in EXT_MODE to make it
1502 acceptable to the format of ext(z)v. */
1503 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1504 xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1506 /* Get ref to first byte containing part of the field. */
1507 xop0 = adjust_address (xop0, byte_mode, xoffset);
1509 /* On big-endian machines, we count bits from the most significant.
1510 If the bit field insn does not, we must invert. */
1511 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1512 xbitpos = unit - bitsize - xbitpos;
1514 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1515 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1516 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1518 unit = GET_MODE_BITSIZE (ext_mode);
1521 xtarget = xspec_target = gen_reg_rtx (tmode);
1523 if (GET_MODE (xtarget) != ext_mode)
1525 if (REG_P (xtarget))
1527 xtarget = gen_lowpart (ext_mode, xtarget);
1528 if (GET_MODE_SIZE (ext_mode)
1529 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1530 xspec_target_subreg = xtarget;
1533 xtarget = gen_reg_rtx (ext_mode);
1536 /* If this machine's ext(z)v insists on a register target,
1537 make sure we have one. */
1538 if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1539 xtarget = gen_reg_rtx (ext_mode);
1541 bitsize_rtx = GEN_INT (bitsize);
1542 bitpos_rtx = GEN_INT (xbitpos);
1545 ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1546 : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1550 if (xtarget == xspec_target)
1552 if (xtarget == xspec_target_subreg)
1553 return xspec_target;
1554 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1556 delete_insns_since (last);
1559 /* If OP0 is a memory, try copying it to a register and seeing if a
1560 cheap register alternative is available. */
1561 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1563 enum machine_mode bestmode;
1565 /* Get the mode to use for inserting into this field. If
1566 OP0 is BLKmode, get the smallest mode consistent with the
1567 alignment. If OP0 is a non-BLKmode object that is no
1568 wider than EXT_MODE, use its mode. Otherwise, use the
1569 smallest mode containing the field. */
1571 if (GET_MODE (op0) == BLKmode
1572 || (ext_mode != MAX_MACHINE_MODE
1573 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1574 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1575 (ext_mode == MAX_MACHINE_MODE
1576 ? VOIDmode : ext_mode),
1577 MEM_VOLATILE_P (op0));
1579 bestmode = GET_MODE (op0);
1581 if (bestmode != VOIDmode
1582 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1583 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1585 unsigned HOST_WIDE_INT xoffset, xbitpos;
1587 /* Compute the offset as a multiple of this unit,
1588 counting in bytes. */
1589 unit = GET_MODE_BITSIZE (bestmode);
1590 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1591 xbitpos = bitnum % unit;
1593 /* Make sure the register is big enough for the whole field. */
1594 if (xoffset * BITS_PER_UNIT + unit
1595 >= offset * BITS_PER_UNIT + bitsize)
1597 rtx last, result, xop0;
1599 last = get_last_insn ();
1601 /* Fetch it to a register in that size. */
1602 xop0 = adjust_address (op0, bestmode, xoffset);
1603 xop0 = force_reg (bestmode, xop0);
1604 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1606 mode, tmode, false);
1610 delete_insns_since (last);
1618 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1619 bitpos, target, unsignedp);
1620 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1623 /* Generate code to extract a byte-field from STR_RTX
1624 containing BITSIZE bits, starting at BITNUM,
1625 and put it in TARGET if possible (if TARGET is nonzero).
1626 Regardless of TARGET, we return the rtx for where the value is placed.
1628 STR_RTX is the structure containing the byte (a REG or MEM).
1629 UNSIGNEDP is nonzero if this is an unsigned bit field.
1630 MODE is the natural mode of the field value once extracted.
1631 TMODE is the mode the caller would like the value to have;
1632 but the value may be returned with type MODE instead.
1634 If a TARGET is specified and we can store in it at no extra cost,
1635 we do so, and return TARGET.
1636 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1637 if they are equally easy. */
1640 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1641 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1642 enum machine_mode mode, enum machine_mode tmode)
1644 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1645 target, mode, tmode, true);
1648 /* Extract a bit field using shifts and boolean operations
1649 Returns an rtx to represent the value.
1650 OP0 addresses a register (word) or memory (byte).
1651 BITPOS says which bit within the word or byte the bit field starts in.
1652 OFFSET says how many bytes farther the bit field starts;
1653 it is 0 if OP0 is a register.
1654 BITSIZE says how many bits long the bit field is.
1655 (If OP0 is a register, it may be narrower than a full word,
1656 but BITPOS still counts within a full word,
1657 which is significant on bigendian machines.)
1659 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660 If TARGET is nonzero, attempts to store the value there
1661 and return TARGET, but this is not guaranteed.
1662 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666 unsigned HOST_WIDE_INT offset,
1667 unsigned HOST_WIDE_INT bitsize,
1668 unsigned HOST_WIDE_INT bitpos, rtx target,
1671 unsigned int total_bits = BITS_PER_WORD;
1672 enum machine_mode mode;
1674 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1676 /* Special treatment for a bit field split across two registers. */
1677 if (bitsize + bitpos > BITS_PER_WORD)
1678 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1682 /* Get the proper mode to use for this field. We want a mode that
1683 includes the entire field. If such a mode would be larger than
1684 a word, we won't be doing the extraction the normal way. */
1686 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1689 if (mode == VOIDmode)
1690 /* The only way this should occur is if the field spans word
1692 return extract_split_bit_field (op0, bitsize,
1693 bitpos + offset * BITS_PER_UNIT,
1696 total_bits = GET_MODE_BITSIZE (mode);
1698 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1699 be in the range 0 to total_bits-1, and put any excess bytes in
1701 if (bitpos >= total_bits)
1703 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1708 /* Get ref to an aligned byte, halfword, or word containing the field.
1709 Adjust BITPOS to be position within a word,
1710 and OFFSET to be the offset of that word.
1711 Then alter OP0 to refer to that word. */
1712 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713 offset -= (offset % (total_bits / BITS_PER_UNIT));
1714 op0 = adjust_address (op0, mode, offset);
1717 mode = GET_MODE (op0);
1719 if (BYTES_BIG_ENDIAN)
1720 /* BITPOS is the distance between our msb and that of OP0.
1721 Convert it to the distance from the lsb. */
1722 bitpos = total_bits - bitsize - bitpos;
1724 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725 We have reduced the big-endian case to the little-endian case. */
1731 /* If the field does not already start at the lsb,
1732 shift it so it does. */
1733 tree amount = build_int_cst (NULL_TREE, bitpos);
1734 /* Maybe propagate the target for the shift. */
1735 /* But not if we will return it--could confuse integrate.c. */
1736 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737 if (tmode != mode) subtarget = 0;
1738 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1740 /* Convert the value to the desired mode. */
1742 op0 = convert_to_mode (tmode, op0, 1);
1744 /* Unless the msb of the field used to be the msb when we shifted,
1745 mask out the upper bits. */
1747 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748 return expand_binop (GET_MODE (op0), and_optab, op0,
1749 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750 target, 1, OPTAB_LIB_WIDEN);
1754 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755 then arithmetic-shift its lsb to the lsb of the word. */
1756 op0 = force_reg (mode, op0);
1760 /* Find the narrowest integer mode that contains the field. */
1762 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763 mode = GET_MODE_WIDER_MODE (mode))
1764 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1766 op0 = convert_to_mode (mode, op0, 0);
1770 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1773 = build_int_cst (NULL_TREE,
1774 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775 /* Maybe propagate the target for the shift. */
1776 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1780 return expand_shift (RSHIFT_EXPR, mode, op0,
1781 build_int_cst (NULL_TREE,
1782 GET_MODE_BITSIZE (mode) - bitsize),
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788 complement of that if COMPLEMENT. The mask is truncated if
1789 necessary to the width of mode MODE. The mask is zero-extended if
1790 BITSIZE+BITPOS is too small for MODE. */
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1795 HOST_WIDE_INT masklow, maskhigh;
1799 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800 masklow = (HOST_WIDE_INT) -1 << bitpos;
1804 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805 masklow &= ((unsigned HOST_WIDE_INT) -1
1806 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1808 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1811 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1815 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1823 maskhigh = ~maskhigh;
1827 return immed_double_const (masklow, maskhigh, mode);
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1836 unsigned HOST_WIDE_INT v = INTVAL (value);
1837 HOST_WIDE_INT low, high;
1839 if (bitsize < HOST_BITS_PER_WIDE_INT)
1840 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1842 if (bitpos < HOST_BITS_PER_WIDE_INT)
1845 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1850 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1853 return immed_double_const (low, high, mode);
1856 /* Extract a bit field that is split across two words
1857 and return an RTX for the result.
1859 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1860 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1861 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1864 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1868 unsigned int bitsdone = 0;
1869 rtx result = NULL_RTX;
1872 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1874 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1875 unit = BITS_PER_WORD;
1877 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1879 while (bitsdone < bitsize)
1881 unsigned HOST_WIDE_INT thissize;
1883 unsigned HOST_WIDE_INT thispos;
1884 unsigned HOST_WIDE_INT offset;
1886 offset = (bitpos + bitsdone) / unit;
1887 thispos = (bitpos + bitsdone) % unit;
1889 /* THISSIZE must not overrun a word boundary. Otherwise,
1890 extract_fixed_bit_field will call us again, and we will mutually
1892 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1893 thissize = MIN (thissize, unit - thispos);
1895 /* If OP0 is a register, then handle OFFSET here.
1897 When handling multiword bitfields, extract_bit_field may pass
1898 down a word_mode SUBREG of a larger REG for a bitfield that actually
1899 crosses a word boundary. Thus, for a SUBREG, we must find
1900 the current word starting from the base register. */
1901 if (GET_CODE (op0) == SUBREG)
1903 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1904 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1905 GET_MODE (SUBREG_REG (op0)));
1908 else if (REG_P (op0))
1910 word = operand_subword_force (op0, offset, GET_MODE (op0));
1916 /* Extract the parts in bit-counting order,
1917 whose meaning is determined by BYTES_PER_UNIT.
1918 OFFSET is in UNITs, and UNIT is in bits.
1919 extract_fixed_bit_field wants offset in bytes. */
1920 part = extract_fixed_bit_field (word_mode, word,
1921 offset * unit / BITS_PER_UNIT,
1922 thissize, thispos, 0, 1);
1923 bitsdone += thissize;
1925 /* Shift this part into place for the result. */
1926 if (BYTES_BIG_ENDIAN)
1928 if (bitsize != bitsdone)
1929 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1930 build_int_cst (NULL_TREE, bitsize - bitsdone),
1935 if (bitsdone != thissize)
1936 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937 build_int_cst (NULL_TREE,
1938 bitsdone - thissize), 0, 1);
1944 /* Combine the parts with bitwise or. This works
1945 because we extracted each part as an unsigned bit field. */
1946 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1952 /* Unsigned bit field: we are done. */
1955 /* Signed bit field: sign-extend with two arithmetic shifts. */
1956 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1957 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1959 return expand_shift (RSHIFT_EXPR, word_mode, result,
1960 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1964 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1965 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1966 MODE, fill the upper bits with zeros. Fail if the layout of either
1967 mode is unknown (as for CC modes) or if the extraction would involve
1968 unprofitable mode punning. Return the value on success, otherwise
1971 This is different from gen_lowpart* in these respects:
1973 - the returned value must always be considered an rvalue
1975 - when MODE is wider than SRC_MODE, the extraction involves
1978 - when MODE is smaller than SRC_MODE, the extraction involves
1979 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1981 In other words, this routine performs a computation, whereas the
1982 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1986 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1988 enum machine_mode int_mode, src_int_mode;
1990 if (mode == src_mode)
1993 if (CONSTANT_P (src))
1994 return simplify_gen_subreg (mode, src, src_mode,
1995 subreg_lowpart_offset (mode, src_mode));
1997 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2000 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2001 && MODES_TIEABLE_P (mode, src_mode))
2003 rtx x = gen_lowpart_common (mode, src);
2008 src_int_mode = int_mode_for_mode (src_mode);
2009 int_mode = int_mode_for_mode (mode);
2010 if (src_int_mode == BLKmode || int_mode == BLKmode)
2013 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2015 if (!MODES_TIEABLE_P (int_mode, mode))
2018 src = gen_lowpart (src_int_mode, src);
2019 src = convert_modes (int_mode, src_int_mode, src, true);
2020 src = gen_lowpart (mode, src);
2024 /* Add INC into TARGET. */
2027 expand_inc (rtx target, rtx inc)
2029 rtx value = expand_binop (GET_MODE (target), add_optab,
2031 target, 0, OPTAB_LIB_WIDEN);
2032 if (value != target)
2033 emit_move_insn (target, value);
2036 /* Subtract DEC from TARGET. */
2039 expand_dec (rtx target, rtx dec)
2041 rtx value = expand_binop (GET_MODE (target), sub_optab,
2043 target, 0, OPTAB_LIB_WIDEN);
2044 if (value != target)
2045 emit_move_insn (target, value);
2048 /* Output a shift instruction for expression code CODE,
2049 with SHIFTED being the rtx for the value to shift,
2050 and AMOUNT the tree for the amount to shift by.
2051 Store the result in the rtx TARGET, if that is convenient.
2052 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2053 Return the rtx for where the value is. */
2056 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2057 tree amount, rtx target, int unsignedp)
2060 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2061 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2062 optab lshift_optab = ashl_optab;
2063 optab rshift_arith_optab = ashr_optab;
2064 optab rshift_uns_optab = lshr_optab;
2065 optab lrotate_optab = rotl_optab;
2066 optab rrotate_optab = rotr_optab;
2067 enum machine_mode op1_mode;
2069 bool speed = optimize_insn_for_speed_p ();
2071 op1 = expand_normal (amount);
2072 op1_mode = GET_MODE (op1);
2074 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2075 shift amount is a vector, use the vector/vector shift patterns. */
2076 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2078 lshift_optab = vashl_optab;
2079 rshift_arith_optab = vashr_optab;
2080 rshift_uns_optab = vlshr_optab;
2081 lrotate_optab = vrotl_optab;
2082 rrotate_optab = vrotr_optab;
2085 /* Previously detected shift-counts computed by NEGATE_EXPR
2086 and shifted in the other direction; but that does not work
2089 if (SHIFT_COUNT_TRUNCATED)
2091 if (GET_CODE (op1) == CONST_INT
2092 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2093 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2094 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2095 % GET_MODE_BITSIZE (mode));
2096 else if (GET_CODE (op1) == SUBREG
2097 && subreg_lowpart_p (op1))
2098 op1 = SUBREG_REG (op1);
2101 if (op1 == const0_rtx)
2104 /* Check whether its cheaper to implement a left shift by a constant
2105 bit count by a sequence of additions. */
2106 if (code == LSHIFT_EXPR
2107 && GET_CODE (op1) == CONST_INT
2109 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2110 && INTVAL (op1) < MAX_BITS_PER_WORD
2111 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2112 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2115 for (i = 0; i < INTVAL (op1); i++)
2117 temp = force_reg (mode, shifted);
2118 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2119 unsignedp, OPTAB_LIB_WIDEN);
2124 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2126 enum optab_methods methods;
2129 methods = OPTAB_DIRECT;
2130 else if (attempt == 1)
2131 methods = OPTAB_WIDEN;
2133 methods = OPTAB_LIB_WIDEN;
2137 /* Widening does not work for rotation. */
2138 if (methods == OPTAB_WIDEN)
2140 else if (methods == OPTAB_LIB_WIDEN)
2142 /* If we have been unable to open-code this by a rotation,
2143 do it as the IOR of two shifts. I.e., to rotate A
2144 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2145 where C is the bitsize of A.
2147 It is theoretically possible that the target machine might
2148 not be able to perform either shift and hence we would
2149 be making two libcalls rather than just the one for the
2150 shift (similarly if IOR could not be done). We will allow
2151 this extremely unlikely lossage to avoid complicating the
2154 rtx subtarget = target == shifted ? 0 : target;
2155 tree new_amount, other_amount;
2157 tree type = TREE_TYPE (amount);
2158 if (GET_MODE (op1) != TYPE_MODE (type)
2159 && GET_MODE (op1) != VOIDmode)
2160 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2161 new_amount = make_tree (type, op1);
2163 = fold_build2 (MINUS_EXPR, type,
2164 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2167 shifted = force_reg (mode, shifted);
2169 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2170 mode, shifted, new_amount, 0, 1);
2171 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2172 mode, shifted, other_amount, subtarget, 1);
2173 return expand_binop (mode, ior_optab, temp, temp1, target,
2174 unsignedp, methods);
2177 temp = expand_binop (mode,
2178 left ? lrotate_optab : rrotate_optab,
2179 shifted, op1, target, unsignedp, methods);
2182 temp = expand_binop (mode,
2183 left ? lshift_optab : rshift_uns_optab,
2184 shifted, op1, target, unsignedp, methods);
2186 /* Do arithmetic shifts.
2187 Also, if we are going to widen the operand, we can just as well
2188 use an arithmetic right-shift instead of a logical one. */
2189 if (temp == 0 && ! rotate
2190 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2192 enum optab_methods methods1 = methods;
2194 /* If trying to widen a log shift to an arithmetic shift,
2195 don't accept an arithmetic shift of the same size. */
2197 methods1 = OPTAB_MUST_WIDEN;
2199 /* Arithmetic shift */
2201 temp = expand_binop (mode,
2202 left ? lshift_optab : rshift_arith_optab,
2203 shifted, op1, target, unsignedp, methods1);
2206 /* We used to try extzv here for logical right shifts, but that was
2207 only useful for one machine, the VAX, and caused poor code
2208 generation there for lshrdi3, so the code was deleted and a
2209 define_expand for lshrsi3 was added to vax.md. */
2229 /* This structure holds the "cost" of a multiply sequence. The
2230 "cost" field holds the total rtx_cost of every operator in the
2231 synthetic multiplication sequence, hence cost(a op b) is defined
2232 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2233 The "latency" field holds the minimum possible latency of the
2234 synthetic multiply, on a hypothetical infinitely parallel CPU.
2235 This is the critical path, or the maximum height, of the expression
2236 tree which is the sum of rtx_costs on the most expensive path from
2237 any leaf to the root. Hence latency(a op b) is defined as zero for
2238 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
2241 short cost; /* Total rtx_cost of the multiplication sequence. */
2242 short latency; /* The latency of the multiplication sequence. */
2245 /* This macro is used to compare a pointer to a mult_cost against an
2246 single integer "rtx_cost" value. This is equivalent to the macro
2247 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2248 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2249 || ((X)->cost == (Y) && (X)->latency < (Y)))
2251 /* This macro is used to compare two pointers to mult_costs against
2252 each other. The macro returns true if X is cheaper than Y.
2253 Currently, the cheaper of two mult_costs is the one with the
2254 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2255 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2256 || ((X)->cost == (Y)->cost \
2257 && (X)->latency < (Y)->latency))
2259 /* This structure records a sequence of operations.
2260 `ops' is the number of operations recorded.
2261 `cost' is their total cost.
2262 The operations are stored in `op' and the corresponding
2263 logarithms of the integer coefficients in `log'.
2265 These are the operations:
2266 alg_zero total := 0;
2267 alg_m total := multiplicand;
2268 alg_shift total := total * coeff
2269 alg_add_t_m2 total := total + multiplicand * coeff;
2270 alg_sub_t_m2 total := total - multiplicand * coeff;
2271 alg_add_factor total := total * coeff + total;
2272 alg_sub_factor total := total * coeff - total;
2273 alg_add_t2_m total := total * coeff + multiplicand;
2274 alg_sub_t2_m total := total * coeff - multiplicand;
2276 The first operand must be either alg_zero or alg_m. */
2280 struct mult_cost cost;
2282 /* The size of the OP and LOG fields are not directly related to the
2283 word size, but the worst-case algorithms will be if we have few
2284 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2285 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2286 in total wordsize operations. */
2287 enum alg_code op[MAX_BITS_PER_WORD];
2288 char log[MAX_BITS_PER_WORD];
2291 /* The entry for our multiplication cache/hash table. */
2292 struct alg_hash_entry {
2293 /* The number we are multiplying by. */
2294 unsigned HOST_WIDE_INT t;
2296 /* The mode in which we are multiplying something by T. */
2297 enum machine_mode mode;
2299 /* The best multiplication algorithm for t. */
2302 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2303 Otherwise, the cost within which multiplication by T is
2305 struct mult_cost cost;
2307 /* OPtimized for speed? */
2311 /* The number of cache/hash entries. */
2312 #if HOST_BITS_PER_WIDE_INT == 64
2313 #define NUM_ALG_HASH_ENTRIES 1031
2315 #define NUM_ALG_HASH_ENTRIES 307
2318 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2319 actually a hash table. If we have a collision, that the older
2320 entry is kicked out. */
2321 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2323 /* Indicates the type of fixup needed after a constant multiplication.
2324 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2325 the result should be negated, and ADD_VARIANT means that the
2326 multiplicand should be added to the result. */
2327 enum mult_variant {basic_variant, negate_variant, add_variant};
2329 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2330 const struct mult_cost *, enum machine_mode mode);
2331 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2332 struct algorithm *, enum mult_variant *, int);
2333 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2334 const struct algorithm *, enum mult_variant);
2335 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2336 int, rtx *, int *, int *);
2337 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2338 static rtx extract_high_half (enum machine_mode, rtx);
2339 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2340 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2342 /* Compute and return the best algorithm for multiplying by T.
2343 The algorithm must cost less than cost_limit
2344 If retval.cost >= COST_LIMIT, no algorithm was found and all
2345 other field of the returned struct are undefined.
2346 MODE is the machine mode of the multiplication. */
2349 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2350 const struct mult_cost *cost_limit, enum machine_mode mode)
2353 struct algorithm *alg_in, *best_alg;
2354 struct mult_cost best_cost;
2355 struct mult_cost new_limit;
2356 int op_cost, op_latency;
2357 unsigned HOST_WIDE_INT q;
2358 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2360 bool cache_hit = false;
2361 enum alg_code cache_alg = alg_zero;
2362 bool speed = optimize_insn_for_speed_p ();
2364 /* Indicate that no algorithm is yet found. If no algorithm
2365 is found, this value will be returned and indicate failure. */
2366 alg_out->cost.cost = cost_limit->cost + 1;
2367 alg_out->cost.latency = cost_limit->latency + 1;
2369 if (cost_limit->cost < 0
2370 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2373 /* Restrict the bits of "t" to the multiplication's mode. */
2374 t &= GET_MODE_MASK (mode);
2376 /* t == 1 can be done in zero cost. */
2380 alg_out->cost.cost = 0;
2381 alg_out->cost.latency = 0;
2382 alg_out->op[0] = alg_m;
2386 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2390 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2395 alg_out->cost.cost = zero_cost[speed];
2396 alg_out->cost.latency = zero_cost[speed];
2397 alg_out->op[0] = alg_zero;
2402 /* We'll be needing a couple extra algorithm structures now. */
2404 alg_in = XALLOCA (struct algorithm);
2405 best_alg = XALLOCA (struct algorithm);
2406 best_cost = *cost_limit;
2408 /* Compute the hash index. */
2409 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2411 /* See if we already know what to do for T. */
2412 if (alg_hash[hash_index].t == t
2413 && alg_hash[hash_index].mode == mode
2414 && alg_hash[hash_index].mode == mode
2415 && alg_hash[hash_index].speed == speed
2416 && alg_hash[hash_index].alg != alg_unknown)
2418 cache_alg = alg_hash[hash_index].alg;
2420 if (cache_alg == alg_impossible)
2422 /* The cache tells us that it's impossible to synthesize
2423 multiplication by T within alg_hash[hash_index].cost. */
2424 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2425 /* COST_LIMIT is at least as restrictive as the one
2426 recorded in the hash table, in which case we have no
2427 hope of synthesizing a multiplication. Just
2431 /* If we get here, COST_LIMIT is less restrictive than the
2432 one recorded in the hash table, so we may be able to
2433 synthesize a multiplication. Proceed as if we didn't
2434 have the cache entry. */
2438 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2439 /* The cached algorithm shows that this multiplication
2440 requires more cost than COST_LIMIT. Just return. This
2441 way, we don't clobber this cache entry with
2442 alg_impossible but retain useful information. */
2454 goto do_alg_addsub_t_m2;
2456 case alg_add_factor:
2457 case alg_sub_factor:
2458 goto do_alg_addsub_factor;
2461 goto do_alg_add_t2_m;
2464 goto do_alg_sub_t2_m;
2472 /* If we have a group of zero bits at the low-order part of T, try
2473 multiplying by the remaining bits and then doing a shift. */
2478 m = floor_log2 (t & -t); /* m = number of low zero bits */
2482 /* The function expand_shift will choose between a shift and
2483 a sequence of additions, so the observed cost is given as
2484 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2485 op_cost = m * add_cost[speed][mode];
2486 if (shift_cost[speed][mode][m] < op_cost)
2487 op_cost = shift_cost[speed][mode][m];
2488 new_limit.cost = best_cost.cost - op_cost;
2489 new_limit.latency = best_cost.latency - op_cost;
2490 synth_mult (alg_in, q, &new_limit, mode);
2492 alg_in->cost.cost += op_cost;
2493 alg_in->cost.latency += op_cost;
2494 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2496 struct algorithm *x;
2497 best_cost = alg_in->cost;
2498 x = alg_in, alg_in = best_alg, best_alg = x;
2499 best_alg->log[best_alg->ops] = m;
2500 best_alg->op[best_alg->ops] = alg_shift;
2507 /* If we have an odd number, add or subtract one. */
2510 unsigned HOST_WIDE_INT w;
2513 for (w = 1; (w & t) != 0; w <<= 1)
2515 /* If T was -1, then W will be zero after the loop. This is another
2516 case where T ends with ...111. Handling this with (T + 1) and
2517 subtract 1 produces slightly better code and results in algorithm
2518 selection much faster than treating it like the ...0111 case
2522 /* Reject the case where t is 3.
2523 Thus we prefer addition in that case. */
2526 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2528 op_cost = add_cost[speed][mode];
2529 new_limit.cost = best_cost.cost - op_cost;
2530 new_limit.latency = best_cost.latency - op_cost;
2531 synth_mult (alg_in, t + 1, &new_limit, mode);
2533 alg_in->cost.cost += op_cost;
2534 alg_in->cost.latency += op_cost;
2535 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2537 struct algorithm *x;
2538 best_cost = alg_in->cost;
2539 x = alg_in, alg_in = best_alg, best_alg = x;
2540 best_alg->log[best_alg->ops] = 0;
2541 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2546 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2548 op_cost = add_cost[speed][mode];
2549 new_limit.cost = best_cost.cost - op_cost;
2550 new_limit.latency = best_cost.latency - op_cost;
2551 synth_mult (alg_in, t - 1, &new_limit, mode);
2553 alg_in->cost.cost += op_cost;
2554 alg_in->cost.latency += op_cost;
2555 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2557 struct algorithm *x;
2558 best_cost = alg_in->cost;
2559 x = alg_in, alg_in = best_alg, best_alg = x;
2560 best_alg->log[best_alg->ops] = 0;
2561 best_alg->op[best_alg->ops] = alg_add_t_m2;
2568 /* Look for factors of t of the form
2569 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2570 If we find such a factor, we can multiply by t using an algorithm that
2571 multiplies by q, shift the result by m and add/subtract it to itself.
2573 We search for large factors first and loop down, even if large factors
2574 are less probable than small; if we find a large factor we will find a
2575 good sequence quickly, and therefore be able to prune (by decreasing
2576 COST_LIMIT) the search. */
2578 do_alg_addsub_factor:
2579 for (m = floor_log2 (t - 1); m >= 2; m--)
2581 unsigned HOST_WIDE_INT d;
2583 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2584 if (t % d == 0 && t > d && m < maxm
2585 && (!cache_hit || cache_alg == alg_add_factor))
2587 /* If the target has a cheap shift-and-add instruction use
2588 that in preference to a shift insn followed by an add insn.
2589 Assume that the shift-and-add is "atomic" with a latency
2590 equal to its cost, otherwise assume that on superscalar
2591 hardware the shift may be executed concurrently with the
2592 earlier steps in the algorithm. */
2593 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2594 if (shiftadd_cost[speed][mode][m] < op_cost)
2596 op_cost = shiftadd_cost[speed][mode][m];
2597 op_latency = op_cost;
2600 op_latency = add_cost[speed][mode];
2602 new_limit.cost = best_cost.cost - op_cost;
2603 new_limit.latency = best_cost.latency - op_latency;
2604 synth_mult (alg_in, t / d, &new_limit, mode);
2606 alg_in->cost.cost += op_cost;
2607 alg_in->cost.latency += op_latency;
2608 if (alg_in->cost.latency < op_cost)
2609 alg_in->cost.latency = op_cost;
2610 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2612 struct algorithm *x;
2613 best_cost = alg_in->cost;
2614 x = alg_in, alg_in = best_alg, best_alg = x;
2615 best_alg->log[best_alg->ops] = m;
2616 best_alg->op[best_alg->ops] = alg_add_factor;
2618 /* Other factors will have been taken care of in the recursion. */
2622 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2623 if (t % d == 0 && t > d && m < maxm
2624 && (!cache_hit || cache_alg == alg_sub_factor))
2626 /* If the target has a cheap shift-and-subtract insn use
2627 that in preference to a shift insn followed by a sub insn.
2628 Assume that the shift-and-sub is "atomic" with a latency
2629 equal to it's cost, otherwise assume that on superscalar
2630 hardware the shift may be executed concurrently with the
2631 earlier steps in the algorithm. */
2632 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2633 if (shiftsub_cost[speed][mode][m] < op_cost)
2635 op_cost = shiftsub_cost[speed][mode][m];
2636 op_latency = op_cost;
2639 op_latency = add_cost[speed][mode];
2641 new_limit.cost = best_cost.cost - op_cost;
2642 new_limit.latency = best_cost.latency - op_latency;
2643 synth_mult (alg_in, t / d, &new_limit, mode);
2645 alg_in->cost.cost += op_cost;
2646 alg_in->cost.latency += op_latency;
2647 if (alg_in->cost.latency < op_cost)
2648 alg_in->cost.latency = op_cost;
2649 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2651 struct algorithm *x;
2652 best_cost = alg_in->cost;
2653 x = alg_in, alg_in = best_alg, best_alg = x;
2654 best_alg->log[best_alg->ops] = m;
2655 best_alg->op[best_alg->ops] = alg_sub_factor;
2663 /* Try shift-and-add (load effective address) instructions,
2664 i.e. do a*3, a*5, a*9. */
2671 if (m >= 0 && m < maxm)
2673 op_cost = shiftadd_cost[speed][mode][m];
2674 new_limit.cost = best_cost.cost - op_cost;
2675 new_limit.latency = best_cost.latency - op_cost;
2676 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2678 alg_in->cost.cost += op_cost;
2679 alg_in->cost.latency += op_cost;
2680 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2682 struct algorithm *x;
2683 best_cost = alg_in->cost;
2684 x = alg_in, alg_in = best_alg, best_alg = x;
2685 best_alg->log[best_alg->ops] = m;
2686 best_alg->op[best_alg->ops] = alg_add_t2_m;
2696 if (m >= 0 && m < maxm)
2698 op_cost = shiftsub_cost[speed][mode][m];
2699 new_limit.cost = best_cost.cost - op_cost;
2700 new_limit.latency = best_cost.latency - op_cost;
2701 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2703 alg_in->cost.cost += op_cost;
2704 alg_in->cost.latency += op_cost;
2705 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2707 struct algorithm *x;
2708 best_cost = alg_in->cost;
2709 x = alg_in, alg_in = best_alg, best_alg = x;
2710 best_alg->log[best_alg->ops] = m;
2711 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2719 /* If best_cost has not decreased, we have not found any algorithm. */
2720 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2722 /* We failed to find an algorithm. Record alg_impossible for
2723 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2724 we are asked to find an algorithm for T within the same or
2725 lower COST_LIMIT, we can immediately return to the
2727 alg_hash[hash_index].t = t;
2728 alg_hash[hash_index].mode = mode;
2729 alg_hash[hash_index].speed = speed;
2730 alg_hash[hash_index].alg = alg_impossible;
2731 alg_hash[hash_index].cost = *cost_limit;
2735 /* Cache the result. */
2738 alg_hash[hash_index].t = t;
2739 alg_hash[hash_index].mode = mode;
2740 alg_hash[hash_index].speed = speed;
2741 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2742 alg_hash[hash_index].cost.cost = best_cost.cost;
2743 alg_hash[hash_index].cost.latency = best_cost.latency;
2746 /* If we are getting a too long sequence for `struct algorithm'
2747 to record, make this search fail. */
2748 if (best_alg->ops == MAX_BITS_PER_WORD)
2751 /* Copy the algorithm from temporary space to the space at alg_out.
2752 We avoid using structure assignment because the majority of
2753 best_alg is normally undefined, and this is a critical function. */
2754 alg_out->ops = best_alg->ops + 1;
2755 alg_out->cost = best_cost;
2756 memcpy (alg_out->op, best_alg->op,
2757 alg_out->ops * sizeof *alg_out->op);
2758 memcpy (alg_out->log, best_alg->log,
2759 alg_out->ops * sizeof *alg_out->log);
2762 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2763 Try three variations:
2765 - a shift/add sequence based on VAL itself
2766 - a shift/add sequence based on -VAL, followed by a negation
2767 - a shift/add sequence based on VAL - 1, followed by an addition.
2769 Return true if the cheapest of these cost less than MULT_COST,
2770 describing the algorithm in *ALG and final fixup in *VARIANT. */
2773 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2774 struct algorithm *alg, enum mult_variant *variant,
2777 struct algorithm alg2;
2778 struct mult_cost limit;
2780 bool speed = optimize_insn_for_speed_p ();
2782 /* Fail quickly for impossible bounds. */
2786 /* Ensure that mult_cost provides a reasonable upper bound.
2787 Any constant multiplication can be performed with less
2788 than 2 * bits additions. */
2789 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2790 if (mult_cost > op_cost)
2791 mult_cost = op_cost;
2793 *variant = basic_variant;
2794 limit.cost = mult_cost;
2795 limit.latency = mult_cost;
2796 synth_mult (alg, val, &limit, mode);
2798 /* This works only if the inverted value actually fits in an
2800 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2802 op_cost = neg_cost[speed][mode];
2803 if (MULT_COST_LESS (&alg->cost, mult_cost))
2805 limit.cost = alg->cost.cost - op_cost;
2806 limit.latency = alg->cost.latency - op_cost;
2810 limit.cost = mult_cost - op_cost;
2811 limit.latency = mult_cost - op_cost;
2814 synth_mult (&alg2, -val, &limit, mode);
2815 alg2.cost.cost += op_cost;
2816 alg2.cost.latency += op_cost;
2817 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2818 *alg = alg2, *variant = negate_variant;
2821 /* This proves very useful for division-by-constant. */
2822 op_cost = add_cost[speed][mode];
2823 if (MULT_COST_LESS (&alg->cost, mult_cost))
2825 limit.cost = alg->cost.cost - op_cost;
2826 limit.latency = alg->cost.latency - op_cost;
2830 limit.cost = mult_cost - op_cost;
2831 limit.latency = mult_cost - op_cost;
2834 synth_mult (&alg2, val - 1, &limit, mode);
2835 alg2.cost.cost += op_cost;
2836 alg2.cost.latency += op_cost;
2837 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2838 *alg = alg2, *variant = add_variant;
2840 return MULT_COST_LESS (&alg->cost, mult_cost);
2843 /* A subroutine of expand_mult, used for constant multiplications.
2844 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2845 convenient. Use the shift/add sequence described by ALG and apply
2846 the final fixup specified by VARIANT. */
2849 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2850 rtx target, const struct algorithm *alg,
2851 enum mult_variant variant)
2853 HOST_WIDE_INT val_so_far;
2854 rtx insn, accum, tem;
2856 enum machine_mode nmode;
2858 /* Avoid referencing memory over and over and invalid sharing
2860 op0 = force_reg (mode, op0);
2862 /* ACCUM starts out either as OP0 or as a zero, depending on
2863 the first operation. */
2865 if (alg->op[0] == alg_zero)
2867 accum = copy_to_mode_reg (mode, const0_rtx);
2870 else if (alg->op[0] == alg_m)
2872 accum = copy_to_mode_reg (mode, op0);
2878 for (opno = 1; opno < alg->ops; opno++)
2880 int log = alg->log[opno];
2881 rtx shift_subtarget = optimize ? 0 : accum;
2883 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2886 rtx accum_target = optimize ? 0 : accum;
2888 switch (alg->op[opno])
2891 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2892 build_int_cst (NULL_TREE, log),
2898 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2899 build_int_cst (NULL_TREE, log),
2901 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2902 add_target ? add_target : accum_target);
2903 val_so_far += (HOST_WIDE_INT) 1 << log;
2907 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2908 build_int_cst (NULL_TREE, log),
2910 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2911 add_target ? add_target : accum_target);
2912 val_so_far -= (HOST_WIDE_INT) 1 << log;
2916 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2917 build_int_cst (NULL_TREE, log),
2920 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2921 add_target ? add_target : accum_target);
2922 val_so_far = (val_so_far << log) + 1;
2926 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2927 build_int_cst (NULL_TREE, log),
2928 shift_subtarget, 0);
2929 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2930 add_target ? add_target : accum_target);
2931 val_so_far = (val_so_far << log) - 1;
2934 case alg_add_factor:
2935 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2936 build_int_cst (NULL_TREE, log),
2938 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939 add_target ? add_target : accum_target);
2940 val_so_far += val_so_far << log;
2943 case alg_sub_factor:
2944 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2945 build_int_cst (NULL_TREE, log),
2947 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2949 ? add_target : (optimize ? 0 : tem)));
2950 val_so_far = (val_so_far << log) - val_so_far;
2957 /* Write a REG_EQUAL note on the last insn so that we can cse
2958 multiplication sequences. Note that if ACCUM is a SUBREG,
2959 we've set the inner register and must properly indicate
2962 tem = op0, nmode = mode;
2963 if (GET_CODE (accum) == SUBREG)
2965 nmode = GET_MODE (SUBREG_REG (accum));
2966 tem = gen_lowpart (nmode, op0);
2969 insn = get_last_insn ();
2970 set_unique_reg_note (insn, REG_EQUAL,
2971 gen_rtx_MULT (nmode, tem,
2972 GEN_INT (val_so_far)));
2975 if (variant == negate_variant)
2977 val_so_far = -val_so_far;
2978 accum = expand_unop (mode, neg_optab, accum, target, 0);
2980 else if (variant == add_variant)
2982 val_so_far = val_so_far + 1;
2983 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2986 /* Compare only the bits of val and val_so_far that are significant
2987 in the result mode, to avoid sign-/zero-extension confusion. */
2988 val &= GET_MODE_MASK (mode);
2989 val_so_far &= GET_MODE_MASK (mode);
2990 gcc_assert (val == val_so_far);
2995 /* Perform a multiplication and return an rtx for the result.
2996 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2997 TARGET is a suggestion for where to store the result (an rtx).
2999 We check specially for a constant integer as OP1.
3000 If you want this check for OP0 as well, then before calling
3001 you should swap the two operands if OP0 would be constant. */
3004 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3007 enum mult_variant variant;
3008 struct algorithm algorithm;
3010 bool speed = optimize_insn_for_speed_p ();
3012 /* Handling const0_rtx here allows us to use zero as a rogue value for
3014 if (op1 == const0_rtx)
3016 if (op1 == const1_rtx)
3018 if (op1 == constm1_rtx)
3019 return expand_unop (mode,
3020 GET_MODE_CLASS (mode) == MODE_INT
3021 && !unsignedp && flag_trapv
3022 ? negv_optab : neg_optab,
3025 /* These are the operations that are potentially turned into a sequence
3026 of shifts and additions. */
3027 if (SCALAR_INT_MODE_P (mode)
3028 && (unsignedp || !flag_trapv))
3030 HOST_WIDE_INT coeff = 0;
3031 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3033 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3034 less than or equal in size to `unsigned int' this doesn't matter.
3035 If the mode is larger than `unsigned int', then synth_mult works
3036 only if the constant value exactly fits in an `unsigned int' without
3037 any truncation. This means that multiplying by negative values does
3038 not work; results are off by 2^32 on a 32 bit machine. */
3040 if (GET_CODE (op1) == CONST_INT)
3042 /* Attempt to handle multiplication of DImode values by negative
3043 coefficients, by performing the multiplication by a positive
3044 multiplier and then inverting the result. */
3045 if (INTVAL (op1) < 0
3046 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3048 /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3049 result is interpreted as an unsigned coefficient.
3050 Exclude cost of op0 from max_cost to match the cost
3051 calculation of the synth_mult. */
3052 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3053 - neg_cost[speed][mode];
3055 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3056 &variant, max_cost))
3058 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3059 NULL_RTX, &algorithm,
3061 return expand_unop (mode, neg_optab, temp, target, 0);
3064 else coeff = INTVAL (op1);
3066 else if (GET_CODE (op1) == CONST_DOUBLE)
3068 /* If we are multiplying in DImode, it may still be a win
3069 to try to work with shifts and adds. */
3070 if (CONST_DOUBLE_HIGH (op1) == 0)
3071 coeff = CONST_DOUBLE_LOW (op1);
3072 else if (CONST_DOUBLE_LOW (op1) == 0
3073 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3075 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3076 + HOST_BITS_PER_WIDE_INT;
3077 return expand_shift (LSHIFT_EXPR, mode, op0,
3078 build_int_cst (NULL_TREE, shift),
3083 /* We used to test optimize here, on the grounds that it's better to
3084 produce a smaller program when -O is not used. But this causes
3085 such a terrible slowdown sometimes that it seems better to always
3089 /* Special case powers of two. */
3090 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3091 return expand_shift (LSHIFT_EXPR, mode, op0,
3092 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3095 /* Exclude cost of op0 from max_cost to match the cost
3096 calculation of the synth_mult. */
3097 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3098 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3100 return expand_mult_const (mode, op0, coeff, target,
3101 &algorithm, variant);
3105 if (GET_CODE (op0) == CONST_DOUBLE)
3112 /* Expand x*2.0 as x+x. */
3113 if (GET_CODE (op1) == CONST_DOUBLE
3114 && SCALAR_FLOAT_MODE_P (mode))
3117 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3119 if (REAL_VALUES_EQUAL (d, dconst2))
3121 op0 = force_reg (GET_MODE (op0), op0);
3122 return expand_binop (mode, add_optab, op0, op0,
3123 target, unsignedp, OPTAB_LIB_WIDEN);
3127 /* This used to use umul_optab if unsigned, but for non-widening multiply
3128 there is no difference between signed and unsigned. */
3129 op0 = expand_binop (mode,
3131 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3132 ? smulv_optab : smul_optab,
3133 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3138 /* Return the smallest n such that 2**n >= X. */
3141 ceil_log2 (unsigned HOST_WIDE_INT x)
3143 return floor_log2 (x - 1) + 1;
3146 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3147 replace division by D, and put the least significant N bits of the result
3148 in *MULTIPLIER_PTR and return the most significant bit.
3150 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3151 needed precision is in PRECISION (should be <= N).
3153 PRECISION should be as small as possible so this function can choose
3154 multiplier more freely.
3156 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3157 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3159 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3160 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3163 unsigned HOST_WIDE_INT
3164 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3165 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3167 HOST_WIDE_INT mhigh_hi, mlow_hi;
3168 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3169 int lgup, post_shift;
3171 unsigned HOST_WIDE_INT nl, dummy1;
3172 HOST_WIDE_INT nh, dummy2;
3174 /* lgup = ceil(log2(divisor)); */
3175 lgup = ceil_log2 (d);
3177 gcc_assert (lgup <= n);
3180 pow2 = n + lgup - precision;
3182 /* We could handle this with some effort, but this case is much
3183 better handled directly with a scc insn, so rely on caller using
3185 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3187 /* mlow = 2^(N + lgup)/d */
3188 if (pow >= HOST_BITS_PER_WIDE_INT)
3190 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3196 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3198 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3199 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3201 /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3202 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3203 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3205 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3206 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3207 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3209 gcc_assert (!mhigh_hi || nh - d < d);
3210 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3211 /* Assert that mlow < mhigh. */
3212 gcc_assert (mlow_hi < mhigh_hi
3213 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3215 /* If precision == N, then mlow, mhigh exceed 2^N
3216 (but they do not exceed 2^(N+1)). */
3218 /* Reduce to lowest terms. */
3219 for (post_shift = lgup; post_shift > 0; post_shift--)
3221 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3222 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3232 *post_shift_ptr = post_shift;
3234 if (n < HOST_BITS_PER_WIDE_INT)
3236 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3237 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3238 return mhigh_lo >= mask;
3242 *multiplier_ptr = GEN_INT (mhigh_lo);
3247 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3248 congruent to 1 (mod 2**N). */
3250 static unsigned HOST_WIDE_INT
3251 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3253 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3255 /* The algorithm notes that the choice y = x satisfies
3256 x*y == 1 mod 2^3, since x is assumed odd.
3257 Each iteration doubles the number of bits of significance in y. */
3259 unsigned HOST_WIDE_INT mask;
3260 unsigned HOST_WIDE_INT y = x;
3263 mask = (n == HOST_BITS_PER_WIDE_INT
3264 ? ~(unsigned HOST_WIDE_INT) 0
3265 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3269 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3275 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3276 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3277 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3278 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3281 The result is put in TARGET if that is convenient.
3283 MODE is the mode of operation. */
3286 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3287 rtx op1, rtx target, int unsignedp)
3290 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3292 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3293 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3295 tem = expand_and (mode, tem, op1, NULL_RTX);
3297 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3300 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3301 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3303 tem = expand_and (mode, tem, op0, NULL_RTX);
3304 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3310 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3313 extract_high_half (enum machine_mode mode, rtx op)
3315 enum machine_mode wider_mode;
3317 if (mode == word_mode)
3318 return gen_highpart (mode, op);
3320 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3322 wider_mode = GET_MODE_WIDER_MODE (mode);
3323 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3324 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3325 return convert_modes (mode, wider_mode, op, 0);
3328 /* Like expand_mult_highpart, but only consider using a multiplication
3329 optab. OP1 is an rtx for the constant operand. */
3332 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3333 rtx target, int unsignedp, int max_cost)
3335 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3336 enum machine_mode wider_mode;
3340 bool speed = optimize_insn_for_speed_p ();
3342 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3344 wider_mode = GET_MODE_WIDER_MODE (mode);
3345 size = GET_MODE_BITSIZE (mode);
3347 /* Firstly, try using a multiplication insn that only generates the needed
3348 high part of the product, and in the sign flavor of unsignedp. */
3349 if (mul_highpart_cost[speed][mode] < max_cost)
3351 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3352 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3353 unsignedp, OPTAB_DIRECT);
3358 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3359 Need to adjust the result after the multiplication. */
3360 if (size - 1 < BITS_PER_WORD
3361 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3362 + 4 * add_cost[speed][mode] < max_cost))
3364 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3365 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3366 unsignedp, OPTAB_DIRECT);
3368 /* We used the wrong signedness. Adjust the result. */
3369 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3373 /* Try widening multiplication. */
3374 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3375 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3376 && mul_widen_cost[speed][wider_mode] < max_cost)
3378 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3379 unsignedp, OPTAB_WIDEN);
3381 return extract_high_half (mode, tem);
3384 /* Try widening the mode and perform a non-widening multiplication. */
3385 if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3386 && size - 1 < BITS_PER_WORD
3387 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3389 rtx insns, wop0, wop1;
3391 /* We need to widen the operands, for example to ensure the
3392 constant multiplier is correctly sign or zero extended.
3393 Use a sequence to clean-up any instructions emitted by
3394 the conversions if things don't work out. */
3396 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3397 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3398 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3399 unsignedp, OPTAB_WIDEN);
3400 insns = get_insns ();
3406 return extract_high_half (mode, tem);
3410 /* Try widening multiplication of opposite signedness, and adjust. */
3411 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3412 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3413 && size - 1 < BITS_PER_WORD
3414 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3415 + 4 * add_cost[speed][mode] < max_cost))
3417 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3418 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3421 tem = extract_high_half (mode, tem);
3422 /* We used the wrong signedness. Adjust the result. */
3423 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3431 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3432 putting the high half of the result in TARGET if that is convenient,
3433 and return where the result is. If the operation can not be performed,
3436 MODE is the mode of operation and result.
3438 UNSIGNEDP nonzero means unsigned multiply.
3440 MAX_COST is the total allowed cost for the expanded RTL. */
3443 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3444 rtx target, int unsignedp, int max_cost)
3446 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3447 unsigned HOST_WIDE_INT cnst1;
3449 bool sign_adjust = false;
3450 enum mult_variant variant;
3451 struct algorithm alg;
3453 bool speed = optimize_insn_for_speed_p ();
3455 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3456 /* We can't support modes wider than HOST_BITS_PER_INT. */
3457 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3459 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3461 /* We can't optimize modes wider than BITS_PER_WORD.
3462 ??? We might be able to perform double-word arithmetic if
3463 mode == word_mode, however all the cost calculations in
3464 synth_mult etc. assume single-word operations. */
3465 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3466 return expand_mult_highpart_optab (mode, op0, op1, target,
3467 unsignedp, max_cost);
3469 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3471 /* Check whether we try to multiply by a negative constant. */
3472 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3475 extra_cost += add_cost[speed][mode];
3478 /* See whether shift/add multiplication is cheap enough. */
3479 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3480 max_cost - extra_cost))
3482 /* See whether the specialized multiplication optabs are
3483 cheaper than the shift/add version. */
3484 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3485 alg.cost.cost + extra_cost);
3489 tem = convert_to_mode (wider_mode, op0, unsignedp);
3490 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3491 tem = extract_high_half (mode, tem);
3493 /* Adjust result for signedness. */
3495 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3499 return expand_mult_highpart_optab (mode, op0, op1, target,
3500 unsignedp, max_cost);
3504 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3507 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3509 unsigned HOST_WIDE_INT masklow, maskhigh;
3510 rtx result, temp, shift, label;
3513 logd = floor_log2 (d);
3514 result = gen_reg_rtx (mode);
3516 /* Avoid conditional branches when they're expensive. */
3517 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3518 && optimize_insn_for_speed_p ())
3520 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3524 signmask = force_reg (mode, signmask);
3525 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3526 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3528 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3529 which instruction sequence to use. If logical right shifts
3530 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3531 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3533 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3534 if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3535 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3537 temp = expand_binop (mode, xor_optab, op0, signmask,
3538 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3539 temp = expand_binop (mode, sub_optab, temp, signmask,
3540 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3541 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3542 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3543 temp = expand_binop (mode, xor_optab, temp, signmask,
3544 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3545 temp = expand_binop (mode, sub_optab, temp, signmask,
3546 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3550 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3551 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552 signmask = force_reg (mode, signmask);
3554 temp = expand_binop (mode, add_optab, op0, signmask,
3555 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3556 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3557 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3558 temp = expand_binop (mode, sub_optab, temp, signmask,
3559 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3565 /* Mask contains the mode's signbit and the significant bits of the
3566 modulus. By including the signbit in the operation, many targets
3567 can avoid an explicit compare operation in the following comparison
3570 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3571 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3573 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3577 maskhigh = (HOST_WIDE_INT) -1
3578 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3580 temp = expand_binop (mode, and_optab, op0,
3581 immed_double_const (masklow, maskhigh, mode),
3582 result, 1, OPTAB_LIB_WIDEN);
3584 emit_move_insn (result, temp);
3586 label = gen_label_rtx ();
3587 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3589 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3590 0, OPTAB_LIB_WIDEN);
3591 masklow = (HOST_WIDE_INT) -1 << logd;
3593 temp = expand_binop (mode, ior_optab, temp,
3594 immed_double_const (masklow, maskhigh, mode),
3595 result, 1, OPTAB_LIB_WIDEN);
3596 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3597 0, OPTAB_LIB_WIDEN);
3599 emit_move_insn (result, temp);
3604 /* Expand signed division of OP0 by a power of two D in mode MODE.
3605 This routine is only called for positive values of D. */
3608 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3614 logd = floor_log2 (d);
3615 shift = build_int_cst (NULL_TREE, logd);
3618 && BRANCH_COST (optimize_insn_for_speed_p (),
3621 temp = gen_reg_rtx (mode);
3622 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3623 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3624 0, OPTAB_LIB_WIDEN);
3625 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3628 #ifdef HAVE_conditional_move
3629 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3634 /* ??? emit_conditional_move forces a stack adjustment via
3635 compare_from_rtx so, if the sequence is discarded, it will
3636 be lost. Do it now instead. */
3637 do_pending_stack_adjust ();
3640 temp2 = copy_to_mode_reg (mode, op0);
3641 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3642 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3643 temp = force_reg (mode, temp);
3645 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3646 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3647 mode, temp, temp2, mode, 0);
3650 rtx seq = get_insns ();
3653 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3659 if (BRANCH_COST (optimize_insn_for_speed_p (),
3662 int ushift = GET_MODE_BITSIZE (mode) - logd;
3664 temp = gen_reg_rtx (mode);
3665 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3666 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3667 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3668 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3670 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3671 build_int_cst (NULL_TREE, ushift),
3673 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3674 0, OPTAB_LIB_WIDEN);
3675 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3678 label = gen_label_rtx ();
3679 temp = copy_to_mode_reg (mode, op0);
3680 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3681 expand_inc (temp, GEN_INT (d - 1));
3683 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3686 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3687 if that is convenient, and returning where the result is.
3688 You may request either the quotient or the remainder as the result;
3689 specify REM_FLAG nonzero to get the remainder.
3691 CODE is the expression code for which kind of division this is;
3692 it controls how rounding is done. MODE is the machine mode to use.
3693 UNSIGNEDP nonzero means do unsigned division. */
3695 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3696 and then correct it by or'ing in missing high bits
3697 if result of ANDI is nonzero.
3698 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3699 This could optimize to a bfexts instruction.
3700 But C doesn't use these operations, so their optimizations are
3702 /* ??? For modulo, we don't actually need the highpart of the first product,
3703 the low part will do nicely. And for small divisors, the second multiply
3704 can also be a low-part only multiply or even be completely left out.
3705 E.g. to calculate the remainder of a division by 3 with a 32 bit
3706 multiply, multiply with 0x55555556 and extract the upper two bits;
3707 the result is exact for inputs up to 0x1fffffff.
3708 The input range can be reduced by using cross-sum rules.
3709 For odd divisors >= 3, the following table gives right shift counts
3710 so that if a number is shifted by an integer multiple of the given
3711 amount, the remainder stays the same:
3712 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3713 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3714 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3715 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3716 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3718 Cross-sum rules for even numbers can be derived by leaving as many bits
3719 to the right alone as the divisor has zeros to the right.
3720 E.g. if x is an unsigned 32 bit number:
3721 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3725 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3726 rtx op0, rtx op1, rtx target, int unsignedp)
3728 enum machine_mode compute_mode;
3730 rtx quotient = 0, remainder = 0;
3734 optab optab1, optab2;
3735 int op1_is_constant, op1_is_pow2 = 0;
3736 int max_cost, extra_cost;
3737 static HOST_WIDE_INT last_div_const = 0;
3738 static HOST_WIDE_INT ext_op1;
3739 bool speed = optimize_insn_for_speed_p ();
3741 op1_is_constant = GET_CODE (op1) == CONST_INT;
3742 if (op1_is_constant)
3744 ext_op1 = INTVAL (op1);
3746 ext_op1 &= GET_MODE_MASK (mode);
3747 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3748 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3752 This is the structure of expand_divmod:
3754 First comes code to fix up the operands so we can perform the operations
3755 correctly and efficiently.
3757 Second comes a switch statement with code specific for each rounding mode.
3758 For some special operands this code emits all RTL for the desired
3759 operation, for other cases, it generates only a quotient and stores it in
3760 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3761 to indicate that it has not done anything.
3763 Last comes code that finishes the operation. If QUOTIENT is set and
3764 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3765 QUOTIENT is not set, it is computed using trunc rounding.
3767 We try to generate special code for division and remainder when OP1 is a
3768 constant. If |OP1| = 2**n we can use shifts and some other fast
3769 operations. For other values of OP1, we compute a carefully selected
3770 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3773 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3774 half of the product. Different strategies for generating the product are
3775 implemented in expand_mult_highpart.
3777 If what we actually want is the remainder, we generate that by another
3778 by-constant multiplication and a subtraction. */
3780 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3781 code below will malfunction if we are, so check here and handle
3782 the special case if so. */
3783 if (op1 == const1_rtx)
3784 return rem_flag ? const0_rtx : op0;
3786 /* When dividing by -1, we could get an overflow.
3787 negv_optab can handle overflows. */
3788 if (! unsignedp && op1 == constm1_rtx)
3792 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3793 ? negv_optab : neg_optab, op0, target, 0);
3797 /* Don't use the function value register as a target
3798 since we have to read it as well as write it,
3799 and function-inlining gets confused by this. */
3800 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3801 /* Don't clobber an operand while doing a multi-step calculation. */
3802 || ((rem_flag || op1_is_constant)
3803 && (reg_mentioned_p (target, op0)
3804 || (MEM_P (op0) && MEM_P (target))))
3805 || reg_mentioned_p (target, op1)
3806 || (MEM_P (op1) && MEM_P (target))))
3809 /* Get the mode in which to perform this computation. Normally it will
3810 be MODE, but sometimes we can't do the desired operation in MODE.
3811 If so, pick a wider mode in which we can do the operation. Convert
3812 to that mode at the start to avoid repeated conversions.
3814 First see what operations we need. These depend on the expression
3815 we are evaluating. (We assume that divxx3 insns exist under the
3816 same conditions that modxx3 insns and that these insns don't normally
3817 fail. If these assumptions are not correct, we may generate less
3818 efficient code in some cases.)
3820 Then see if we find a mode in which we can open-code that operation
3821 (either a division, modulus, or shift). Finally, check for the smallest
3822 mode for which we can do the operation with a library call. */
3824 /* We might want to refine this now that we have division-by-constant
3825 optimization. Since expand_mult_highpart tries so many variants, it is
3826 not straightforward to generalize this. Maybe we should make an array
3827 of possible modes in init_expmed? Save this for GCC 2.7. */
3829 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3830 ? (unsignedp ? lshr_optab : ashr_optab)
3831 : (unsignedp ? udiv_optab : sdiv_optab));
3832 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3834 : (unsignedp ? udivmod_optab : sdivmod_optab));
3836 for (compute_mode = mode; compute_mode != VOIDmode;
3837 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3838 if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3839 || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3842 if (compute_mode == VOIDmode)
3843 for (compute_mode = mode; compute_mode != VOIDmode;
3844 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3845 if (optab_libfunc (optab1, compute_mode)
3846 || optab_libfunc (optab2, compute_mode))
3849 /* If we still couldn't find a mode, use MODE, but expand_binop will
3851 if (compute_mode == VOIDmode)
3852 compute_mode = mode;
3854 if (target && GET_MODE (target) == compute_mode)
3857 tquotient = gen_reg_rtx (compute_mode);
3859 size = GET_MODE_BITSIZE (compute_mode);
3861 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3862 (mode), and thereby get better code when OP1 is a constant. Do that
3863 later. It will require going over all usages of SIZE below. */
3864 size = GET_MODE_BITSIZE (mode);
3867 /* Only deduct something for a REM if the last divide done was
3868 for a different constant. Then set the constant of the last
3870 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3871 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3872 && INTVAL (op1) == last_div_const))
3873 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3875 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3877 /* Now convert to the best mode to use. */
3878 if (compute_mode != mode)
3880 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3881 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3883 /* convert_modes may have placed op1 into a register, so we
3884 must recompute the following. */
3885 op1_is_constant = GET_CODE (op1) == CONST_INT;
3886 op1_is_pow2 = (op1_is_constant
3887 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3889 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3892 /* If one of the operands is a volatile MEM, copy it into a register. */
3894 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3895 op0 = force_reg (compute_mode, op0);
3896 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3897 op1 = force_reg (compute_mode, op1);
3899 /* If we need the remainder or if OP1 is constant, we need to
3900 put OP0 in a register in case it has any queued subexpressions. */
3901 if (rem_flag || op1_is_constant)
3902 op0 = force_reg (compute_mode, op0);
3904 last = get_last_insn ();
3906 /* Promote floor rounding to trunc rounding for unsigned operations. */
3909 if (code == FLOOR_DIV_EXPR)
3910 code = TRUNC_DIV_EXPR;
3911 if (code == FLOOR_MOD_EXPR)
3912 code = TRUNC_MOD_EXPR;
3913 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3914 code = TRUNC_DIV_EXPR;
3917 if (op1 != const0_rtx)
3920 case TRUNC_MOD_EXPR:
3921 case TRUNC_DIV_EXPR:
3922 if (op1_is_constant)
3926 unsigned HOST_WIDE_INT mh;
3927 int pre_shift, post_shift;
3930 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3931 & GET_MODE_MASK (compute_mode));
3933 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3935 pre_shift = floor_log2 (d);
3939 = expand_binop (compute_mode, and_optab, op0,
3940 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3944 return gen_lowpart (mode, remainder);
3946 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3947 build_int_cst (NULL_TREE,
3951 else if (size <= HOST_BITS_PER_WIDE_INT)
3953 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3955 /* Most significant bit of divisor is set; emit an scc
3957 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3958 compute_mode, 1, 1);
3964 /* Find a suitable multiplier and right shift count
3965 instead of multiplying with D. */
3967 mh = choose_multiplier (d, size, size,
3968 &ml, &post_shift, &dummy);
3970 /* If the suggested multiplier is more than SIZE bits,
3971 we can do better for even divisors, using an
3972 initial right shift. */
3973 if (mh != 0 && (d & 1) == 0)
3975 pre_shift = floor_log2 (d & -d);
3976 mh = choose_multiplier (d >> pre_shift, size,
3978 &ml, &post_shift, &dummy);
3988 if (post_shift - 1 >= BITS_PER_WORD)
3992 = (shift_cost[speed][compute_mode][post_shift - 1]
3993 + shift_cost[speed][compute_mode][1]
3994 + 2 * add_cost[speed][compute_mode]);
3995 t1 = expand_mult_highpart (compute_mode, op0, ml,
3997 max_cost - extra_cost);
4000 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4004 (RSHIFT_EXPR, compute_mode, t2,
4005 build_int_cst (NULL_TREE, 1),
4007 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4010 quotient = expand_shift
4011 (RSHIFT_EXPR, compute_mode, t4,
4012 build_int_cst (NULL_TREE, post_shift - 1),
4019 if (pre_shift >= BITS_PER_WORD
4020 || post_shift >= BITS_PER_WORD)
4024 (RSHIFT_EXPR, compute_mode, op0,
4025 build_int_cst (NULL_TREE, pre_shift),
4028 = (shift_cost[speed][compute_mode][pre_shift]
4029 + shift_cost[speed][compute_mode][post_shift]);
4030 t2 = expand_mult_highpart (compute_mode, t1, ml,
4032 max_cost - extra_cost);
4035 quotient = expand_shift
4036 (RSHIFT_EXPR, compute_mode, t2,
4037 build_int_cst (NULL_TREE, post_shift),
4042 else /* Too wide mode to use tricky code */
4045 insn = get_last_insn ();
4047 && (set = single_set (insn)) != 0
4048 && SET_DEST (set) == quotient)
4049 set_unique_reg_note (insn,
4051 gen_rtx_UDIV (compute_mode, op0, op1));
4053 else /* TRUNC_DIV, signed */
4055 unsigned HOST_WIDE_INT ml;
4056 int lgup, post_shift;
4058 HOST_WIDE_INT d = INTVAL (op1);
4059 unsigned HOST_WIDE_INT abs_d;
4061 /* Since d might be INT_MIN, we have to cast to
4062 unsigned HOST_WIDE_INT before negating to avoid
4063 undefined signed overflow. */
4065 ? (unsigned HOST_WIDE_INT) d
4066 : - (unsigned HOST_WIDE_INT) d);
4068 /* n rem d = n rem -d */
4069 if (rem_flag && d < 0)
4072 op1 = gen_int_mode (abs_d, compute_mode);
4078 quotient = expand_unop (compute_mode, neg_optab, op0,
4080 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4082 /* This case is not handled correctly below. */
4083 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4084 compute_mode, 1, 1);
4088 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4089 && (rem_flag ? smod_pow2_cheap[compute_mode]
4090 : sdiv_pow2_cheap[compute_mode])
4091 /* We assume that cheap metric is true if the
4092 optab has an expander for this mode. */
4093 && ((optab_handler ((rem_flag ? smod_optab
4095 compute_mode)->insn_code
4096 != CODE_FOR_nothing)
4097 || (optab_handler(sdivmod_optab,
4099 ->insn_code != CODE_FOR_nothing)))
4101 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4105 remainder = expand_smod_pow2 (compute_mode, op0, d);
4107 return gen_lowpart (mode, remainder);
4110 if (sdiv_pow2_cheap[compute_mode]
4111 && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4112 != CODE_FOR_nothing)
4113 || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4114 != CODE_FOR_nothing)))
4115 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4117 gen_int_mode (abs_d,
4121 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4123 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4124 negate the quotient. */
4127 insn = get_last_insn ();
4129 && (set = single_set (insn)) != 0
4130 && SET_DEST (set) == quotient
4131 && abs_d < ((unsigned HOST_WIDE_INT) 1
4132 << (HOST_BITS_PER_WIDE_INT - 1)))
4133 set_unique_reg_note (insn,
4135 gen_rtx_DIV (compute_mode,
4142 quotient = expand_unop (compute_mode, neg_optab,
4143 quotient, quotient, 0);
4146 else if (size <= HOST_BITS_PER_WIDE_INT)
4148 choose_multiplier (abs_d, size, size - 1,
4149 &mlr, &post_shift, &lgup);
4150 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4151 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4155 if (post_shift >= BITS_PER_WORD
4156 || size - 1 >= BITS_PER_WORD)
4159 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4160 + shift_cost[speed][compute_mode][size - 1]
4161 + add_cost[speed][compute_mode]);
4162 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4164 max_cost - extra_cost);
4168 (RSHIFT_EXPR, compute_mode, t1,
4169 build_int_cst (NULL_TREE, post_shift),
4172 (RSHIFT_EXPR, compute_mode, op0,
4173 build_int_cst (NULL_TREE, size - 1),
4177 = force_operand (gen_rtx_MINUS (compute_mode,
4182 = force_operand (gen_rtx_MINUS (compute_mode,
4190 if (post_shift >= BITS_PER_WORD
4191 || size - 1 >= BITS_PER_WORD)
4194 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4195 mlr = gen_int_mode (ml, compute_mode);
4196 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4197 + shift_cost[speed][compute_mode][size - 1]
4198 + 2 * add_cost[speed][compute_mode]);
4199 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4201 max_cost - extra_cost);
4204 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4208 (RSHIFT_EXPR, compute_mode, t2,
4209 build_int_cst (NULL_TREE, post_shift),
4212 (RSHIFT_EXPR, compute_mode, op0,
4213 build_int_cst (NULL_TREE, size - 1),
4217 = force_operand (gen_rtx_MINUS (compute_mode,
4222 = force_operand (gen_rtx_MINUS (compute_mode,
4227 else /* Too wide mode to use tricky code */
4230 insn = get_last_insn ();
4232 && (set = single_set (insn)) != 0
4233 && SET_DEST (set) == quotient)
4234 set_unique_reg_note (insn,
4236 gen_rtx_DIV (compute_mode, op0, op1));
4241 delete_insns_since (last);
4244 case FLOOR_DIV_EXPR:
4245 case FLOOR_MOD_EXPR:
4246 /* We will come here only for signed operations. */
4247 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4249 unsigned HOST_WIDE_INT mh;
4250 int pre_shift, lgup, post_shift;
4251 HOST_WIDE_INT d = INTVAL (op1);
4256 /* We could just as easily deal with negative constants here,
4257 but it does not seem worth the trouble for GCC 2.6. */
4258 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4260 pre_shift = floor_log2 (d);
4263 remainder = expand_binop (compute_mode, and_optab, op0,
4264 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4265 remainder, 0, OPTAB_LIB_WIDEN);
4267 return gen_lowpart (mode, remainder);
4269 quotient = expand_shift
4270 (RSHIFT_EXPR, compute_mode, op0,
4271 build_int_cst (NULL_TREE, pre_shift),
4278 mh = choose_multiplier (d, size, size - 1,
4279 &ml, &post_shift, &lgup);
4282 if (post_shift < BITS_PER_WORD
4283 && size - 1 < BITS_PER_WORD)
4286 (RSHIFT_EXPR, compute_mode, op0,
4287 build_int_cst (NULL_TREE, size - 1),
4289 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4290 NULL_RTX, 0, OPTAB_WIDEN);
4291 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4292 + shift_cost[speed][compute_mode][size - 1]
4293 + 2 * add_cost[speed][compute_mode]);
4294 t3 = expand_mult_highpart (compute_mode, t2, ml,
4296 max_cost - extra_cost);
4300 (RSHIFT_EXPR, compute_mode, t3,
4301 build_int_cst (NULL_TREE, post_shift),
4303 quotient = expand_binop (compute_mode, xor_optab,
4304 t4, t1, tquotient, 0,
4312 rtx nsign, t1, t2, t3, t4;
4313 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4314 op0, constm1_rtx), NULL_RTX);
4315 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4317 nsign = expand_shift
4318 (RSHIFT_EXPR, compute_mode, t2,
4319 build_int_cst (NULL_TREE, size - 1),
4321 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4323 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4328 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4330 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4339 delete_insns_since (last);
4341 /* Try using an instruction that produces both the quotient and
4342 remainder, using truncation. We can easily compensate the quotient
4343 or remainder to get floor rounding, once we have the remainder.
4344 Notice that we compute also the final remainder value here,
4345 and return the result right away. */
4346 if (target == 0 || GET_MODE (target) != compute_mode)
4347 target = gen_reg_rtx (compute_mode);
4352 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4353 quotient = gen_reg_rtx (compute_mode);
4358 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4359 remainder = gen_reg_rtx (compute_mode);
4362 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4363 quotient, remainder, 0))
4365 /* This could be computed with a branch-less sequence.
4366 Save that for later. */
4368 rtx label = gen_label_rtx ();
4369 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4370 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4371 NULL_RTX, 0, OPTAB_WIDEN);
4372 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4373 expand_dec (quotient, const1_rtx);
4374 expand_inc (remainder, op1);
4376 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4379 /* No luck with division elimination or divmod. Have to do it
4380 by conditionally adjusting op0 *and* the result. */
4382 rtx label1, label2, label3, label4, label5;
4386 quotient = gen_reg_rtx (compute_mode);
4387 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4388 label1 = gen_label_rtx ();
4389 label2 = gen_label_rtx ();
4390 label3 = gen_label_rtx ();
4391 label4 = gen_label_rtx ();
4392 label5 = gen_label_rtx ();
4393 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4394 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4395 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4396 quotient, 0, OPTAB_LIB_WIDEN);
4397 if (tem != quotient)
4398 emit_move_insn (quotient, tem);
4399 emit_jump_insn (gen_jump (label5));
4401 emit_label (label1);
4402 expand_inc (adjusted_op0, const1_rtx);
4403 emit_jump_insn (gen_jump (label4));
4405 emit_label (label2);
4406 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4407 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4408 quotient, 0, OPTAB_LIB_WIDEN);
4409 if (tem != quotient)
4410 emit_move_insn (quotient, tem);
4411 emit_jump_insn (gen_jump (label5));
4413 emit_label (label3);
4414 expand_dec (adjusted_op0, const1_rtx);
4415 emit_label (label4);
4416 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4417 quotient, 0, OPTAB_LIB_WIDEN);
4418 if (tem != quotient)
4419 emit_move_insn (quotient, tem);
4420 expand_dec (quotient, const1_rtx);
4421 emit_label (label5);
4429 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4432 unsigned HOST_WIDE_INT d = INTVAL (op1);
4433 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4434 build_int_cst (NULL_TREE, floor_log2 (d)),
4436 t2 = expand_binop (compute_mode, and_optab, op0,
4438 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4439 t3 = gen_reg_rtx (compute_mode);
4440 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4441 compute_mode, 1, 1);
4445 lab = gen_label_rtx ();
4446 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4447 expand_inc (t1, const1_rtx);
4452 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4458 /* Try using an instruction that produces both the quotient and
4459 remainder, using truncation. We can easily compensate the
4460 quotient or remainder to get ceiling rounding, once we have the
4461 remainder. Notice that we compute also the final remainder
4462 value here, and return the result right away. */
4463 if (target == 0 || GET_MODE (target) != compute_mode)
4464 target = gen_reg_rtx (compute_mode);
4468 remainder = (REG_P (target)
4469 ? target : gen_reg_rtx (compute_mode));
4470 quotient = gen_reg_rtx (compute_mode);
4474 quotient = (REG_P (target)
4475 ? target : gen_reg_rtx (compute_mode));
4476 remainder = gen_reg_rtx (compute_mode);
4479 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4482 /* This could be computed with a branch-less sequence.
4483 Save that for later. */
4484 rtx label = gen_label_rtx ();
4485 do_cmp_and_jump (remainder, const0_rtx, EQ,
4486 compute_mode, label);
4487 expand_inc (quotient, const1_rtx);
4488 expand_dec (remainder, op1);
4490 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4493 /* No luck with division elimination or divmod. Have to do it
4494 by conditionally adjusting op0 *and* the result. */
4497 rtx adjusted_op0, tem;
4499 quotient = gen_reg_rtx (compute_mode);
4500 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4501 label1 = gen_label_rtx ();
4502 label2 = gen_label_rtx ();
4503 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4504 compute_mode, label1);
4505 emit_move_insn (quotient, const0_rtx);
4506 emit_jump_insn (gen_jump (label2));
4508 emit_label (label1);
4509 expand_dec (adjusted_op0, const1_rtx);
4510 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4511 quotient, 1, OPTAB_LIB_WIDEN);
4512 if (tem != quotient)
4513 emit_move_insn (quotient, tem);
4514 expand_inc (quotient, const1_rtx);
4515 emit_label (label2);
4520 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4521 && INTVAL (op1) >= 0)
4523 /* This is extremely similar to the code for the unsigned case
4524 above. For 2.7 we should merge these variants, but for
4525 2.6.1 I don't want to touch the code for unsigned since that
4526 get used in C. The signed case will only be used by other
4530 unsigned HOST_WIDE_INT d = INTVAL (op1);
4531 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4532 build_int_cst (NULL_TREE, floor_log2 (d)),
4534 t2 = expand_binop (compute_mode, and_optab, op0,
4536 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4537 t3 = gen_reg_rtx (compute_mode);
4538 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4539 compute_mode, 1, 1);
4543 lab = gen_label_rtx ();
4544 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4545 expand_inc (t1, const1_rtx);
4550 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4556 /* Try using an instruction that produces both the quotient and
4557 remainder, using truncation. We can easily compensate the
4558 quotient or remainder to get ceiling rounding, once we have the
4559 remainder. Notice that we compute also the final remainder
4560 value here, and return the result right away. */
4561 if (target == 0 || GET_MODE (target) != compute_mode)
4562 target = gen_reg_rtx (compute_mode);
4565 remainder= (REG_P (target)
4566 ? target : gen_reg_rtx (compute_mode));
4567 quotient = gen_reg_rtx (compute_mode);
4571 quotient = (REG_P (target)
4572 ? target : gen_reg_rtx (compute_mode));
4573 remainder = gen_reg_rtx (compute_mode);
4576 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4579 /* This could be computed with a branch-less sequence.
4580 Save that for later. */
4582 rtx label = gen_label_rtx ();
4583 do_cmp_and_jump (remainder, const0_rtx, EQ,
4584 compute_mode, label);
4585 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4586 NULL_RTX, 0, OPTAB_WIDEN);
4587 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4588 expand_inc (quotient, const1_rtx);
4589 expand_dec (remainder, op1);
4591 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4594 /* No luck with division elimination or divmod. Have to do it
4595 by conditionally adjusting op0 *and* the result. */
4597 rtx label1, label2, label3, label4, label5;
4601 quotient = gen_reg_rtx (compute_mode);
4602 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4603 label1 = gen_label_rtx ();
4604 label2 = gen_label_rtx ();
4605 label3 = gen_label_rtx ();
4606 label4 = gen_label_rtx ();
4607 label5 = gen_label_rtx ();
4608 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4609 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4610 compute_mode, label1);
4611 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4612 quotient, 0, OPTAB_LIB_WIDEN);
4613 if (tem != quotient)
4614 emit_move_insn (quotient, tem);
4615 emit_jump_insn (gen_jump (label5));
4617 emit_label (label1);
4618 expand_dec (adjusted_op0, const1_rtx);
4619 emit_jump_insn (gen_jump (label4));
4621 emit_label (label2);
4622 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4623 compute_mode, label3);
4624 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4625 quotient, 0, OPTAB_LIB_WIDEN);
4626 if (tem != quotient)
4627 emit_move_insn (quotient, tem);
4628 emit_jump_insn (gen_jump (label5));
4630 emit_label (label3);
4631 expand_inc (adjusted_op0, const1_rtx);
4632 emit_label (label4);
4633 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4634 quotient, 0, OPTAB_LIB_WIDEN);
4635 if (tem != quotient)
4636 emit_move_insn (quotient, tem);
4637 expand_inc (quotient, const1_rtx);
4638 emit_label (label5);
4643 case EXACT_DIV_EXPR:
4644 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4646 HOST_WIDE_INT d = INTVAL (op1);
4647 unsigned HOST_WIDE_INT ml;
4651 pre_shift = floor_log2 (d & -d);
4652 ml = invert_mod2n (d >> pre_shift, size);
4653 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4654 build_int_cst (NULL_TREE, pre_shift),
4655 NULL_RTX, unsignedp);
4656 quotient = expand_mult (compute_mode, t1,
4657 gen_int_mode (ml, compute_mode),
4660 insn = get_last_insn ();
4661 set_unique_reg_note (insn,
4663 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4669 case ROUND_DIV_EXPR:
4670 case ROUND_MOD_EXPR:
4675 label = gen_label_rtx ();
4676 quotient = gen_reg_rtx (compute_mode);
4677 remainder = gen_reg_rtx (compute_mode);
4678 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4681 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4682 quotient, 1, OPTAB_LIB_WIDEN);
4683 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4684 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4685 remainder, 1, OPTAB_LIB_WIDEN);
4687 tem = plus_constant (op1, -1);
4688 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4689 build_int_cst (NULL_TREE, 1),
4691 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4692 expand_inc (quotient, const1_rtx);
4693 expand_dec (remainder, op1);
4698 rtx abs_rem, abs_op1, tem, mask;
4700 label = gen_label_rtx ();
4701 quotient = gen_reg_rtx (compute_mode);
4702 remainder = gen_reg_rtx (compute_mode);
4703 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4706 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4707 quotient, 0, OPTAB_LIB_WIDEN);
4708 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4709 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4710 remainder, 0, OPTAB_LIB_WIDEN);
4712 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4713 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4714 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4715 build_int_cst (NULL_TREE, 1),
4717 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4718 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4719 NULL_RTX, 0, OPTAB_WIDEN);
4720 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4721 build_int_cst (NULL_TREE, size - 1),
4723 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4724 NULL_RTX, 0, OPTAB_WIDEN);
4725 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4726 NULL_RTX, 0, OPTAB_WIDEN);
4727 expand_inc (quotient, tem);
4728 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4729 NULL_RTX, 0, OPTAB_WIDEN);
4730 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4731 NULL_RTX, 0, OPTAB_WIDEN);
4732 expand_dec (remainder, tem);
4735 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4743 if (target && GET_MODE (target) != compute_mode)
4748 /* Try to produce the remainder without producing the quotient.
4749 If we seem to have a divmod pattern that does not require widening,
4750 don't try widening here. We should really have a WIDEN argument
4751 to expand_twoval_binop, since what we'd really like to do here is
4752 1) try a mod insn in compute_mode
4753 2) try a divmod insn in compute_mode
4754 3) try a div insn in compute_mode and multiply-subtract to get
4756 4) try the same things with widening allowed. */
4758 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4761 ((optab_handler (optab2, compute_mode)->insn_code
4762 != CODE_FOR_nothing)
4763 ? OPTAB_DIRECT : OPTAB_WIDEN));
4766 /* No luck there. Can we do remainder and divide at once
4767 without a library call? */
4768 remainder = gen_reg_rtx (compute_mode);
4769 if (! expand_twoval_binop ((unsignedp
4773 NULL_RTX, remainder, unsignedp))
4778 return gen_lowpart (mode, remainder);
4781 /* Produce the quotient. Try a quotient insn, but not a library call.
4782 If we have a divmod in this mode, use it in preference to widening
4783 the div (for this test we assume it will not fail). Note that optab2
4784 is set to the one of the two optabs that the call below will use. */
4786 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4787 op0, op1, rem_flag ? NULL_RTX : target,
4789 ((optab_handler (optab2, compute_mode)->insn_code
4790 != CODE_FOR_nothing)
4791 ? OPTAB_DIRECT : OPTAB_WIDEN));
4795 /* No luck there. Try a quotient-and-remainder insn,
4796 keeping the quotient alone. */
4797 quotient = gen_reg_rtx (compute_mode);
4798 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4800 quotient, NULL_RTX, unsignedp))
4804 /* Still no luck. If we are not computing the remainder,
4805 use a library call for the quotient. */
4806 quotient = sign_expand_binop (compute_mode,
4807 udiv_optab, sdiv_optab,
4809 unsignedp, OPTAB_LIB_WIDEN);
4816 if (target && GET_MODE (target) != compute_mode)
4821 /* No divide instruction either. Use library for remainder. */
4822 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4824 unsignedp, OPTAB_LIB_WIDEN);
4825 /* No remainder function. Try a quotient-and-remainder
4826 function, keeping the remainder. */
4829 remainder = gen_reg_rtx (compute_mode);
4830 if (!expand_twoval_binop_libfunc
4831 (unsignedp ? udivmod_optab : sdivmod_optab,
4833 NULL_RTX, remainder,
4834 unsignedp ? UMOD : MOD))
4835 remainder = NULL_RTX;
4840 /* We divided. Now finish doing X - Y * (X / Y). */
4841 remainder = expand_mult (compute_mode, quotient, op1,
4842 NULL_RTX, unsignedp);
4843 remainder = expand_binop (compute_mode, sub_optab, op0,
4844 remainder, target, unsignedp,
4849 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4852 /* Return a tree node with data type TYPE, describing the value of X.
4853 Usually this is an VAR_DECL, if there is no obvious better choice.
4854 X may be an expression, however we only support those expressions
4855 generated by loop.c. */
4858 make_tree (tree type, rtx x)
4862 switch (GET_CODE (x))
4866 HOST_WIDE_INT hi = 0;
4869 && !(TYPE_UNSIGNED (type)
4870 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4871 < HOST_BITS_PER_WIDE_INT)))
4874 t = build_int_cst_wide (type, INTVAL (x), hi);
4880 if (GET_MODE (x) == VOIDmode)
4881 t = build_int_cst_wide (type,
4882 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4887 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4888 t = build_real (type, d);
4895 int units = CONST_VECTOR_NUNITS (x);
4896 tree itype = TREE_TYPE (type);
4901 /* Build a tree with vector elements. */
4902 for (i = units - 1; i >= 0; --i)
4904 rtx elt = CONST_VECTOR_ELT (x, i);
4905 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4908 return build_vector (type, t);
4912 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4913 make_tree (type, XEXP (x, 1)));
4916 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4917 make_tree (type, XEXP (x, 1)));
4920 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4923 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4924 make_tree (type, XEXP (x, 1)));
4927 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4928 make_tree (type, XEXP (x, 1)));
4931 t = unsigned_type_for (type);
4932 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4933 make_tree (t, XEXP (x, 0)),
4934 make_tree (type, XEXP (x, 1))));
4937 t = signed_type_for (type);
4938 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4939 make_tree (t, XEXP (x, 0)),
4940 make_tree (type, XEXP (x, 1))));
4943 if (TREE_CODE (type) != REAL_TYPE)
4944 t = signed_type_for (type);
4948 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4949 make_tree (t, XEXP (x, 0)),
4950 make_tree (t, XEXP (x, 1))));
4952 t = unsigned_type_for (type);
4953 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4954 make_tree (t, XEXP (x, 0)),
4955 make_tree (t, XEXP (x, 1))));
4959 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4960 GET_CODE (x) == ZERO_EXTEND);
4961 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4964 return make_tree (type, XEXP (x, 0));
4967 t = SYMBOL_REF_DECL (x);
4969 return fold_convert (type, build_fold_addr_expr (t));
4970 /* else fall through. */
4973 t = build_decl (VAR_DECL, NULL_TREE, type);
4975 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4976 ptr_mode. So convert. */
4977 if (POINTER_TYPE_P (type))
4978 x = convert_memory_address (TYPE_MODE (type), x);
4980 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4981 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4982 t->decl_with_rtl.rtl = x;
4988 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4989 and returning TARGET.
4991 If TARGET is 0, a pseudo-register or constant is returned. */
4994 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4998 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4999 tem = simplify_binary_operation (AND, mode, op0, op1);
5001 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5005 else if (tem != target)
5006 emit_move_insn (target, tem);
5010 /* Helper function for emit_store_flag. */
5012 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5016 enum machine_mode target_mode = GET_MODE (target);
5018 /* If we are converting to a wider mode, first convert to
5019 TARGET_MODE, then normalize. This produces better combining
5020 opportunities on machines that have a SIGN_EXTRACT when we are
5021 testing a single bit. This mostly benefits the 68k.
5023 If STORE_FLAG_VALUE does not have the sign bit set when
5024 interpreted in MODE, we can do this conversion as unsigned, which
5025 is usually more efficient. */
5026 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5028 convert_move (target, subtarget,
5029 (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5030 && 0 == (STORE_FLAG_VALUE
5031 & ((HOST_WIDE_INT) 1
5032 << (GET_MODE_BITSIZE (mode) -1))));
5039 /* If we want to keep subexpressions around, don't reuse our last
5044 /* Now normalize to the proper value in MODE. Sometimes we don't
5045 have to do anything. */
5046 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5048 /* STORE_FLAG_VALUE might be the most negative number, so write
5049 the comparison this way to avoid a compiler-time warning. */
5050 else if (- normalizep == STORE_FLAG_VALUE)
5051 op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5053 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5054 it hard to use a value of just the sign bit due to ANSI integer
5055 constant typing rules. */
5056 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5057 && (STORE_FLAG_VALUE
5058 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5059 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5060 size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5064 gcc_assert (STORE_FLAG_VALUE & 1);
5066 op0 = expand_and (mode, op0, const1_rtx, subtarget);
5067 if (normalizep == -1)
5068 op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5071 /* If we were converting to a smaller mode, do the conversion now. */
5072 if (target_mode != mode)
5074 convert_move (target, op0, 0);
5081 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5082 and storing in TARGET. Normally return TARGET.
5083 Return 0 if that cannot be done.
5085 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5086 it is VOIDmode, they cannot both be CONST_INT.
5088 UNSIGNEDP is for the case where we have to widen the operands
5089 to perform the operation. It says to use zero-extension.
5091 NORMALIZEP is 1 if we should convert the result to be either zero
5092 or one. Normalize is -1 if we should convert the result to be
5093 either zero or -1. If NORMALIZEP is zero, the result will be left
5094 "raw" out of the scc insn. */
5097 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5098 enum machine_mode mode, int unsignedp, int normalizep)
5101 enum insn_code icode;
5102 enum machine_mode compare_mode;
5103 enum machine_mode target_mode = GET_MODE (target);
5105 rtx last = get_last_insn ();
5106 rtx pattern, comparison;
5109 code = unsigned_condition (code);
5111 /* If one operand is constant, make it the second one. Only do this
5112 if the other operand is not constant as well. */
5114 if (swap_commutative_operands_p (op0, op1))
5119 code = swap_condition (code);
5122 if (mode == VOIDmode)
5123 mode = GET_MODE (op0);
5125 /* For some comparisons with 1 and -1, we can convert this to
5126 comparisons with zero. This will often produce more opportunities for
5127 store-flag insns. */
5132 if (op1 == const1_rtx)
5133 op1 = const0_rtx, code = LE;
5136 if (op1 == constm1_rtx)
5137 op1 = const0_rtx, code = LT;
5140 if (op1 == const1_rtx)
5141 op1 = const0_rtx, code = GT;
5144 if (op1 == constm1_rtx)
5145 op1 = const0_rtx, code = GE;
5148 if (op1 == const1_rtx)
5149 op1 = const0_rtx, code = NE;
5152 if (op1 == const1_rtx)
5153 op1 = const0_rtx, code = EQ;
5159 /* If we are comparing a double-word integer with zero or -1, we can
5160 convert the comparison into one involving a single word. */
5161 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5162 && GET_MODE_CLASS (mode) == MODE_INT
5163 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5165 if ((code == EQ || code == NE)
5166 && (op1 == const0_rtx || op1 == constm1_rtx))
5168 rtx op00, op01, op0both;
5170 /* Do a logical OR or AND of the two words and compare the
5172 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5173 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5174 op0both = expand_binop (word_mode,
5175 op1 == const0_rtx ? ior_optab : and_optab,
5176 op00, op01, NULL_RTX, unsignedp,
5180 return emit_store_flag (target, code, op0both, op1, word_mode,
5181 unsignedp, normalizep);
5183 else if ((code == LT || code == GE) && op1 == const0_rtx)
5187 /* If testing the sign bit, can just test on high word. */
5188 op0h = simplify_gen_subreg (word_mode, op0, mode,
5189 subreg_highpart_offset (word_mode,
5191 return emit_store_flag (target, code, op0h, op1, word_mode,
5192 unsignedp, normalizep);
5196 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5197 complement of A (for GE) and shifting the sign bit to the low bit. */
5198 if (op1 == const0_rtx && (code == LT || code == GE)
5199 && GET_MODE_CLASS (mode) == MODE_INT
5200 && (normalizep || STORE_FLAG_VALUE == 1
5201 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5202 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5203 == ((unsigned HOST_WIDE_INT) 1
5204 << (GET_MODE_BITSIZE (mode) - 1))))))
5208 /* If the result is to be wider than OP0, it is best to convert it
5209 first. If it is to be narrower, it is *incorrect* to convert it
5211 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5213 op0 = convert_modes (target_mode, mode, op0, 0);
5217 if (target_mode != mode)
5221 op0 = expand_unop (mode, one_cmpl_optab, op0,
5222 ((STORE_FLAG_VALUE == 1 || normalizep)
5223 ? 0 : subtarget), 0);
5225 if (STORE_FLAG_VALUE == 1 || normalizep)
5226 /* If we are supposed to produce a 0/1 value, we want to do
5227 a logical shift from the sign bit to the low-order bit; for
5228 a -1/0 value, we do an arithmetic shift. */
5229 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5230 size_int (GET_MODE_BITSIZE (mode) - 1),
5231 subtarget, normalizep != -1);
5233 if (mode != target_mode)
5234 op0 = convert_modes (target_mode, mode, op0, 0);
5239 icode = setcc_gen_code[(int) code];
5241 if (icode != CODE_FOR_nothing)
5243 insn_operand_predicate_fn pred;
5245 /* We think we may be able to do this with a scc insn. Emit the
5246 comparison and then the scc insn. */
5248 do_pending_stack_adjust ();
5249 last = get_last_insn ();
5252 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5253 if (CONSTANT_P (comparison))
5255 switch (GET_CODE (comparison))
5258 if (comparison == const0_rtx)
5262 #ifdef FLOAT_STORE_FLAG_VALUE
5264 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5272 if (normalizep == 1)
5274 if (normalizep == -1)
5276 return const_true_rtx;
5279 /* The code of COMPARISON may not match CODE if compare_from_rtx
5280 decided to swap its operands and reverse the original code.
5282 We know that compare_from_rtx returns either a CONST_INT or
5283 a new comparison code, so it is safe to just extract the
5284 code from COMPARISON. */
5285 code = GET_CODE (comparison);
5287 /* Get a reference to the target in the proper mode for this insn. */
5288 compare_mode = insn_data[(int) icode].operand[0].mode;
5290 pred = insn_data[(int) icode].operand[0].predicate;
5291 if (optimize || ! (*pred) (subtarget, compare_mode))
5292 subtarget = gen_reg_rtx (compare_mode);
5294 pattern = GEN_FCN (icode) (subtarget);
5297 emit_insn (pattern);
5298 return emit_store_flag_1 (target, subtarget, compare_mode,
5304 /* We don't have an scc insn, so try a cstore insn. */
5306 for (compare_mode = mode; compare_mode != VOIDmode;
5307 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5309 icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5310 if (icode != CODE_FOR_nothing)
5314 if (icode != CODE_FOR_nothing)
5316 enum machine_mode result_mode
5317 = insn_data[(int) icode].operand[0].mode;
5318 rtx cstore_op0 = op0;
5319 rtx cstore_op1 = op1;
5321 do_pending_stack_adjust ();
5322 last = get_last_insn ();
5324 if (compare_mode != mode)
5326 cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5328 cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5332 if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5334 cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5336 if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5338 cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5340 comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5344 if (optimize || !(insn_data[(int) icode].operand[0].predicate
5345 (subtarget, result_mode)))
5346 subtarget = gen_reg_rtx (result_mode);
5348 pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5353 emit_insn (pattern);
5354 return emit_store_flag_1 (target, subtarget, result_mode,
5360 delete_insns_since (last);
5362 /* If optimizing, use different pseudo registers for each insn, instead
5363 of reusing the same pseudo. This leads to better CSE, but slows
5364 down the compiler, since there are more pseudos */
5365 subtarget = (!optimize
5366 && (target_mode == mode)) ? target : NULL_RTX;
5368 /* If we reached here, we can't do this with a scc insn. However, there
5369 are some comparisons that can be done directly. For example, if
5370 this is an equality comparison of integers, we can try to exclusive-or
5371 (or subtract) the two operands and use a recursive call to try the
5372 comparison with zero. Don't do any of these cases if branches are
5375 if (BRANCH_COST (optimize_insn_for_speed_p (),
5377 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5378 && op1 != const0_rtx)
5380 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5384 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5387 tem = emit_store_flag (target, code, tem, const0_rtx,
5388 mode, unsignedp, normalizep);
5390 delete_insns_since (last);
5394 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5395 the constant zero. Reject all other comparisons at this point. Only
5396 do LE and GT if branches are expensive since they are expensive on
5397 2-operand machines. */
5399 if (BRANCH_COST (optimize_insn_for_speed_p (),
5401 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5402 || (code != EQ && code != NE
5403 && (BRANCH_COST (optimize_insn_for_speed_p (),
5404 false) <= 1 || (code != LE && code != GT))))
5407 /* See what we need to return. We can only return a 1, -1, or the
5410 if (normalizep == 0)
5412 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5413 normalizep = STORE_FLAG_VALUE;
5415 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5416 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5417 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5423 /* Try to put the result of the comparison in the sign bit. Assume we can't
5424 do the necessary operation below. */
5428 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5429 the sign bit set. */
5433 /* This is destructive, so SUBTARGET can't be OP0. */
5434 if (rtx_equal_p (subtarget, op0))
5437 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5440 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5444 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5445 number of bits in the mode of OP0, minus one. */
5449 if (rtx_equal_p (subtarget, op0))
5452 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5453 size_int (GET_MODE_BITSIZE (mode) - 1),
5455 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5459 if (code == EQ || code == NE)
5461 /* For EQ or NE, one way to do the comparison is to apply an operation
5462 that converts the operand into a positive number if it is nonzero
5463 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5464 for NE we negate. This puts the result in the sign bit. Then we
5465 normalize with a shift, if needed.
5467 Two operations that can do the above actions are ABS and FFS, so try
5468 them. If that doesn't work, and MODE is smaller than a full word,
5469 we can use zero-extension to the wider mode (an unsigned conversion)
5470 as the operation. */
5472 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5473 that is compensated by the subsequent overflow when subtracting
5476 if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5477 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5478 else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5479 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5480 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5482 tem = convert_modes (word_mode, mode, op0, 1);
5489 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5492 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5495 /* If we couldn't do it that way, for NE we can "or" the two's complement
5496 of the value with itself. For EQ, we take the one's complement of
5497 that "or", which is an extra insn, so we only handle EQ if branches
5502 || BRANCH_COST (optimize_insn_for_speed_p (),
5505 if (rtx_equal_p (subtarget, op0))
5508 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5509 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5512 if (tem && code == EQ)
5513 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5517 if (tem && normalizep)
5518 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5519 size_int (GET_MODE_BITSIZE (mode) - 1),
5520 subtarget, normalizep == 1);
5524 if (GET_MODE (tem) != target_mode)
5526 convert_move (target, tem, 0);
5529 else if (!subtarget)
5531 emit_move_insn (target, tem);
5536 delete_insns_since (last);
5541 /* Like emit_store_flag, but always succeeds. */
5544 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5545 enum machine_mode mode, int unsignedp, int normalizep)
5549 /* First see if emit_store_flag can do the job. */
5550 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5554 if (normalizep == 0)
5557 /* If this failed, we have to do this with set/compare/jump/set code. */
5560 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5561 target = gen_reg_rtx (GET_MODE (target));
5563 emit_move_insn (target, const1_rtx);
5564 label = gen_label_rtx ();
5565 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5568 emit_move_insn (target, const0_rtx);
5574 /* Perform possibly multi-word comparison and conditional jump to LABEL
5575 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5576 now a thin wrapper around do_compare_rtx_and_jump. */
5579 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5582 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5583 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5584 NULL_RTX, NULL_RTX, label);