/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
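
/* For illustration (not in the original source): with x == 8 the macro
   computes 1000b & 0111b == 0, so 8 passes; with x == 6 it computes
   0110b & 0101b == 0100b != 0, so 6 is rejected.  x == 0 also passes,
   hence the _OR_ZERO_ in the name.  */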

/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   insns.  */

static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost[2];
static int add_cost[2][NUM_MACHINE_MODES];
static int neg_cost[2][NUM_MACHINE_MODES];
static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[2][NUM_MACHINE_MODES];
static int sdiv_cost[2][NUM_MACHINE_MODES];
static int udiv_cost[2][NUM_MACHINE_MODES];
static int mul_widen_cost[2][NUM_MACHINE_MODES];
static int mul_highpart_cost[2][NUM_MACHINE_MODES];
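
/* Illustrative note (not in the original source): the leading [2] index
   in the tables above selects the cost regime used by rtx_cost, 0 when
   optimizing for size and 1 when optimizing for speed.  For example,
   shift_cost[1][mode][3] is the speed cost of shifting a value of MODE
   left by 3 bits.  */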

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;		rtunion reg_fld[2];
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def sdiv;	rtunion sdiv_fld1;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
    struct rtx_def smod_32;	rtunion smod_32_fld1;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
	   mode != VOIDmode;
	   mode = GET_MODE_WIDER_MODE (mode))
	{
	  PUT_MODE (&all.reg, mode);
	  PUT_MODE (&all.plus, mode);
	  PUT_MODE (&all.neg, mode);
	  PUT_MODE (&all.mult, mode);
	  PUT_MODE (&all.sdiv, mode);
	  PUT_MODE (&all.udiv, mode);
	  PUT_MODE (&all.sdiv_32, mode);
	  PUT_MODE (&all.smod_32, mode);
	  PUT_MODE (&all.wide_trunc, mode);
	  PUT_MODE (&all.shift, mode);
	  PUT_MODE (&all.shift_mult, mode);
	  PUT_MODE (&all.shift_add, mode);
	  PUT_MODE (&all.shift_sub, mode);

	  add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
	  neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
	  mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
	  sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
	  udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);

	  sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
					  <= 2 * add_cost[speed][mode]);
	  smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
					  <= 4 * add_cost[speed][mode]);
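
	  /* Illustrative note (an interpretation, not original text):
	     these thresholds reflect the usual open-coded expansions.
	     A signed division by a power of two can be done with a
	     shift plus a small fixup, so it is "cheap" when dividing
	     by 32 costs no more than two additions; the corresponding
	     modulus needs roughly twice the work, hence the allowance
	     of four additions.  */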

	  wider_mode = GET_MODE_WIDER_MODE (mode);
	  if (wider_mode != VOIDmode)
	    {
	      PUT_MODE (&all.zext, wider_mode);
	      PUT_MODE (&all.wide_mult, wider_mode);
	      PUT_MODE (&all.wide_lshr, wider_mode);
	      XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

	      mul_widen_cost[speed][wider_mode]
		= rtx_cost (&all.wide_mult, SET, speed);
	      mul_highpart_cost[speed][mode]
		= rtx_cost (&all.wide_trunc, SET, speed);
	    }

	  shift_cost[speed][mode][0] = 0;
	  shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
	    = add_cost[speed][mode];

	  n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
	  for (m = 1; m < n; m++)
	    {
	      XEXP (&all.shift, 1) = cint[m];
	      XEXP (&all.shift_mult, 1) = pow2[m];

	      shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
	      shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
	      shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
	    }
	}
    }
  default_rtl_profile ();
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
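
/* Usage sketch (illustrative): a caller that wants the mode required
   for the value operand of insv would write

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   and get MAX_MACHINE_MODE back if the target provides no insv
   pattern, exactly as store_bit_field_1 does below.  */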

/* Return true if X, of mode MODE, matches the predicate for operand
   OPNO of instruction ICODE.  Allow volatile memories, regardless of
   the ambient volatile_ok setting.  */

static bool
check_predicate_volatile_ok (enum insn_code icode, int opno,
			     rtx x, enum machine_mode mode)
{
  bool save_volatile_ok, result;

  save_volatile_ok = volatile_ok;
  volatile_ok = true;
  result = insn_data[(int) icode].operand[opno].predicate (x, mode);
  volatile_ok = save_volatile_ok;
  return result;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
		   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  rtx orig_value = value;
  int byte_offset;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
	  != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return true;
	}
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		+ (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (optab_handler (movstrict_optab, fieldmode)->insn_code
	  != CODE_FOR_nothing))
    {
      int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
      rtx insn;
      rtx start = get_last_insn ();
      rtx arg0 = op0;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (op0);
	}

      insn = (GEN_FCN (icode)
	      (gen_rtx_SUBREG (fieldmode, arg0,
			       (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
			       + (offset * UNITS_PER_WORD)),
	       value));
      if (insn)
	{
	  emit_insn (insn);
	  return true;
	}
      delete_insns_since (start);
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 will result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);

	  if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
					    bitsize - i * BITS_PER_WORD),
				  bitnum + bit_offset, word_mode,
				  value_word, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy
		 it to a pseudo.  We can remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may
		 have been added above.  */
	      gcc_assert (GET_CODE (op0) == SUBREG
			  && (GET_MODE_SIZE (GET_MODE (op0))
			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
	      op0 = SUBREG_REG (op0);
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */

  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
							VOIDmode)
      && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;
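
      /* Worked example (illustrative): with unit == 32, bitsize == 8
	 and xbitpos == 4, an insn that numbers bits from the other end
	 sees the same field at 32 - 8 - 4 == 20; bits [4,12) counted
	 from one end are bits [20,28) counted from the other.  */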

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (op_mode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (op_mode, value1);
	    }
	  else if (GET_CODE (value) == CONST_INT)
	    value1 = gen_int_mode (INTVAL (value), op_mode);
	  else
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    gcc_assert (CONSTANT_P (value));
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, op_mode)))
	value1 = force_reg (op_mode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	{
	  emit_insn (pat);
	  return true;
	}
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If OP0 is
	 BLKmode, get the smallest mode consistent with the alignment.  If
	 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
	 mode.  Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || (op_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
	bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
				  (op_mode == MAX_MACHINE_MODE
				   ? VOIDmode : op_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  rtx last, tempreg, xop0;
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  last = get_last_insn ();

	  /* Adjust address to point to the containing unit of
	     that mode.  Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;
	  xop0 = adjust_address (op0, bestmode, xoffset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, xbitpos,
				 fieldmode, orig_value, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
		 rtx value)
{
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos, value);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
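
/* Worked example of the sequence above (illustrative): storing VALUE
   into an 8-bit field at BITPOS 4 of a 32-bit MODE amounts to

     temp = op0 & ~(0xff << 4);
     temp |= (value & 0xff) << 4;
     op0 = temp;

   with ~(0xff << 4) being mask_rtx (mode, 4, 8, 1); the AND is skipped
   when VALUE is known to be all ones (all_one) and the IOR when it is
   known to be zero (all_zero).  */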

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			     thispos, part);
      bitsdone += thissize;
    }
}
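
/* Worked example (illustrative): with BITS_PER_WORD == 32, storing a
   10-bit field at BITPOS 28 of a register pair is done in two pieces:
   bits [28,32) of the first word (thissize == 4), then bits [0,6) of
   the next word (thissize == 6), each piece going through
   store_fixed_bit_field on a single word.  */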

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
			     enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		     enum machine_mode mode, enum machine_mode tmode,
		     bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  enum insn_code icode;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;
      int nunits = GET_MODE_NUNITS (GET_MODE (op0));

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
	if (GET_MODE_NUNITS (new_mode) == nunits
	    && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
	    && targetm.vector_mode_supported_p (new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
	  != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
	dest = target;

      if (dest == NULL)
	dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
	dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return gen_lowpart (tmode, dest);
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  {
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
	else if (REG_P (op0))
	  {
	    rtx reg, subreg;
	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
					    MODE_INT);
	    reg = gen_reg_rtx (imode);
	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
	    emit_move_insn (subreg, op0);
	    op0 = reg;
	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
	  }
	else
	  {
	    rtx mem = assign_stack_temp (GET_MODE (op0),
					 GET_MODE_SIZE (GET_MODE (op0)), 0);
	    emit_move_insn (mem, op0);
	    op0 = adjust_address (mem, BLKmode, 0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
	   ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	   : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
				     GET_MODE_BITSIZE (GET_MODE (op0)))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
	{
	  rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
					 byte_offset);
	  if (sub == NULL)
	    goto no_subreg_mode_swap;
	  op0 = sub;
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode);

	  gcc_assert (target_part);

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_cst (NULL_TREE,
					    GET_MODE_BITSIZE (mode) - bitsize),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_cst (NULL_TREE,
					  GET_MODE_BITSIZE (mode) - bitsize),
			   NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
  icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
  if (ext_mode != MAX_MACHINE_MODE
      && bitsize > 0
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
	   && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
      && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
    {
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
      rtx bitsize_rtx, bitpos_rtx;
      rtx last = get_last_insn ();
      rtx xop0 = op0;
      rtx xtarget = target;
      rtx xspec_target = target;
      rtx xspec_target_subreg = 0;
      rtx pat;

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
	xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
      if (MEM_P (xop0))
	/* Get ref to first byte containing part of the field.  */
	xop0 = adjust_address (xop0, byte_mode, xoffset);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;

      unit = GET_MODE_BITSIZE (ext_mode);

      if (xtarget == 0)
	xtarget = xspec_target = gen_reg_rtx (tmode);

      if (GET_MODE (xtarget) != ext_mode)
	{
	  /* Don't use LHS paradoxical subreg if explicit truncation is needed
	     between the mode of the extraction (word_mode) and the target
	     mode.  Instead, create a temporary and use convert_move to set
	     the target.  */
	  if (REG_P (xtarget)
	      && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
					GET_MODE_BITSIZE (ext_mode)))
	    {
	      xtarget = gen_lowpart (ext_mode, xtarget);
	      if (GET_MODE_SIZE (ext_mode)
		  > GET_MODE_SIZE (GET_MODE (xspec_target)))
		xspec_target_subreg = xtarget;
	    }
	  else
	    xtarget = gen_reg_rtx (ext_mode);
	}

      /* If this machine's ext(z)v insists on a register target,
	 make sure we have one.  */
      if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
	xtarget = gen_reg_rtx (ext_mode);

      bitsize_rtx = GEN_INT (bitsize);
      bitpos_rtx = GEN_INT (xbitpos);

      pat = (unsignedp
	     ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
	     : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
      if (pat)
	{
	  emit_insn (pat);
	  if (xtarget == xspec_target)
	    return xtarget;
	  if (xtarget == xspec_target_subreg)
	    return xspec_target;
	  return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
	}
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If
	 OP0 is BLKmode, get the smallest mode consistent with the
	 alignment.  If OP0 is a non-BLKmode object that is no
	 wider than EXT_MODE, use its mode.  Otherwise, use the
	 smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || (ext_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
	bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
				  (ext_mode == MAX_MACHINE_MODE
				   ? VOIDmode : ext_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  /* Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;

	  /* Make sure the register is big enough for the whole field.  */
	  if (xoffset * BITS_PER_UNIT + unit
	      >= offset * BITS_PER_UNIT + bitsize)
	    {
	      rtx last, result, xop0;

	      last = get_last_insn ();

	      /* Fetch it to a register in that size.  */
	      xop0 = adjust_address (op0, bestmode, xoffset);
	      xop0 = force_reg (bestmode, xop0);
	      result = extract_bit_field_1 (xop0, bitsize, xbitpos,
					    unsignedp, target,
					    mode, tmode, false);
	      if (result)
		return result;

	      delete_insns_since (last);
	    }
	}
    }

  if (!fallback_p)
    return NULL;

  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
				    bitpos, target, unsignedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}

/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   enum machine_mode mode, enum machine_mode tmode)
{
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, true);
}

/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
			 unsigned HOST_WIDE_INT offset,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, rtx target,
			 int unsignedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_cst (NULL_TREE, bitpos);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }
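
  /* Worked example of the unsigned path above (illustrative):
     extracting an 8-bit field at BITPOS 4 from a 32-bit word computes

       op0 = (op0 >> 4) & 0xff;

     where 0xff is mask_rtx (mode, 0, 8, 0); the masking is omitted
     when bitpos + bitsize fills the mode, because the logical shift
     already brought in zeros from above.  */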

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount
	= build_int_cst (NULL_TREE,
			 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_cst (NULL_TREE,
				      GET_MODE_BITSIZE (mode) - bitsize),
		       target, 0);
}
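
/* Worked example of the signed path above (illustrative): extracting
   a signed 8-bit field at BITPOS 4 from a 32-bit mode first shifts
   left by 32 - (8 + 4) == 20, putting the field's msb into bit 31,
   then arithmetic-shifts right by 32 - 8 == 24, leaving the
   sign-extended field in the low 8 bits.  */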

/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
{
  HOST_WIDE_INT masklow, maskhigh;

  if (bitsize == 0)
    masklow = 0;
  else if (bitpos < HOST_BITS_PER_WIDE_INT)
    masklow = (HOST_WIDE_INT) -1 << bitpos;
  else
    masklow = 0;

  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
    masklow &= ((unsigned HOST_WIDE_INT) -1
		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));

  if (bitpos <= HOST_BITS_PER_WIDE_INT)
    maskhigh = -1;
  else
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);

  if (bitsize == 0)
    maskhigh = 0;
  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
  else
    maskhigh = 0;

  if (complement)
    {
      maskhigh = ~maskhigh;
      masklow = ~masklow;
    }

  return immed_double_const (masklow, maskhigh, mode);
}
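
/* Illustrative examples: mask_rtx (SImode, 4, 8, 0) builds the
   constant 0xff0, eight ones above four zeros, while
   mask_rtx (SImode, 4, 8, 1) builds its complement truncated to the
   32 bits of SImode, 0xfffff00f.  */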

/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */

static rtx
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
{
  unsigned HOST_WIDE_INT v = INTVAL (value);
  HOST_WIDE_INT low, high;

  if (bitsize < HOST_BITS_PER_WIDE_INT)
    v &= ~((HOST_WIDE_INT) -1 << bitsize);

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    {
      low = v << bitpos;
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
    }
  else
    {
      low = 0;
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
    }

  return immed_double_const (low, high, mode);
}
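
/* Illustrative example: lshift_value (SImode, GEN_INT (0x2b), 8, 6)
   truncates 0x2b to its low 6 bits (still 0x2b) and shifts it left
   by 8, yielding the CONST_INT 0x2b00.  */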
1874 /* Extract a bit field that is split across two words
1875 and return an RTX for the result.
1877 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1878 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1879 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1882 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1883 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1886 unsigned int bitsdone = 0;
1887 rtx result = NULL_RTX;
1890 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1892 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1893 unit = BITS_PER_WORD;
1895 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1897 while (bitsdone < bitsize)
1899 unsigned HOST_WIDE_INT thissize;
1901 unsigned HOST_WIDE_INT thispos;
1902 unsigned HOST_WIDE_INT offset;
1904 offset = (bitpos + bitsdone) / unit;
1905 thispos = (bitpos + bitsdone) % unit;
1907 /* THISSIZE must not overrun a word boundary. Otherwise,
extract_fixed_bit_field will call us again, and we will mutually
recurse forever.  */
1910 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1911 thissize = MIN (thissize, unit - thispos);
1913 /* If OP0 is a register, then handle OFFSET here.
1915 When handling multiword bitfields, extract_bit_field may pass
1916 down a word_mode SUBREG of a larger REG for a bitfield that actually
1917 crosses a word boundary. Thus, for a SUBREG, we must find
1918 the current word starting from the base register. */
1919 if (GET_CODE (op0) == SUBREG)
1921 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1922 word = operand_subword_force (SUBREG_REG (op0), word_offset,
GET_MODE (SUBREG_REG (op0)));
offset = 0;
else if (REG_P (op0))
  {
    word = operand_subword_force (op0, offset, GET_MODE (op0));
    offset = 0;
  }
else
  word = op0;
1934 /* Extract the parts in bit-counting order,
1935 whose meaning is determined by BYTES_PER_UNIT.
1936 OFFSET is in UNITs, and UNIT is in bits.
1937 extract_fixed_bit_field wants offset in bytes. */
1938 part = extract_fixed_bit_field (word_mode, word,
1939 offset * unit / BITS_PER_UNIT,
1940 thissize, thispos, 0, 1);
1941 bitsdone += thissize;
1943 /* Shift this part into place for the result. */
1944 if (BYTES_BIG_ENDIAN)
1946 if (bitsize != bitsdone)
1947 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1948 build_int_cst (NULL_TREE, bitsize - bitsdone),
1953 if (bitsdone != thissize)
1954 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1955 build_int_cst (NULL_TREE,
1956 bitsdone - thissize), 0, 1);
1962 /* Combine the parts with bitwise or. This works
1963 because we extracted each part as an unsigned bit field. */
1964 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
/* Unsigned bit field: we are done.  */
if (unsignedp)
  return result;
1973 /* Signed bit field: sign-extend with two arithmetic shifts. */
1974 result = expand_shift (LSHIFT_EXPR, word_mode, result,
build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
NULL_RTX, 0);
1977 return expand_shift (RSHIFT_EXPR, word_mode, result,
build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
NULL_RTX, 0);
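/* Worked example (editorial): on a 32-bit little-endian target, a 20-bit
   field at BITPOS 28 is read in two parts: 4 bits from the first word
   (THISPOS 28), then 16 bits from the second word (THISPOS 0) shifted
   left by 4 before the IOR.  A signed result then gets the shift pair
   above: left by 32 - 20 = 12, arithmetic right by 12.  */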
1982 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1983 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1984 MODE, fill the upper bits with zeros. Fail if the layout of either
1985 mode is unknown (as for CC modes) or if the extraction would involve
1986 unprofitable mode punning. Return the value on success, otherwise
1989 This is different from gen_lowpart* in these respects:
1991 - the returned value must always be considered an rvalue
- when MODE is wider than SRC_MODE, the extraction involves
  a zero extension
1996 - when MODE is smaller than SRC_MODE, the extraction involves
1997 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1999 In other words, this routine performs a computation, whereas the
gen_lowpart* routines are conceptually lvalue or rvalue subreg
operations.  */
2004 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2006 enum machine_mode int_mode, src_int_mode;
if (mode == src_mode)
  return src;
2011 if (CONSTANT_P (src))
2013 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2014 fails, it will happily create (subreg (symbol_ref)) or similar
2016 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
rtx ret = simplify_subreg (mode, src, src_mode, byte);
if (ret)
  return ret;
2021 if (GET_MODE (src) == VOIDmode
|| !validate_subreg (mode, src_mode, src, byte))
return NULL_RTX;
2025 src = force_reg (GET_MODE (src), src);
2026 return gen_rtx_SUBREG (mode, src, byte);
if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
  return NULL_RTX;
2032 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2033 && MODES_TIEABLE_P (mode, src_mode))
rtx x = gen_lowpart_common (mode, src);
if (x)
  return x;
2040 src_int_mode = int_mode_for_mode (src_mode);
2041 int_mode = int_mode_for_mode (mode);
if (src_int_mode == BLKmode || int_mode == BLKmode)
  return NULL_RTX;
if (!MODES_TIEABLE_P (src_int_mode, src_mode))
  return NULL_RTX;
if (!MODES_TIEABLE_P (int_mode, mode))
  return NULL_RTX;
2050 src = gen_lowpart (src_int_mode, src);
2051 src = convert_modes (int_mode, src_int_mode, src, true);
src = gen_lowpart (mode, src);
return src;
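/* Example of the mode-punning path (editorial sketch, assuming DFmode is
   64 bits wide): reading the low SImode bits of a DFmode value goes
   DFmode -> DImode via gen_lowpart (int_mode_for_mode maps DFmode to
   DImode), DImode -> SImode by truncation in convert_modes, and the
   final gen_lowpart is then a no-op.  */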
2056 /* Add INC into TARGET. */
2059 expand_inc (rtx target, rtx inc)
2061 rtx value = expand_binop (GET_MODE (target), add_optab,
target, inc,
target, 0, OPTAB_LIB_WIDEN);
2064 if (value != target)
2065 emit_move_insn (target, value);
2068 /* Subtract DEC from TARGET. */
2071 expand_dec (rtx target, rtx dec)
2073 rtx value = expand_binop (GET_MODE (target), sub_optab,
target, dec,
target, 0, OPTAB_LIB_WIDEN);
2076 if (value != target)
2077 emit_move_insn (target, value);
2080 /* Output a shift instruction for expression code CODE,
2081 with SHIFTED being the rtx for the value to shift,
2082 and AMOUNT the tree for the amount to shift by.
2083 Store the result in the rtx TARGET, if that is convenient.
2084 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2085 Return the rtx for where the value is. */
2088 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2089 tree amount, rtx target, int unsignedp)
2092 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2093 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2094 optab lshift_optab = ashl_optab;
2095 optab rshift_arith_optab = ashr_optab;
2096 optab rshift_uns_optab = lshr_optab;
2097 optab lrotate_optab = rotl_optab;
2098 optab rrotate_optab = rotr_optab;
2099 enum machine_mode op1_mode;
int attempt;
bool speed = optimize_insn_for_speed_p ();
2103 op1 = expand_normal (amount);
2104 op1_mode = GET_MODE (op1);
/* Determine whether the shift/rotate amount is a vector or a scalar.  If the
   shift amount is a vector, use the vector/vector shift patterns.  */
2108 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2110 lshift_optab = vashl_optab;
2111 rshift_arith_optab = vashr_optab;
2112 rshift_uns_optab = vlshr_optab;
2113 lrotate_optab = vrotl_optab;
2114 rrotate_optab = vrotr_optab;
2117 /* Previously detected shift-counts computed by NEGATE_EXPR
and shifted in the other direction; but that does not work
on all machines.  */
2121 if (SHIFT_COUNT_TRUNCATED)
2123 if (GET_CODE (op1) == CONST_INT
2124 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2125 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2126 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2127 % GET_MODE_BITSIZE (mode));
2128 else if (GET_CODE (op1) == SUBREG
2129 && subreg_lowpart_p (op1))
2130 op1 = SUBREG_REG (op1);
if (op1 == const0_rtx)
  return shifted;
/* Check whether it's cheaper to implement a left shift by a constant
   bit count as a sequence of additions.  */
2138 if (code == LSHIFT_EXPR
2139 && GET_CODE (op1) == CONST_INT
&& INTVAL (op1) > 0
&& INTVAL (op1) < GET_MODE_BITSIZE (mode)
2142 && INTVAL (op1) < MAX_BITS_PER_WORD
2143 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2144 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
int i;
for (i = 0; i < INTVAL (op1); i++)
2149 temp = force_reg (mode, shifted);
2150 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2151 unsignedp, OPTAB_LIB_WIDEN);
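/* Example (editorial): with INTVAL (op1) == 2 on a target where two adds
   are cheaper than one shift, x << 2 is emitted as t = x + x; t = t + t.  */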
2156 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2158 enum optab_methods methods;
if (attempt == 0)
  methods = OPTAB_DIRECT;
2162 else if (attempt == 1)
2163 methods = OPTAB_WIDEN;
else
  methods = OPTAB_LIB_WIDEN;
if (rotate)
  {
    /* Widening does not work for rotation.  */
    if (methods == OPTAB_WIDEN)
      continue;
2172 else if (methods == OPTAB_LIB_WIDEN)
2174 /* If we have been unable to open-code this by a rotation,
2175 do it as the IOR of two shifts. I.e., to rotate A
2176 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2177 where C is the bitsize of A.
2179 It is theoretically possible that the target machine might
2180 not be able to perform either shift and hence we would
2181 be making two libcalls rather than just the one for the
2182 shift (similarly if IOR could not be done). We will allow
this extremely unlikely lossage to avoid complicating the
code below.  */
2186 rtx subtarget = target == shifted ? 0 : target;
2187 tree new_amount, other_amount;
2189 tree type = TREE_TYPE (amount);
2190 if (GET_MODE (op1) != TYPE_MODE (type)
2191 && GET_MODE (op1) != VOIDmode)
2192 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2193 new_amount = make_tree (type, op1);
other_amount
  = fold_build2 (MINUS_EXPR, type,
                 build_int_cst (type, GET_MODE_BITSIZE (mode)),
                 new_amount);
2199 shifted = force_reg (mode, shifted);
2201 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2202 mode, shifted, new_amount, 0, 1);
2203 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2204 mode, shifted, other_amount, subtarget, 1);
2205 return expand_binop (mode, ior_optab, temp, temp1, target,
2206 unsignedp, methods);
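/* Worked example (editorial): rotating a 32-bit value left by 3 expands
   to (A << 3) | ((unsigned) A >> 29); NEW_AMOUNT is 3 and OTHER_AMOUNT
   is 32 - 3 = 29 above.  */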
2209 temp = expand_binop (mode,
2210 left ? lrotate_optab : rrotate_optab,
2211 shifted, op1, target, unsignedp, methods);
else if (unsignedp)
  temp = expand_binop (mode,
2215 left ? lshift_optab : rshift_uns_optab,
2216 shifted, op1, target, unsignedp, methods);
2218 /* Do arithmetic shifts.
2219 Also, if we are going to widen the operand, we can just as well
2220 use an arithmetic right-shift instead of a logical one. */
2221 if (temp == 0 && ! rotate
2222 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2224 enum optab_methods methods1 = methods;
2226 /* If trying to widen a log shift to an arithmetic shift,
2227 don't accept an arithmetic shift of the same size. */
if (unsignedp)
  methods1 = OPTAB_MUST_WIDEN;
2231 /* Arithmetic shift */
2233 temp = expand_binop (mode,
2234 left ? lshift_optab : rshift_arith_optab,
2235 shifted, op1, target, unsignedp, methods1);
2238 /* We used to try extzv here for logical right shifts, but that was
2239 only useful for one machine, the VAX, and caused poor code
2240 generation there for lshrdi3, so the code was deleted and a
2241 define_expand for lshrsi3 was added to vax.md. */
2261 /* This structure holds the "cost" of a multiply sequence. The
2262 "cost" field holds the total rtx_cost of every operator in the
2263 synthetic multiplication sequence, hence cost(a op b) is defined
2264 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2265 The "latency" field holds the minimum possible latency of the
2266 synthetic multiply, on a hypothetical infinitely parallel CPU.
2267 This is the critical path, or the maximum height, of the expression
2268 tree which is the sum of rtx_costs on the most expensive path from
2269 any leaf to the root. Hence latency(a op b) is defined as zero for
2270 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
struct mult_cost {
  short cost;     /* Total rtx_cost of the multiplication sequence.  */
  short latency;  /* The latency of the multiplication sequence.  */
};
/* This macro is used to compare a pointer to a mult_cost against a
   single integer "rtx_cost" value.  This is equivalent to the macro
   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2280 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2281 || ((X)->cost == (Y) && (X)->latency < (Y)))
2283 /* This macro is used to compare two pointers to mult_costs against
2284 each other. The macro returns true if X is cheaper than Y.
2285 Currently, the cheaper of two mult_costs is the one with the
2286 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2287 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2288 || ((X)->cost == (Y)->cost \
2289 && (X)->latency < (Y)->latency))
2291 /* This structure records a sequence of operations.
2292 `ops' is the number of operations recorded.
2293 `cost' is their total cost.
2294 The operations are stored in `op' and the corresponding
2295 logarithms of the integer coefficients in `log'.
2297 These are the operations:
2298 alg_zero total := 0;
2299 alg_m total := multiplicand;
2300 alg_shift total := total * coeff
2301 alg_add_t_m2 total := total + multiplicand * coeff;
2302 alg_sub_t_m2 total := total - multiplicand * coeff;
2303 alg_add_factor total := total * coeff + total;
2304 alg_sub_factor total := total * coeff - total;
2305 alg_add_t2_m total := total * coeff + multiplicand;
2306 alg_sub_t2_m total := total * coeff - multiplicand;
2308 The first operand must be either alg_zero or alg_m. */
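/* Worked example (editorial): t == 45 (binary 101101) can be recorded as
     total := multiplicand;                          (alg_m)
     total := total * 4 + multiplicand;   -> 5 * x   (alg_add_t2_m, log 2)
     total := total * 8 + total;          -> 45 * x  (alg_add_factor, log 3)
   i.e. two shift-add operations replace the multiply.  */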
struct algorithm
{
  struct mult_cost cost;
  short ops;
/* The sizes of the OP and LOG fields are not directly related to the
   word size, but the worst-case algorithms arise when we have few
   consecutive ones or zeros, i.e., a multiplicand like 10101010101...
   In that case we will generate shift-by-2, add, shift-by-2, add, ...,
   in total wordsize operations.  */
2319 enum alg_code op[MAX_BITS_PER_WORD];
char log[MAX_BITS_PER_WORD];
};
2323 /* The entry for our multiplication cache/hash table. */
2324 struct alg_hash_entry {
2325 /* The number we are multiplying by. */
2326 unsigned HOST_WIDE_INT t;
2328 /* The mode in which we are multiplying something by T. */
2329 enum machine_mode mode;
/* The best multiplication algorithm for t.  */
enum alg_code alg;
2334 /* The cost of multiplication if ALG_CODE is not alg_impossible.
Otherwise, the cost within which multiplication by T is
impossible.  */
2337 struct mult_cost cost;
/* Optimized for speed?  */
bool speed;
};
2343 /* The number of cache/hash entries. */
2344 #if HOST_BITS_PER_WIDE_INT == 64
2345 #define NUM_ALG_HASH_ENTRIES 1031
#else
#define NUM_ALG_HASH_ENTRIES 307
#endif
2350 /* Each entry of ALG_HASH caches alg_code for some integer. This is
actually a hash table.  If we have a collision, the older
2352 entry is kicked out. */
2353 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2355 /* Indicates the type of fixup needed after a constant multiplication.
2356 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2357 the result should be negated, and ADD_VARIANT means that the
2358 multiplicand should be added to the result. */
2359 enum mult_variant {basic_variant, negate_variant, add_variant};
2361 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2362 const struct mult_cost *, enum machine_mode mode);
2363 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2364 struct algorithm *, enum mult_variant *, int);
2365 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2366 const struct algorithm *, enum mult_variant);
2367 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2368 int, rtx *, int *, int *);
2369 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2370 static rtx extract_high_half (enum machine_mode, rtx);
2371 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2372 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2374 /* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than COST_LIMIT.
If retval.cost >= COST_LIMIT, no algorithm was found and all
other fields of the returned struct are undefined.
2378 MODE is the machine mode of the multiplication. */
2381 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2382 const struct mult_cost *cost_limit, enum machine_mode mode)
int m;
struct algorithm *alg_in, *best_alg;
2386 struct mult_cost best_cost;
2387 struct mult_cost new_limit;
2388 int op_cost, op_latency;
2389 unsigned HOST_WIDE_INT q;
2390 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
int hash_index;
bool cache_hit = false;
2393 enum alg_code cache_alg = alg_zero;
2394 bool speed = optimize_insn_for_speed_p ();
2396 /* Indicate that no algorithm is yet found. If no algorithm
2397 is found, this value will be returned and indicate failure. */
2398 alg_out->cost.cost = cost_limit->cost + 1;
2399 alg_out->cost.latency = cost_limit->latency + 1;
2401 if (cost_limit->cost < 0
|| (cost_limit->cost == 0 && cost_limit->latency <= 0))
return;
2405 /* Restrict the bits of "t" to the multiplication's mode. */
2406 t &= GET_MODE_MASK (mode);
/* t == 1 can be done at zero cost.  */
if (t == 1)
2412 alg_out->cost.cost = 0;
2413 alg_out->cost.latency = 0;
2414 alg_out->op[0] = alg_m;
/* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
   fail.  */
if (t == 0)
if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
  return;
else
2427 alg_out->cost.cost = zero_cost[speed];
2428 alg_out->cost.latency = zero_cost[speed];
2429 alg_out->op[0] = alg_zero;
2434 /* We'll be needing a couple extra algorithm structures now. */
2436 alg_in = XALLOCA (struct algorithm);
2437 best_alg = XALLOCA (struct algorithm);
2438 best_cost = *cost_limit;
2440 /* Compute the hash index. */
2441 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2443 /* See if we already know what to do for T. */
2444 if (alg_hash[hash_index].t == t
&& alg_hash[hash_index].mode == mode
2447 && alg_hash[hash_index].speed == speed
2448 && alg_hash[hash_index].alg != alg_unknown)
2450 cache_alg = alg_hash[hash_index].alg;
2452 if (cache_alg == alg_impossible)
2454 /* The cache tells us that it's impossible to synthesize
2455 multiplication by T within alg_hash[hash_index].cost. */
2456 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2457 /* COST_LIMIT is at least as restrictive as the one
2458 recorded in the hash table, in which case we have no
hope of synthesizing a multiplication.  Just
return.  */
return;
2463 /* If we get here, COST_LIMIT is less restrictive than the
2464 one recorded in the hash table, so we may be able to
2465 synthesize a multiplication. Proceed as if we didn't
2466 have the cache entry. */
else
if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2471 /* The cached algorithm shows that this multiplication
2472 requires more cost than COST_LIMIT. Just return. This
2473 way, we don't clobber this cache entry with
alg_impossible but retain useful information.  */
return;
cache_hit = true;
switch (cache_alg)
case alg_shift:
goto do_alg_shift;
case alg_add_t_m2:
case alg_sub_t_m2:
2486 goto do_alg_addsub_t_m2;
2488 case alg_add_factor:
2489 case alg_sub_factor:
2490 goto do_alg_addsub_factor;
case alg_add_t2_m:
goto do_alg_add_t2_m;
case alg_sub_t2_m:
goto do_alg_sub_t2_m;
default:
gcc_unreachable ();
2504 /* If we have a group of zero bits at the low-order part of T, try
2505 multiplying by the remaining bits and then doing a shift. */
if ((t & 1) == 0)
do_alg_shift:
m = floor_log2 (t & -t); /* m = number of low zero bits */
q = t >> m;
2514 /* The function expand_shift will choose between a shift and
2515 a sequence of additions, so the observed cost is given as
2516 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2517 op_cost = m * add_cost[speed][mode];
2518 if (shift_cost[speed][mode][m] < op_cost)
2519 op_cost = shift_cost[speed][mode][m];
2520 new_limit.cost = best_cost.cost - op_cost;
2521 new_limit.latency = best_cost.latency - op_cost;
2522 synth_mult (alg_in, q, &new_limit, mode);
2524 alg_in->cost.cost += op_cost;
2525 alg_in->cost.latency += op_cost;
2526 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528 struct algorithm *x;
2529 best_cost = alg_in->cost;
2530 x = alg_in, alg_in = best_alg, best_alg = x;
2531 best_alg->log[best_alg->ops] = m;
2532 best_alg->op[best_alg->ops] = alg_shift;
2539 /* If we have an odd number, add or subtract one. */
unsigned HOST_WIDE_INT w;

do_alg_addsub_t_m2:
2545 for (w = 1; (w & t) != 0; w <<= 1)
/* If T was -1, then W will be zero after the loop.  This is another
   case where T ends with ...111.  Handling it with (T + 1) and then
   subtracting 1 produces slightly better code and lets algorithm
   selection run much faster than treating it like the ...0111 case
   below.  */
if (w == 0
    || (w > 2
        /* Reject the case where t is 3.
           Thus we prefer addition in that case.  */
        && t != 3))
2558 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2560 op_cost = add_cost[speed][mode];
2561 new_limit.cost = best_cost.cost - op_cost;
2562 new_limit.latency = best_cost.latency - op_cost;
2563 synth_mult (alg_in, t + 1, &new_limit, mode);
2565 alg_in->cost.cost += op_cost;
2566 alg_in->cost.latency += op_cost;
2567 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569 struct algorithm *x;
2570 best_cost = alg_in->cost;
2571 x = alg_in, alg_in = best_alg, best_alg = x;
2572 best_alg->log[best_alg->ops] = 0;
2573 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2578 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2580 op_cost = add_cost[speed][mode];
2581 new_limit.cost = best_cost.cost - op_cost;
2582 new_limit.latency = best_cost.latency - op_cost;
2583 synth_mult (alg_in, t - 1, &new_limit, mode);
2585 alg_in->cost.cost += op_cost;
2586 alg_in->cost.latency += op_cost;
2587 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589 struct algorithm *x;
2590 best_cost = alg_in->cost;
2591 x = alg_in, alg_in = best_alg, best_alg = x;
2592 best_alg->log[best_alg->ops] = 0;
2593 best_alg->op[best_alg->ops] = alg_add_t_m2;
2600 /* Look for factors of t of the form
2601 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2602 If we find such a factor, we can multiply by t using an algorithm that
2603 multiplies by q, shift the result by m and add/subtract it to itself.
2605 We search for large factors first and loop down, even if large factors
2606 are less probable than small; if we find a large factor we will find a
2607 good sequence quickly, and therefore be able to prune (by decreasing
2608 COST_LIMIT) the search. */
2610 do_alg_addsub_factor:
2611 for (m = floor_log2 (t - 1); m >= 2; m--)
2613 unsigned HOST_WIDE_INT d;
2615 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2616 if (t % d == 0 && t > d && m < maxm
2617 && (!cache_hit || cache_alg == alg_add_factor))
2619 /* If the target has a cheap shift-and-add instruction use
2620 that in preference to a shift insn followed by an add insn.
2621 Assume that the shift-and-add is "atomic" with a latency
2622 equal to its cost, otherwise assume that on superscalar
2623 hardware the shift may be executed concurrently with the
2624 earlier steps in the algorithm. */
2625 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2626 if (shiftadd_cost[speed][mode][m] < op_cost)
2628 op_cost = shiftadd_cost[speed][mode][m];
2629 op_latency = op_cost;
else
  op_latency = add_cost[speed][mode];
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_latency;
2636 synth_mult (alg_in, t / d, &new_limit, mode);
2638 alg_in->cost.cost += op_cost;
2639 alg_in->cost.latency += op_latency;
2640 if (alg_in->cost.latency < op_cost)
2641 alg_in->cost.latency = op_cost;
2642 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644 struct algorithm *x;
2645 best_cost = alg_in->cost;
2646 x = alg_in, alg_in = best_alg, best_alg = x;
2647 best_alg->log[best_alg->ops] = m;
2648 best_alg->op[best_alg->ops] = alg_add_factor;
2650 /* Other factors will have been taken care of in the recursion. */
2654 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2655 if (t % d == 0 && t > d && m < maxm
2656 && (!cache_hit || cache_alg == alg_sub_factor))
2658 /* If the target has a cheap shift-and-subtract insn use
2659 that in preference to a shift insn followed by a sub insn.
2660 Assume that the shift-and-sub is "atomic" with a latency
equal to its cost, otherwise assume that on superscalar
2662 hardware the shift may be executed concurrently with the
2663 earlier steps in the algorithm. */
2664 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2665 if (shiftsub_cost[speed][mode][m] < op_cost)
2667 op_cost = shiftsub_cost[speed][mode][m];
2668 op_latency = op_cost;
else
  op_latency = add_cost[speed][mode];
2673 new_limit.cost = best_cost.cost - op_cost;
2674 new_limit.latency = best_cost.latency - op_latency;
2675 synth_mult (alg_in, t / d, &new_limit, mode);
2677 alg_in->cost.cost += op_cost;
2678 alg_in->cost.latency += op_latency;
2679 if (alg_in->cost.latency < op_cost)
2680 alg_in->cost.latency = op_cost;
2681 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683 struct algorithm *x;
2684 best_cost = alg_in->cost;
2685 x = alg_in, alg_in = best_alg, best_alg = x;
2686 best_alg->log[best_alg->ops] = m;
2687 best_alg->op[best_alg->ops] = alg_sub_factor;
2695 /* Try shift-and-add (load effective address) instructions,
2696 i.e. do a*3, a*5, a*9. */
2703 if (m >= 0 && m < maxm)
2705 op_cost = shiftadd_cost[speed][mode][m];
2706 new_limit.cost = best_cost.cost - op_cost;
2707 new_limit.latency = best_cost.latency - op_cost;
2708 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2710 alg_in->cost.cost += op_cost;
2711 alg_in->cost.latency += op_cost;
2712 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714 struct algorithm *x;
2715 best_cost = alg_in->cost;
2716 x = alg_in, alg_in = best_alg, best_alg = x;
2717 best_alg->log[best_alg->ops] = m;
2718 best_alg->op[best_alg->ops] = alg_add_t2_m;
2728 if (m >= 0 && m < maxm)
2730 op_cost = shiftsub_cost[speed][mode][m];
2731 new_limit.cost = best_cost.cost - op_cost;
2732 new_limit.latency = best_cost.latency - op_cost;
2733 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2735 alg_in->cost.cost += op_cost;
2736 alg_in->cost.latency += op_cost;
2737 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739 struct algorithm *x;
2740 best_cost = alg_in->cost;
2741 x = alg_in, alg_in = best_alg, best_alg = x;
2742 best_alg->log[best_alg->ops] = m;
2743 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2751 /* If best_cost has not decreased, we have not found any algorithm. */
2752 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2754 /* We failed to find an algorithm. Record alg_impossible for
2755 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2756 we are asked to find an algorithm for T within the same or
lower COST_LIMIT, we can immediately return to the
caller.  */
2759 alg_hash[hash_index].t = t;
2760 alg_hash[hash_index].mode = mode;
2761 alg_hash[hash_index].speed = speed;
2762 alg_hash[hash_index].alg = alg_impossible;
alg_hash[hash_index].cost = *cost_limit;
return;
/* Cache the result.  */
if (!cache_hit)
2770 alg_hash[hash_index].t = t;
2771 alg_hash[hash_index].mode = mode;
2772 alg_hash[hash_index].speed = speed;
2773 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2774 alg_hash[hash_index].cost.cost = best_cost.cost;
2775 alg_hash[hash_index].cost.latency = best_cost.latency;
/* If we are getting too long a sequence for `struct algorithm'
   to record, make this search fail.  */
if (best_alg->ops == MAX_BITS_PER_WORD)
  return;
2783 /* Copy the algorithm from temporary space to the space at alg_out.
2784 We avoid using structure assignment because the majority of
2785 best_alg is normally undefined, and this is a critical function. */
2786 alg_out->ops = best_alg->ops + 1;
2787 alg_out->cost = best_cost;
2788 memcpy (alg_out->op, best_alg->op,
2789 alg_out->ops * sizeof *alg_out->op);
2790 memcpy (alg_out->log, best_alg->log,
2791 alg_out->ops * sizeof *alg_out->log);
2794 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2795 Try three variations:
2797 - a shift/add sequence based on VAL itself
2798 - a shift/add sequence based on -VAL, followed by a negation
2799 - a shift/add sequence based on VAL - 1, followed by an addition.
Return true if the cheapest of these costs less than MULT_COST,
2802 describing the algorithm in *ALG and final fixup in *VARIANT. */
2805 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2806 struct algorithm *alg, enum mult_variant *variant,
int mult_cost)
struct algorithm alg2;
2810 struct mult_cost limit;
int op_cost;
bool speed = optimize_insn_for_speed_p ();
/* Fail quickly for impossible bounds.  */
if (mult_cost < 0)
  return false;
2818 /* Ensure that mult_cost provides a reasonable upper bound.
2819 Any constant multiplication can be performed with less
2820 than 2 * bits additions. */
2821 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2822 if (mult_cost > op_cost)
2823 mult_cost = op_cost;
2825 *variant = basic_variant;
2826 limit.cost = mult_cost;
2827 limit.latency = mult_cost;
2828 synth_mult (alg, val, &limit, mode);
/* This works only if the inverted value actually fits in an
   `unsigned int'.  */
2832 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2834 op_cost = neg_cost[speed][mode];
2835 if (MULT_COST_LESS (&alg->cost, mult_cost))
2837 limit.cost = alg->cost.cost - op_cost;
2838 limit.latency = alg->cost.latency - op_cost;
else
limit.cost = mult_cost - op_cost;
2843 limit.latency = mult_cost - op_cost;
2846 synth_mult (&alg2, -val, &limit, mode);
2847 alg2.cost.cost += op_cost;
2848 alg2.cost.latency += op_cost;
2849 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2850 *alg = alg2, *variant = negate_variant;
2853 /* This proves very useful for division-by-constant. */
2854 op_cost = add_cost[speed][mode];
2855 if (MULT_COST_LESS (&alg->cost, mult_cost))
2857 limit.cost = alg->cost.cost - op_cost;
2858 limit.latency = alg->cost.latency - op_cost;
else
limit.cost = mult_cost - op_cost;
2863 limit.latency = mult_cost - op_cost;
2866 synth_mult (&alg2, val - 1, &limit, mode);
2867 alg2.cost.cost += op_cost;
2868 alg2.cost.latency += op_cost;
2869 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2870 *alg = alg2, *variant = add_variant;
2872 return MULT_COST_LESS (&alg->cost, mult_cost);
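/* Example (editorial): for VAL == -5 the search on VAL itself is
   hopeless, but -VAL == 5 costs one shift-add (x + (x << 2)), so
   negate_variant wins: compute 5 * x and negate.  The VAL - 1 variant
   pays off mainly for the constants produced by division-by-constant.  */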
2875 /* A subroutine of expand_mult, used for constant multiplications.
2876 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2877 convenient. Use the shift/add sequence described by ALG and apply
2878 the final fixup specified by VARIANT. */
2881 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2882 rtx target, const struct algorithm *alg,
2883 enum mult_variant variant)
2885 HOST_WIDE_INT val_so_far;
2886 rtx insn, accum, tem;
int opno;
enum machine_mode nmode;
/* Avoid referencing memory over and over and invalid sharing
   on SUBREGs.  */
2892 op0 = force_reg (mode, op0);
2894 /* ACCUM starts out either as OP0 or as a zero, depending on
2895 the first operation. */
2897 if (alg->op[0] == alg_zero)
accum = copy_to_mode_reg (mode, const0_rtx);
val_so_far = 0;
2902 else if (alg->op[0] == alg_m)
accum = copy_to_mode_reg (mode, op0);
val_so_far = 1;
else
gcc_unreachable ();
2910 for (opno = 1; opno < alg->ops; opno++)
2912 int log = alg->log[opno];
2913 rtx shift_subtarget = optimize ? 0 : accum;
rtx add_target
  = (opno == alg->ops - 1 && target != 0 && variant != add_variant
     ? target : 0);
rtx accum_target = optimize ? 0 : accum;
2920 switch (alg->op[opno])
case alg_shift:
accum = expand_shift (LSHIFT_EXPR, mode, accum,
2924 build_int_cst (NULL_TREE, log),
NULL_RTX, 0);
break;

case alg_add_t_m2:
tem = expand_shift (LSHIFT_EXPR, mode, op0,
2931 build_int_cst (NULL_TREE, log),
2933 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2934 add_target ? add_target : accum_target);
2935 val_so_far += (HOST_WIDE_INT) 1 << log;
break;

case alg_sub_t_m2:
tem = expand_shift (LSHIFT_EXPR, mode, op0,
2940 build_int_cst (NULL_TREE, log),
2942 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2943 add_target ? add_target : accum_target);
2944 val_so_far -= (HOST_WIDE_INT) 1 << log;
break;

case alg_add_t2_m:
accum = expand_shift (LSHIFT_EXPR, mode, accum,
2949 build_int_cst (NULL_TREE, log),
2952 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2953 add_target ? add_target : accum_target);
2954 val_so_far = (val_so_far << log) + 1;
break;

case alg_sub_t2_m:
accum = expand_shift (LSHIFT_EXPR, mode, accum,
2959 build_int_cst (NULL_TREE, log),
2960 shift_subtarget, 0);
2961 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2962 add_target ? add_target : accum_target);
val_so_far = (val_so_far << log) - 1;
break;
2966 case alg_add_factor:
2967 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2968 build_int_cst (NULL_TREE, log),
2970 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2971 add_target ? add_target : accum_target);
val_so_far += val_so_far << log;
break;
2975 case alg_sub_factor:
2976 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2977 build_int_cst (NULL_TREE, log),
2979 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2981 ? add_target : (optimize ? 0 : tem)));
val_so_far = (val_so_far << log) - val_so_far;
break;

default:
gcc_unreachable ();
2989 /* Write a REG_EQUAL note on the last insn so that we can cse
2990 multiplication sequences. Note that if ACCUM is a SUBREG,
we've set the inner register and must properly indicate
that.  */
2994 tem = op0, nmode = mode;
2995 if (GET_CODE (accum) == SUBREG)
2997 nmode = GET_MODE (SUBREG_REG (accum));
2998 tem = gen_lowpart (nmode, op0);
3001 insn = get_last_insn ();
3002 set_unique_reg_note (insn, REG_EQUAL,
3003 gen_rtx_MULT (nmode, tem,
3004 GEN_INT (val_so_far)));
3007 if (variant == negate_variant)
3009 val_so_far = -val_so_far;
3010 accum = expand_unop (mode, neg_optab, accum, target, 0);
3012 else if (variant == add_variant)
3014 val_so_far = val_so_far + 1;
3015 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3018 /* Compare only the bits of val and val_so_far that are significant
3019 in the result mode, to avoid sign-/zero-extension confusion. */
3020 val &= GET_MODE_MASK (mode);
3021 val_so_far &= GET_MODE_MASK (mode);
gcc_assert (val == val_so_far);

return accum;
3027 /* Perform a multiplication and return an rtx for the result.
3028 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3029 TARGET is a suggestion for where to store the result (an rtx).
3031 We check specially for a constant integer as OP1.
3032 If you want this check for OP0 as well, then before calling
3033 you should swap the two operands if OP0 would be constant. */
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
3039 enum mult_variant variant;
3040 struct algorithm algorithm;
3042 bool speed = optimize_insn_for_speed_p ();
/* Handling const0_rtx here allows us to use zero as a rogue value for
   coeff below.  */
if (op1 == const0_rtx)
  return const0_rtx;
if (op1 == const1_rtx)
  return op0;
3050 if (op1 == constm1_rtx)
3051 return expand_unop (mode,
3052 GET_MODE_CLASS (mode) == MODE_INT
3053 && !unsignedp && flag_trapv
3054 ? negv_optab : neg_optab,
3057 /* These are the operations that are potentially turned into a sequence
3058 of shifts and additions. */
3059 if (SCALAR_INT_MODE_P (mode)
3060 && (unsignedp || !flag_trapv))
3062 HOST_WIDE_INT coeff = 0;
3063 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3065 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3066 less than or equal in size to `unsigned int' this doesn't matter.
3067 If the mode is larger than `unsigned int', then synth_mult works
3068 only if the constant value exactly fits in an `unsigned int' without
3069 any truncation. This means that multiplying by negative values does
3070 not work; results are off by 2^32 on a 32 bit machine. */
3072 if (GET_CODE (op1) == CONST_INT)
3074 /* Attempt to handle multiplication of DImode values by negative
3075 coefficients, by performing the multiplication by a positive
3076 multiplier and then inverting the result. */
3077 if (INTVAL (op1) < 0
3078 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
/* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3081 result is interpreted as an unsigned coefficient.
3082 Exclude cost of op0 from max_cost to match the cost
3083 calculation of the synth_mult. */
3084 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3085 - neg_cost[speed][mode];
if (max_cost > 0
    && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3088 &variant, max_cost))
3090 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3091 NULL_RTX, &algorithm,
3093 return expand_unop (mode, neg_optab, temp, target, 0);
else
  coeff = INTVAL (op1);
3098 else if (GET_CODE (op1) == CONST_DOUBLE)
3100 /* If we are multiplying in DImode, it may still be a win
3101 to try to work with shifts and adds. */
3102 if (CONST_DOUBLE_HIGH (op1) == 0
3103 && CONST_DOUBLE_LOW (op1) > 0)
3104 coeff = CONST_DOUBLE_LOW (op1);
3105 else if (CONST_DOUBLE_LOW (op1) == 0
3106 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3108 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3109 + HOST_BITS_PER_WIDE_INT;
3110 return expand_shift (LSHIFT_EXPR, mode, op0,
3111 build_int_cst (NULL_TREE, shift),
3116 /* We used to test optimize here, on the grounds that it's better to
3117 produce a smaller program when -O is not used. But this causes
such a terrible slowdown sometimes that it seems better to always
use synth_mult.  */
if (coeff != 0)
3122 /* Special case powers of two. */
3123 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3124 return expand_shift (LSHIFT_EXPR, mode, op0,
3125 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3128 /* Exclude cost of op0 from max_cost to match the cost
3129 calculation of the synth_mult. */
3130 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3131 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3133 return expand_mult_const (mode, op0, coeff, target,
3134 &algorithm, variant);
if (GET_CODE (op0) == CONST_DOUBLE)
  {
    rtx temp = op0;
    op0 = op1;
    op1 = temp;
  }
3145 /* Expand x*2.0 as x+x. */
3146 if (GET_CODE (op1) == CONST_DOUBLE
3147 && SCALAR_FLOAT_MODE_P (mode))
REAL_VALUE_TYPE d;
REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3152 if (REAL_VALUES_EQUAL (d, dconst2))
3154 op0 = force_reg (GET_MODE (op0), op0);
3155 return expand_binop (mode, add_optab, op0, op0,
3156 target, unsignedp, OPTAB_LIB_WIDEN);
3160 /* This used to use umul_optab if unsigned, but for non-widening multiply
3161 there is no difference between signed and unsigned. */
3162 op0 = expand_binop (mode,
! unsignedp
&& flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3165 ? smulv_optab : smul_optab,
op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
gcc_assert (op0);
return op0;
3171 /* Return the smallest n such that 2**n >= X. */
3174 ceil_log2 (unsigned HOST_WIDE_INT x)
3176 return floor_log2 (x - 1) + 1;
3179 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3180 replace division by D, and put the least significant N bits of the result
3181 in *MULTIPLIER_PTR and return the most significant bit.
3183 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3184 needed precision is in PRECISION (should be <= N).
3186 PRECISION should be as small as possible so this function can choose
3187 multiplier more freely.
3189 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3190 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3192 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3193 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3196 unsigned HOST_WIDE_INT
3197 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3198 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3200 HOST_WIDE_INT mhigh_hi, mlow_hi;
3201 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
int lgup, post_shift;
int pow, pow2;
3204 unsigned HOST_WIDE_INT nl, dummy1;
3205 HOST_WIDE_INT nh, dummy2;
3207 /* lgup = ceil(log2(divisor)); */
3208 lgup = ceil_log2 (d);
3210 gcc_assert (lgup <= n);
pow = n + lgup;
pow2 = n + lgup - precision;
3215 /* We could handle this with some effort, but this case is much
3216 better handled directly with a scc insn, so rely on caller using
3218 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3220 /* mlow = 2^(N + lgup)/d */
3221 if (pow >= HOST_BITS_PER_WIDE_INT)
3223 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
nl = 0;
else
nh = 0;
nl = (unsigned HOST_WIDE_INT) 1 << pow;
3231 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3232 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3234 /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3235 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3236 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3238 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3239 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3240 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3242 gcc_assert (!mhigh_hi || nh - d < d);
3243 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3244 /* Assert that mlow < mhigh. */
3245 gcc_assert (mlow_hi < mhigh_hi
3246 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3248 /* If precision == N, then mlow, mhigh exceed 2^N
3249 (but they do not exceed 2^(N+1)). */
3251 /* Reduce to lowest terms. */
3252 for (post_shift = lgup; post_shift > 0; post_shift--)
unsigned HOST_WIDE_INT ml_lo
  = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
unsigned HOST_WIDE_INT mh_lo
  = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
if (ml_lo >= mh_lo)
  break;

mlow_hi = 0;
mlow_lo = ml_lo;
mhigh_hi = 0;
mhigh_lo = mh_lo;
3265 *post_shift_ptr = post_shift;
3267 if (n < HOST_BITS_PER_WIDE_INT)
3269 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3270 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3271 return mhigh_lo >= mask;
else
*multiplier_ptr = GEN_INT (mhigh_lo);
return mhigh_hi;
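/* Worked example (editorial, assuming 32-bit operations): for D == 3 with
   N == PRECISION == 32 we get lgup == 2, mlow == 2^34/3 == 0x155555555 and
   mhigh == (2^34 + 4)/3 == 0x155555556.  One halving step keeps
   mlow < mhigh, so POST_SHIFT becomes 1, the returned msb is 0, and the
   multiplier is 0xAAAAAAAB: x / 3 == (x * 0xAAAAAAAB) >> 32 >> 1.  */
#if 0
/* Host-side sketch of that d == 3 case in plain C (editorial; the helper
   name is hypothetical and this is not part of GCC).  */
static unsigned int
udiv3_sketch (unsigned int x)
{
  unsigned long long m = 0xAAAAAAABULL;         /* multiplier chosen above */
  return (unsigned int) ((x * m) >> 32 >> 1);   /* POST_SHIFT == 1 */
}
#endif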
3280 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3281 congruent to 1 (mod 2**N). */
3283 static unsigned HOST_WIDE_INT
3284 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3286 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3288 /* The algorithm notes that the choice y = x satisfies
3289 x*y == 1 mod 2^3, since x is assumed odd.
3290 Each iteration doubles the number of bits of significance in y. */
3292 unsigned HOST_WIDE_INT mask;
unsigned HOST_WIDE_INT y = x;
int nbit = 3;
3296 mask = (n == HOST_BITS_PER_WIDE_INT
3297 ? ~(unsigned HOST_WIDE_INT) 0
3298 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
while (nbit < n)
  {
    y = y * (2 - x*y) & mask;	/* Modulo 2^N */
    nbit *= 2;
  }
return y;
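/* Worked example (editorial): x == 3, n == 8.  Starting from y == 3
   (exact mod 2^3), one step gives y = 3 * (2 - 9) & 0xff == 235 (exact
   mod 2^6), the next gives y = 235 * (2 - 705) & 0xff == 171, and
   indeed 3 * 171 == 513 == 2 * 256 + 1.  */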
3308 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3309 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
become signed.
3314 The result is put in TARGET if that is convenient.
3316 MODE is the mode of operation. */
3319 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3320 rtx op1, rtx target, int unsignedp)
rtx tem;
enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3325 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3326 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3328 tem = expand_and (mode, tem, op1, NULL_RTX);
adj_operand
  = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                   adj_operand);
3333 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3334 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3336 tem = expand_and (mode, tem, op0, NULL_RTX);
target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                        target);

return target;
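/* The identity used here (editorial note): with B-bit operands,
   u == s + 2^B * (s < 0), so the two high halves are related by
     unsigned_high == signed_high + (op0 < 0 ? op1 : 0)
                                  + (op1 < 0 ? op0 : 0)   (mod 2^B).
   The arithmetic shift by B - 1 builds an all-ones mask for the
   negative operands, and ADJ_CODE selects the fixup direction.  */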
3343 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3346 extract_high_half (enum machine_mode mode, rtx op)
3348 enum machine_mode wider_mode;
3350 if (mode == word_mode)
3351 return gen_highpart (mode, op);
3353 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3355 wider_mode = GET_MODE_WIDER_MODE (mode);
3356 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3357 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3358 return convert_modes (mode, wider_mode, op, 0);
3361 /* Like expand_mult_highpart, but only consider using a multiplication
3362 optab. OP1 is an rtx for the constant operand. */
3365 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3366 rtx target, int unsignedp, int max_cost)
3368 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3369 enum machine_mode wider_mode;
optab moptab;
rtx tem;
int size;
bool speed = optimize_insn_for_speed_p ();
3375 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3377 wider_mode = GET_MODE_WIDER_MODE (mode);
3378 size = GET_MODE_BITSIZE (mode);
3380 /* Firstly, try using a multiplication insn that only generates the needed
3381 high part of the product, and in the sign flavor of unsignedp. */
3382 if (mul_highpart_cost[speed][mode] < max_cost)
3384 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3385 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
unsignedp, OPTAB_DIRECT);
if (tem)
  return tem;
3391 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3392 Need to adjust the result after the multiplication. */
3393 if (size - 1 < BITS_PER_WORD
3394 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3395 + 4 * add_cost[speed][mode] < max_cost))
3397 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3398 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3399 unsignedp, OPTAB_DIRECT);
if (tem)
  /* We used the wrong signedness.  Adjust the result.  */
3402 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3406 /* Try widening multiplication. */
3407 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3408 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3409 && mul_widen_cost[speed][wider_mode] < max_cost)
3411 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3412 unsignedp, OPTAB_WIDEN);
if (tem)
  return extract_high_half (mode, tem);
3417 /* Try widening the mode and perform a non-widening multiplication. */
3418 if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3419 && size - 1 < BITS_PER_WORD
3420 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3422 rtx insns, wop0, wop1;
3424 /* We need to widen the operands, for example to ensure the
3425 constant multiplier is correctly sign or zero extended.
Use a sequence to clean up any instructions emitted by
the conversions if things don't work out.  */
start_sequence ();
wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3430 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3431 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3432 unsignedp, OPTAB_WIDEN);
3433 insns = get_insns ();
end_sequence ();
if (tem)
emit_insn (insns);
return extract_high_half (mode, tem);
3443 /* Try widening multiplication of opposite signedness, and adjust. */
3444 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3445 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3446 && size - 1 < BITS_PER_WORD
3447 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3448 + 4 * add_cost[speed][mode] < max_cost))
3450 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3451 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem)
tem = extract_high_half (mode, tem);
3455 /* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                    target, unsignedp);

return NULL_RTX;
3464 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3465 putting the high half of the result in TARGET if that is convenient,
and return where the result is.  If the operation cannot be performed,
0 is returned.
3469 MODE is the mode of operation and result.
3471 UNSIGNEDP nonzero means unsigned multiply.
3473 MAX_COST is the total allowed cost for the expanded RTL. */
3476 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3477 rtx target, int unsignedp, int max_cost)
3479 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3480 unsigned HOST_WIDE_INT cnst1;
3482 bool sign_adjust = false;
3483 enum mult_variant variant;
3484 struct algorithm alg;
3486 bool speed = optimize_insn_for_speed_p ();
3488 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3490 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3492 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3494 /* We can't optimize modes wider than BITS_PER_WORD.
3495 ??? We might be able to perform double-word arithmetic if
3496 mode == word_mode, however all the cost calculations in
3497 synth_mult etc. assume single-word operations. */
3498 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3499 return expand_mult_highpart_optab (mode, op0, op1, target,
3500 unsignedp, max_cost);
3502 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3504 /* Check whether we try to multiply by a negative constant. */
3505 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
sign_adjust = true;
extra_cost += add_cost[speed][mode];
3511 /* See whether shift/add multiplication is cheap enough. */
3512 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3513 max_cost - extra_cost))
3515 /* See whether the specialized multiplication optabs are
3516 cheaper than the shift/add version. */
3517 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
alg.cost.cost + extra_cost);
if (tem)
  return tem;
3522 tem = convert_to_mode (wider_mode, op0, unsignedp);
3523 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3524 tem = extract_high_half (mode, tem);
3526 /* Adjust result for signedness. */
if (sign_adjust)
  tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

return tem;
3532 return expand_mult_highpart_optab (mode, op0, op1, target,
3533 unsignedp, max_cost);
3537 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3540 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3542 unsigned HOST_WIDE_INT masklow, maskhigh;
3543 rtx result, temp, shift, label;
int logd;

logd = floor_log2 (d);
3547 result = gen_reg_rtx (mode);
3549 /* Avoid conditional branches when they're expensive. */
3550 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3551 && optimize_insn_for_speed_p ())
3553 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
mode, 0, -1);

if (signmask)
signmask = force_reg (mode, signmask);
3558 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3559 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
/* Use the rtx_cost of a LSHIFTRT instruction to determine
   which instruction sequence to use.  If logical right shifts
   are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
   use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3566 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3567 if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3568 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3570 temp = expand_binop (mode, xor_optab, op0, signmask,
3571 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3572 temp = expand_binop (mode, sub_optab, temp, signmask,
3573 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3574 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3575 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3576 temp = expand_binop (mode, xor_optab, temp, signmask,
3577 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3578 temp = expand_binop (mode, sub_optab, temp, signmask,
3579 NULL_RTX, 1, OPTAB_LIB_WIDEN);
else
signmask = expand_binop (mode, lshr_optab, signmask, shift,
3584 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3585 signmask = force_reg (mode, signmask);
3587 temp = expand_binop (mode, add_optab, op0, signmask,
3588 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3589 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3590 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3591 temp = expand_binop (mode, sub_optab, temp, signmask,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
return temp;
3598 /* Mask contains the mode's signbit and the significant bits of the
3599 modulus. By including the signbit in the operation, many targets
can avoid an explicit compare operation in the following comparison
against zero.  */
3603 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3604 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
maskhigh = -1;
else
3610 maskhigh = (HOST_WIDE_INT) -1
3611 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3613 temp = expand_binop (mode, and_optab, op0,
3614 immed_double_const (masklow, maskhigh, mode),
3615 result, 1, OPTAB_LIB_WIDEN);
if (temp != result)
  emit_move_insn (result, temp);
3619 label = gen_label_rtx ();
3620 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3622 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3623 0, OPTAB_LIB_WIDEN);
masklow = (HOST_WIDE_INT) -1 << logd;
maskhigh = -1;
3626 temp = expand_binop (mode, ior_optab, temp,
3627 immed_double_const (masklow, maskhigh, mode),
3628 result, 1, OPTAB_LIB_WIDEN);
3629 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3630 0, OPTAB_LIB_WIDEN);
if (temp != result)
  emit_move_insn (result, temp);

emit_label (label);

return result;
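/* Worked example (editorial): SImode, d == 4, op0 == -7.  The branchless
   XOR/SUB sequence computes signmask == -1, then
   ((-7 ^ -1) - -1) & 3 == 7 & 3 == 3 and (3 ^ -1) - -1 == -3, matching
   C's truncating semantics: -7 % 4 == -3.  */
#if 0
/* Plain-C sketch of that sequence (editorial; the name is hypothetical,
   assumes arithmetic right shift of negative ints and ignores the
   INT_MIN corner).  */
static int
smod4_sketch (int x)
{
  int signmask = x >> 31;                   /* -1 if x < 0, else 0 */
  int t = ((x ^ signmask) - signmask) & 3;  /* |x| mod 4 */
  return (t ^ signmask) - signmask;         /* put the sign back */
}
#endif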
3637 /* Expand signed division of OP0 by a power of two D in mode MODE.
3638 This routine is only called for positive values of D. */
3641 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
rtx temp, label;
tree shift;
int logd;

logd = floor_log2 (d);
3648 shift = build_int_cst (NULL_TREE, logd);
3651 && BRANCH_COST (optimize_insn_for_speed_p (),
3654 temp = gen_reg_rtx (mode);
3655 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3656 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3657 0, OPTAB_LIB_WIDEN);
3658 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3661 #ifdef HAVE_conditional_move
3662 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
>= 2)
rtx temp2;

/* ??? emit_conditional_move forces a stack adjustment via
   compare_from_rtx, so if the sequence is discarded, it will
3669 be lost. Do it now instead. */
3670 do_pending_stack_adjust ();
start_sequence ();

temp2 = copy_to_mode_reg (mode, op0);
3674 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3675 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3676 temp = force_reg (mode, temp);
3678 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3679 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3680 mode, temp, temp2, mode, 0);
if (temp2)
rtx seq = get_insns ();
end_sequence ();
emit_insn (seq);
3686 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3692 if (BRANCH_COST (optimize_insn_for_speed_p (),
false) >= 2)
int ushift = GET_MODE_BITSIZE (mode) - logd;
3697 temp = gen_reg_rtx (mode);
3698 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3699 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3700 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3701 NULL_RTX, 0, OPTAB_LIB_WIDEN);
else
  temp = expand_shift (RSHIFT_EXPR, mode, temp,
3704 build_int_cst (NULL_TREE, ushift),
3706 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3707 0, OPTAB_LIB_WIDEN);
3708 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3711 label = gen_label_rtx ();
3712 temp = copy_to_mode_reg (mode, op0);
3713 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3714 expand_inc (temp, GEN_INT (d - 1));
emit_label (label);
return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
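/* Worked example (editorial): SImode, d == 4, op0 == -7.  The final path
   above adds d - 1 == 3 only when op0 < 0, giving -4, and the arithmetic
   shift by logd == 2 yields -1, i.e. truncating division -7 / 4 == -1;
   for op0 == 7 the branch skips the increment and 7 >> 2 == 1.  */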
3719 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3720 if that is convenient, and returning where the result is.
3721 You may request either the quotient or the remainder as the result;
3722 specify REM_FLAG nonzero to get the remainder.
3724 CODE is the expression code for which kind of division this is;
3725 it controls how rounding is done. MODE is the machine mode to use.
3726 UNSIGNEDP nonzero means do unsigned division. */
3728 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3729 and then correct it by or'ing in missing high bits
3730 if result of ANDI is nonzero.
3731 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3732 This could optimize to a bfexts instruction.
But C doesn't use these operations, so their optimizations are
never nicely done.  */
3735 /* ??? For modulo, we don't actually need the highpart of the first product,
3736 the low part will do nicely. And for small divisors, the second multiply
3737 can also be a low-part only multiply or even be completely left out.
3738 E.g. to calculate the remainder of a division by 3 with a 32 bit
3739 multiply, multiply with 0x55555556 and extract the upper two bits;
3740 the result is exact for inputs up to 0x1fffffff.
3741 The input range can be reduced by using cross-sum rules.
3742 For odd divisors >= 3, the following table gives right shift counts
3743 so that if a number is shifted by an integer multiple of the given
3744 amount, the remainder stays the same:
3745 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3746 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3747 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3748 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3749 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3751 Cross-sum rules for even numbers can be derived by leaving as many bits
3752 to the right alone as the divisor has zeros to the right.
3753 E.g. if x is an unsigned 32 bit number:
3754 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3758 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3759 rtx op0, rtx op1, rtx target, int unsignedp)
3761 enum machine_mode compute_mode;
3763 rtx quotient = 0, remainder = 0;
3767 optab optab1, optab2;
3768 int op1_is_constant, op1_is_pow2 = 0;
3769 int max_cost, extra_cost;
3770 static HOST_WIDE_INT last_div_const = 0;
3771 static HOST_WIDE_INT ext_op1;
3772 bool speed = optimize_insn_for_speed_p ();
3774 op1_is_constant = GET_CODE (op1) == CONST_INT;
3775 if (op1_is_constant)
3777 ext_op1 = INTVAL (op1);
if (unsignedp)
  ext_op1 &= GET_MODE_MASK (mode);
3780 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3781 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
/*
   This is the structure of expand_divmod:
3787 First comes code to fix up the operands so we can perform the operations
3788 correctly and efficiently.
3790 Second comes a switch statement with code specific for each rounding mode.
3791 For some special operands this code emits all RTL for the desired
3792 operation, for other cases, it generates only a quotient and stores it in
3793 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3794 to indicate that it has not done anything.
3796 Last comes code that finishes the operation. If QUOTIENT is set and
3797 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3798 QUOTIENT is not set, it is computed using trunc rounding.
3800 We try to generate special code for division and remainder when OP1 is a
3801 constant. If |OP1| = 2**n we can use shifts and some other fast
3802 operations. For other values of OP1, we compute a carefully selected
fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
by m.
3806 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3807 half of the product. Different strategies for generating the product are
3808 implemented in expand_mult_highpart.
3810 If what we actually want is the remainder, we generate that by another
3811 by-constant multiplication and a subtraction. */
3813 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3814 code below will malfunction if we are, so check here and handle
3815 the special case if so. */
3816 if (op1 == const1_rtx)
3817 return rem_flag ? const0_rtx : op0;
3819 /* When dividing by -1, we could get an overflow.
3820 negv_optab can handle overflows. */
3821 if (! unsignedp && op1 == constm1_rtx)
3825 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3826 ? negv_optab : neg_optab, op0, target, 0);
if (target
/* Don't use the function value register as a target
3831 since we have to read it as well as write it,
3832 and function-inlining gets confused by this. */
3833 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3834 /* Don't clobber an operand while doing a multi-step calculation. */
3835 || ((rem_flag || op1_is_constant)
3836 && (reg_mentioned_p (target, op0)
3837 || (MEM_P (op0) && MEM_P (target))))
3838 || reg_mentioned_p (target, op1)
|| (MEM_P (op1) && MEM_P (target))))
target = 0;
3842 /* Get the mode in which to perform this computation. Normally it will
3843 be MODE, but sometimes we can't do the desired operation in MODE.
3844 If so, pick a wider mode in which we can do the operation. Convert
3845 to that mode at the start to avoid repeated conversions.
3847 First see what operations we need. These depend on the expression
3848 we are evaluating. (We assume that divxx3 insns exist under the
3849 same conditions that modxx3 insns and that these insns don't normally
3850 fail. If these assumptions are not correct, we may generate less
3851 efficient code in some cases.)
3853 Then see if we find a mode in which we can open-code that operation
3854 (either a division, modulus, or shift). Finally, check for the smallest
3855 mode for which we can do the operation with a library call. */
3857 /* We might want to refine this now that we have division-by-constant
3858 optimization. Since expand_mult_highpart tries so many variants, it is
3859 not straightforward to generalize this. Maybe we should make an array
3860 of possible modes in init_expmed? Save this for GCC 2.7. */
3862 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3863 ? (unsignedp ? lshr_optab : ashr_optab)
3864 : (unsignedp ? udiv_optab : sdiv_optab));
3865 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
? optab1
: (unsignedp ? udivmod_optab : sdivmod_optab));
3869 for (compute_mode = mode; compute_mode != VOIDmode;
3870 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3871 if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3872 || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3875 if (compute_mode == VOIDmode)
3876 for (compute_mode = mode; compute_mode != VOIDmode;
3877 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3878 if (optab_libfunc (optab1, compute_mode)
3879 || optab_libfunc (optab2, compute_mode))
/* If we still couldn't find a mode, use MODE, but expand_binop will
probably die anyway.  */
3884 if (compute_mode == VOIDmode)
3885 compute_mode = mode;
3887 if (target && GET_MODE (target) == compute_mode)
3890 tquotient = gen_reg_rtx (compute_mode);
3892 size = GET_MODE_BITSIZE (compute_mode);
#if 0
/* It should be possible to restrict the precision to GET_MODE_BITSIZE
(mode), and thereby get better code when OP1 is a constant.  Do that
later.  It will require going over all usages of SIZE below.  */
size = GET_MODE_BITSIZE (mode);
#endif
3900 /* Only deduct something for a REM if the last divide done was
for a different constant.  Then set the constant of the last
divide.  */
3903 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3904 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3905 && INTVAL (op1) == last_div_const))
3906 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3908 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3910 /* Now convert to the best mode to use. */
3911 if (compute_mode != mode)
3913 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3914 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3916 /* convert_modes may have placed op1 into a register, so we
3917 must recompute the following. */
3918 op1_is_constant = GET_CODE (op1) == CONST_INT;
3919 op1_is_pow2 = (op1_is_constant
3920 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
&& EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3925 /* If one of the operands is a volatile MEM, copy it into a register. */
3927 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3928 op0 = force_reg (compute_mode, op0);
3929 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3930 op1 = force_reg (compute_mode, op1);
3932 /* If we need the remainder or if OP1 is constant, we need to
3933 put OP0 in a register in case it has any queued subexpressions. */
3934 if (rem_flag || op1_is_constant)
3935 op0 = force_reg (compute_mode, op0);
3937 last = get_last_insn ();
3939 /* Promote floor rounding to trunc rounding for unsigned operations. */
3942 if (code == FLOOR_DIV_EXPR)
3943 code = TRUNC_DIV_EXPR;
3944 if (code == FLOOR_MOD_EXPR)
3945 code = TRUNC_MOD_EXPR;
3946 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3947 code = TRUNC_DIV_EXPR;
3950 if (op1 != const0_rtx)
3953 case TRUNC_MOD_EXPR:
3954 case TRUNC_DIV_EXPR:
3955 if (op1_is_constant)
3959 unsigned HOST_WIDE_INT mh;
3960 int pre_shift, post_shift;
3963 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3964 & GET_MODE_MASK (compute_mode));
3966 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3968 pre_shift = floor_log2 (d);
3972 = expand_binop (compute_mode, and_optab, op0,
3973 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3977 return gen_lowpart (mode, remainder);
3979 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3980 build_int_cst (NULL_TREE,
3984 else if (size <= HOST_BITS_PER_WIDE_INT)
3986 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
/* Most significant bit of divisor is set; emit an scc
insn.  */
3990 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3991 compute_mode, 1, 1);
3997 /* Find a suitable multiplier and right shift count
3998 instead of multiplying with D. */
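/* Illustrative numbers: for D = 7 in 32-bit arithmetic the multiplier
   is 0x24924925 with POST_SHIFT = 3; it needs SIZE + 1 bits, so MH is
   nonzero and the fixup below computes
   (((op0 - t1) >> 1) + t1) >> (POST_SHIFT - 1).  */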
4000 mh = choose_multiplier (d, size, size,
4001 &ml, &post_shift, &dummy);
4003 /* If the suggested multiplier is more than SIZE bits,
4004 we can do better for even divisors, using an
4005 initial right shift. */
4006 if (mh != 0 && (d & 1) == 0)
4008 pre_shift = floor_log2 (d & -d);
4009 mh = choose_multiplier (d >> pre_shift, size,
4011 &ml, &post_shift, &dummy);
4021 if (post_shift - 1 >= BITS_PER_WORD)
4025 = (shift_cost[speed][compute_mode][post_shift - 1]
4026 + shift_cost[speed][compute_mode][1]
4027 + 2 * add_cost[speed][compute_mode]);
4028 t1 = expand_mult_highpart (compute_mode, op0, ml,
4030 max_cost - extra_cost);
4033 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4037 (RSHIFT_EXPR, compute_mode, t2,
4038 build_int_cst (NULL_TREE, 1),
4040 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4043 quotient = expand_shift
4044 (RSHIFT_EXPR, compute_mode, t4,
4045 build_int_cst (NULL_TREE, post_shift - 1),
4052 if (pre_shift >= BITS_PER_WORD
4053 || post_shift >= BITS_PER_WORD)
4057 (RSHIFT_EXPR, compute_mode, op0,
4058 build_int_cst (NULL_TREE, pre_shift),
4061 = (shift_cost[speed][compute_mode][pre_shift]
4062 + shift_cost[speed][compute_mode][post_shift]);
4063 t2 = expand_mult_highpart (compute_mode, t1, ml,
4065 max_cost - extra_cost);
4068 quotient = expand_shift
4069 (RSHIFT_EXPR, compute_mode, t2,
4070 build_int_cst (NULL_TREE, post_shift),
4075 else /* Too wide mode to use tricky code */
4078 insn = get_last_insn ();
4080 && (set = single_set (insn)) != 0
4081 && SET_DEST (set) == quotient)
4082 set_unique_reg_note (insn,
4084 gen_rtx_UDIV (compute_mode, op0, op1));
4086 else /* TRUNC_DIV, signed */
4088 unsigned HOST_WIDE_INT ml;
4089 int lgup, post_shift;
4091 HOST_WIDE_INT d = INTVAL (op1);
4092 unsigned HOST_WIDE_INT abs_d;
4094 /* Since d might be INT_MIN, we have to cast to
4095 unsigned HOST_WIDE_INT before negating to avoid
4096 undefined signed overflow. */
4098 ? (unsigned HOST_WIDE_INT) d
4099 : - (unsigned HOST_WIDE_INT) d);
4101 /* n rem d = n rem -d */
4102 if (rem_flag && d < 0)
4105 op1 = gen_int_mode (abs_d, compute_mode);
4111 quotient = expand_unop (compute_mode, neg_optab, op0,
4113 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4115 /* This case is not handled correctly below. */
4116 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4117 compute_mode, 1, 1);
4121 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4122 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4123 : sdiv_pow2_cheap[speed][compute_mode])
/* We assume that the cheap metric is true if the
4125 optab has an expander for this mode. */
4126 && ((optab_handler ((rem_flag ? smod_optab
4128 compute_mode)->insn_code
4129 != CODE_FOR_nothing)
4130 || (optab_handler(sdivmod_optab,
4132 ->insn_code != CODE_FOR_nothing)))
4134 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4138 remainder = expand_smod_pow2 (compute_mode, op0, d);
4140 return gen_lowpart (mode, remainder);
4143 if (sdiv_pow2_cheap[speed][compute_mode]
4144 && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4145 != CODE_FOR_nothing)
4146 || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4147 != CODE_FOR_nothing)))
4148 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4150 gen_int_mode (abs_d,
4154 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4156 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4157 negate the quotient. */
4160 insn = get_last_insn ();
4162 && (set = single_set (insn)) != 0
4163 && SET_DEST (set) == quotient
4164 && abs_d < ((unsigned HOST_WIDE_INT) 1
4165 << (HOST_BITS_PER_WIDE_INT - 1)))
4166 set_unique_reg_note (insn,
4168 gen_rtx_DIV (compute_mode,
4175 quotient = expand_unop (compute_mode, neg_optab,
4176 quotient, quotient, 0);
4179 else if (size <= HOST_BITS_PER_WIDE_INT)
4181 choose_multiplier (abs_d, size, size - 1,
4182 &mlr, &post_shift, &lgup);
4183 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
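/* Illustrative numbers: for D = 3 in 32-bit arithmetic the multiplier
   is 0x55555556 with POST_SHIFT = 0, taking the branch below; e.g.
   op0 = -7 gives mulhi = -3, and subtracting the sign (-1) yields the
   truncated quotient -2.  */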
4184 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4188 if (post_shift >= BITS_PER_WORD
4189 || size - 1 >= BITS_PER_WORD)
4192 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4193 + shift_cost[speed][compute_mode][size - 1]
4194 + add_cost[speed][compute_mode]);
4195 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4197 max_cost - extra_cost);
4201 (RSHIFT_EXPR, compute_mode, t1,
4202 build_int_cst (NULL_TREE, post_shift),
4205 (RSHIFT_EXPR, compute_mode, op0,
4206 build_int_cst (NULL_TREE, size - 1),
4210 = force_operand (gen_rtx_MINUS (compute_mode,
4215 = force_operand (gen_rtx_MINUS (compute_mode,
4223 if (post_shift >= BITS_PER_WORD
4224 || size - 1 >= BITS_PER_WORD)
4227 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4228 mlr = gen_int_mode (ml, compute_mode);
4229 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4230 + shift_cost[speed][compute_mode][size - 1]
4231 + 2 * add_cost[speed][compute_mode]);
4232 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4234 max_cost - extra_cost);
4237 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4241 (RSHIFT_EXPR, compute_mode, t2,
4242 build_int_cst (NULL_TREE, post_shift),
4245 (RSHIFT_EXPR, compute_mode, op0,
4246 build_int_cst (NULL_TREE, size - 1),
4250 = force_operand (gen_rtx_MINUS (compute_mode,
4255 = force_operand (gen_rtx_MINUS (compute_mode,
4260 else /* Too wide mode to use tricky code */
4263 insn = get_last_insn ();
4265 && (set = single_set (insn)) != 0
4266 && SET_DEST (set) == quotient)
4267 set_unique_reg_note (insn,
4269 gen_rtx_DIV (compute_mode, op0, op1));
4274 delete_insns_since (last);
4277 case FLOOR_DIV_EXPR:
4278 case FLOOR_MOD_EXPR:
4279 /* We will come here only for signed operations. */
4280 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4282 unsigned HOST_WIDE_INT mh;
4283 int pre_shift, lgup, post_shift;
4284 HOST_WIDE_INT d = INTVAL (op1);
4289 /* We could just as easily deal with negative constants here,
4290 but it does not seem worth the trouble for GCC 2.6. */
4291 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4293 pre_shift = floor_log2 (d);
4296 remainder = expand_binop (compute_mode, and_optab, op0,
4297 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4298 remainder, 0, OPTAB_LIB_WIDEN);
4300 return gen_lowpart (mode, remainder);
4302 quotient = expand_shift
4303 (RSHIFT_EXPR, compute_mode, op0,
4304 build_int_cst (NULL_TREE, pre_shift),
4311 mh = choose_multiplier (d, size, size - 1,
4312 &ml, &post_shift, &lgup);
4315 if (post_shift < BITS_PER_WORD
4316 && size - 1 < BITS_PER_WORD)
4319 (RSHIFT_EXPR, compute_mode, op0,
4320 build_int_cst (NULL_TREE, size - 1),
4322 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4323 NULL_RTX, 0, OPTAB_WIDEN);
4324 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4325 + shift_cost[speed][compute_mode][size - 1]
4326 + 2 * add_cost[speed][compute_mode]);
4327 t3 = expand_mult_highpart (compute_mode, t2, ml,
4329 max_cost - extra_cost);
4333 (RSHIFT_EXPR, compute_mode, t3,
4334 build_int_cst (NULL_TREE, post_shift),
4336 quotient = expand_binop (compute_mode, xor_optab,
4337 t4, t1, tquotient, 0,
4345 rtx nsign, t1, t2, t3, t4;
4346 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4347 op0, constm1_rtx), NULL_RTX);
4348 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4350 nsign = expand_shift
4351 (RSHIFT_EXPR, compute_mode, t2,
4352 build_int_cst (NULL_TREE, size - 1),
4354 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4356 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4361 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4363 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4372 delete_insns_since (last);
4374 /* Try using an instruction that produces both the quotient and
4375 remainder, using truncation. We can easily compensate the quotient
4376 or remainder to get floor rounding, once we have the remainder.
Notice that we also compute the final remainder value here,
4378 and return the result right away. */
4379 if (target == 0 || GET_MODE (target) != compute_mode)
4380 target = gen_reg_rtx (compute_mode);
4385 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4386 quotient = gen_reg_rtx (compute_mode);
4391 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4392 remainder = gen_reg_rtx (compute_mode);
4395 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4396 quotient, remainder, 0))
4398 /* This could be computed with a branch-less sequence.
4399 Save that for later. */
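/* E.g. -7 / 2 truncates to q = -3, r = -1; the remainder is nonzero
   and op0 ^ op1 is negative, so floor rounding gives q = -4 and
   r = -1 + 2 = 1 (q * 2 + r == -7 still holds).  */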
4401 rtx label = gen_label_rtx ();
4402 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4403 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4404 NULL_RTX, 0, OPTAB_WIDEN);
4405 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4406 expand_dec (quotient, const1_rtx);
4407 expand_inc (remainder, op1);
4409 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4412 /* No luck with division elimination or divmod. Have to do it
4413 by conditionally adjusting op0 *and* the result. */
4415 rtx label1, label2, label3, label4, label5;
4419 quotient = gen_reg_rtx (compute_mode);
4420 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4421 label1 = gen_label_rtx ();
4422 label2 = gen_label_rtx ();
4423 label3 = gen_label_rtx ();
4424 label4 = gen_label_rtx ();
4425 label5 = gen_label_rtx ();
4426 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4427 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4428 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4429 quotient, 0, OPTAB_LIB_WIDEN);
4430 if (tem != quotient)
4431 emit_move_insn (quotient, tem);
4432 emit_jump_insn (gen_jump (label5));
4434 emit_label (label1);
4435 expand_inc (adjusted_op0, const1_rtx);
4436 emit_jump_insn (gen_jump (label4));
4438 emit_label (label2);
4439 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4440 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4441 quotient, 0, OPTAB_LIB_WIDEN);
4442 if (tem != quotient)
4443 emit_move_insn (quotient, tem);
4444 emit_jump_insn (gen_jump (label5));
4446 emit_label (label3);
4447 expand_dec (adjusted_op0, const1_rtx);
4448 emit_label (label4);
4449 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4450 quotient, 0, OPTAB_LIB_WIDEN);
4451 if (tem != quotient)
4452 emit_move_insn (quotient, tem);
4453 expand_dec (quotient, const1_rtx);
4454 emit_label (label5);
4462 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4465 unsigned HOST_WIDE_INT d = INTVAL (op1);
4466 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4467 build_int_cst (NULL_TREE, floor_log2 (d)),
4469 t2 = expand_binop (compute_mode, and_optab, op0,
4471 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4472 t3 = gen_reg_rtx (compute_mode);
4473 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4474 compute_mode, 1, 1);
4478 lab = gen_label_rtx ();
4479 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4480 expand_inc (t1, const1_rtx);
4485 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4491 /* Try using an instruction that produces both the quotient and
4492 remainder, using truncation. We can easily compensate the
4493 quotient or remainder to get ceiling rounding, once we have the
remainder.  Notice that we also compute the final remainder
4495 value here, and return the result right away. */
4496 if (target == 0 || GET_MODE (target) != compute_mode)
4497 target = gen_reg_rtx (compute_mode);
4501 remainder = (REG_P (target)
4502 ? target : gen_reg_rtx (compute_mode));
4503 quotient = gen_reg_rtx (compute_mode);
4507 quotient = (REG_P (target)
4508 ? target : gen_reg_rtx (compute_mode));
4509 remainder = gen_reg_rtx (compute_mode);
4512 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4515 /* This could be computed with a branch-less sequence.
4516 Save that for later. */
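/* E.g. unsigned 7 / 2 truncates to q = 3, r = 1; the remainder is
   nonzero, so ceiling rounding gives q = 4 and r = 1 - 2 = -1
   (q * 2 + r == 7 still holds).  */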
4517 rtx label = gen_label_rtx ();
4518 do_cmp_and_jump (remainder, const0_rtx, EQ,
4519 compute_mode, label);
4520 expand_inc (quotient, const1_rtx);
4521 expand_dec (remainder, op1);
4523 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4526 /* No luck with division elimination or divmod. Have to do it
4527 by conditionally adjusting op0 *and* the result. */
4530 rtx adjusted_op0, tem;
4532 quotient = gen_reg_rtx (compute_mode);
4533 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4534 label1 = gen_label_rtx ();
4535 label2 = gen_label_rtx ();
4536 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4537 compute_mode, label1);
4538 emit_move_insn (quotient, const0_rtx);
4539 emit_jump_insn (gen_jump (label2));
4541 emit_label (label1);
4542 expand_dec (adjusted_op0, const1_rtx);
4543 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4544 quotient, 1, OPTAB_LIB_WIDEN);
4545 if (tem != quotient)
4546 emit_move_insn (quotient, tem);
4547 expand_inc (quotient, const1_rtx);
4548 emit_label (label2);
4553 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4554 && INTVAL (op1) >= 0)
4556 /* This is extremely similar to the code for the unsigned case
4557 above. For 2.7 we should merge these variants, but for
4558 2.6.1 I don't want to touch the code for unsigned since that
gets used in C.  The signed case will only be used by other
languages (Ada).  */
4563 unsigned HOST_WIDE_INT d = INTVAL (op1);
4564 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4565 build_int_cst (NULL_TREE, floor_log2 (d)),
4567 t2 = expand_binop (compute_mode, and_optab, op0,
4569 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4570 t3 = gen_reg_rtx (compute_mode);
4571 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4572 compute_mode, 1, 1);
4576 lab = gen_label_rtx ();
4577 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4578 expand_inc (t1, const1_rtx);
4583 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4589 /* Try using an instruction that produces both the quotient and
4590 remainder, using truncation. We can easily compensate the
4591 quotient or remainder to get ceiling rounding, once we have the
remainder.  Notice that we also compute the final remainder
4593 value here, and return the result right away. */
4594 if (target == 0 || GET_MODE (target) != compute_mode)
4595 target = gen_reg_rtx (compute_mode);
4598 remainder= (REG_P (target)
4599 ? target : gen_reg_rtx (compute_mode));
4600 quotient = gen_reg_rtx (compute_mode);
4604 quotient = (REG_P (target)
4605 ? target : gen_reg_rtx (compute_mode));
4606 remainder = gen_reg_rtx (compute_mode);
4609 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4612 /* This could be computed with a branch-less sequence.
4613 Save that for later. */
4615 rtx label = gen_label_rtx ();
4616 do_cmp_and_jump (remainder, const0_rtx, EQ,
4617 compute_mode, label);
4618 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4619 NULL_RTX, 0, OPTAB_WIDEN);
4620 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4621 expand_inc (quotient, const1_rtx);
4622 expand_dec (remainder, op1);
4624 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4627 /* No luck with division elimination or divmod. Have to do it
4628 by conditionally adjusting op0 *and* the result. */
4630 rtx label1, label2, label3, label4, label5;
4634 quotient = gen_reg_rtx (compute_mode);
4635 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4636 label1 = gen_label_rtx ();
4637 label2 = gen_label_rtx ();
4638 label3 = gen_label_rtx ();
4639 label4 = gen_label_rtx ();
4640 label5 = gen_label_rtx ();
4641 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4642 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4643 compute_mode, label1);
4644 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4645 quotient, 0, OPTAB_LIB_WIDEN);
4646 if (tem != quotient)
4647 emit_move_insn (quotient, tem);
4648 emit_jump_insn (gen_jump (label5));
4650 emit_label (label1);
4651 expand_dec (adjusted_op0, const1_rtx);
4652 emit_jump_insn (gen_jump (label4));
4654 emit_label (label2);
4655 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4656 compute_mode, label3);
4657 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4658 quotient, 0, OPTAB_LIB_WIDEN);
4659 if (tem != quotient)
4660 emit_move_insn (quotient, tem);
4661 emit_jump_insn (gen_jump (label5));
4663 emit_label (label3);
4664 expand_inc (adjusted_op0, const1_rtx);
4665 emit_label (label4);
4666 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4667 quotient, 0, OPTAB_LIB_WIDEN);
4668 if (tem != quotient)
4669 emit_move_insn (quotient, tem);
4670 expand_inc (quotient, const1_rtx);
4671 emit_label (label5);
4676 case EXACT_DIV_EXPR:
4677 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4679 HOST_WIDE_INT d = INTVAL (op1);
4680 unsigned HOST_WIDE_INT ml;
4684 pre_shift = floor_log2 (d & -d);
4685 ml = invert_mod2n (d >> pre_shift, size);
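/* E.g. exact division by 3 in 32-bit arithmetic multiplies by
   0xAAAAAAAB, the inverse of 3 mod 2^32: 15 * 0xAAAAAAAB == 5
   (mod 2^32).  */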
4686 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4687 build_int_cst (NULL_TREE, pre_shift),
4688 NULL_RTX, unsignedp);
4689 quotient = expand_mult (compute_mode, t1,
4690 gen_int_mode (ml, compute_mode),
4693 insn = get_last_insn ();
4694 set_unique_reg_note (insn,
4696 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4702 case ROUND_DIV_EXPR:
4703 case ROUND_MOD_EXPR:
4708 label = gen_label_rtx ();
4709 quotient = gen_reg_rtx (compute_mode);
4710 remainder = gen_reg_rtx (compute_mode);
4711 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4714 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4715 quotient, 1, OPTAB_LIB_WIDEN);
4716 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4717 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4718 remainder, 1, OPTAB_LIB_WIDEN);
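/* Round to nearest: bump the quotient when the remainder exceeds
   (OP1 - 1) / 2.  E.g. 8 / 3 truncates to q = 2, r = 2; the threshold
   is 1, so we get q = 3, r = -1.  */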
4720 tem = plus_constant (op1, -1);
4721 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4722 build_int_cst (NULL_TREE, 1),
4724 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4725 expand_inc (quotient, const1_rtx);
4726 expand_dec (remainder, op1);
4731 rtx abs_rem, abs_op1, tem, mask;
4733 label = gen_label_rtx ();
4734 quotient = gen_reg_rtx (compute_mode);
4735 remainder = gen_reg_rtx (compute_mode);
4736 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4739 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4740 quotient, 0, OPTAB_LIB_WIDEN);
4741 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4742 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4743 remainder, 0, OPTAB_LIB_WIDEN);
4745 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4746 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4747 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4748 build_int_cst (NULL_TREE, 1),
4750 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
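/* 2 * |remainder| >= |op1|, so adjust by one away from zero.  MASK
   below is 0 if op0 and op1 have the same sign and all ones otherwise;
   (MASK ^ 1) - MASK is then +1 or -1 for the quotient adjustment, and
   (MASK ^ op1) - MASK is op1 or -op1 for the remainder adjustment.  */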
4751 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4752 NULL_RTX, 0, OPTAB_WIDEN);
4753 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4754 build_int_cst (NULL_TREE, size - 1),
4756 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4757 NULL_RTX, 0, OPTAB_WIDEN);
4758 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4759 NULL_RTX, 0, OPTAB_WIDEN);
4760 expand_inc (quotient, tem);
4761 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4762 NULL_RTX, 0, OPTAB_WIDEN);
4763 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4764 NULL_RTX, 0, OPTAB_WIDEN);
4765 expand_dec (remainder, tem);
4768 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4776 if (target && GET_MODE (target) != compute_mode)
4781 /* Try to produce the remainder without producing the quotient.
4782 If we seem to have a divmod pattern that does not require widening,
4783 don't try widening here. We should really have a WIDEN argument
4784 to expand_twoval_binop, since what we'd really like to do here is
4785 1) try a mod insn in compute_mode
4786 2) try a divmod insn in compute_mode
3) try a div insn in compute_mode and multiply-subtract to get
the remainder
4789 4) try the same things with widening allowed. */
4791 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4794 ((optab_handler (optab2, compute_mode)->insn_code
4795 != CODE_FOR_nothing)
4796 ? OPTAB_DIRECT : OPTAB_WIDEN));
4799 /* No luck there. Can we do remainder and divide at once
4800 without a library call? */
4801 remainder = gen_reg_rtx (compute_mode);
4802 if (! expand_twoval_binop ((unsignedp
4806 NULL_RTX, remainder, unsignedp))
4811 return gen_lowpart (mode, remainder);
4814 /* Produce the quotient. Try a quotient insn, but not a library call.
4815 If we have a divmod in this mode, use it in preference to widening
4816 the div (for this test we assume it will not fail). Note that optab2
4817 is set to the one of the two optabs that the call below will use. */
4819 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4820 op0, op1, rem_flag ? NULL_RTX : target,
4822 ((optab_handler (optab2, compute_mode)->insn_code
4823 != CODE_FOR_nothing)
4824 ? OPTAB_DIRECT : OPTAB_WIDEN));
4828 /* No luck there. Try a quotient-and-remainder insn,
4829 keeping the quotient alone. */
4830 quotient = gen_reg_rtx (compute_mode);
4831 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4833 quotient, NULL_RTX, unsignedp))
4837 /* Still no luck. If we are not computing the remainder,
4838 use a library call for the quotient. */
4839 quotient = sign_expand_binop (compute_mode,
4840 udiv_optab, sdiv_optab,
4842 unsignedp, OPTAB_LIB_WIDEN);
4849 if (target && GET_MODE (target) != compute_mode)
4854 /* No divide instruction either. Use library for remainder. */
4855 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4857 unsignedp, OPTAB_LIB_WIDEN);
4858 /* No remainder function. Try a quotient-and-remainder
4859 function, keeping the remainder. */
4862 remainder = gen_reg_rtx (compute_mode);
4863 if (!expand_twoval_binop_libfunc
4864 (unsignedp ? udivmod_optab : sdivmod_optab,
4866 NULL_RTX, remainder,
4867 unsignedp ? UMOD : MOD))
4868 remainder = NULL_RTX;
4873 /* We divided. Now finish doing X - Y * (X / Y). */
4874 remainder = expand_mult (compute_mode, quotient, op1,
4875 NULL_RTX, unsignedp);
4876 remainder = expand_binop (compute_mode, sub_optab, op0,
4877 remainder, target, unsignedp,
4882 return gen_lowpart (mode, rem_flag ? remainder : quotient);
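/* Illustrative sketch, not part of GCC: the kind of sequence the
   TRUNC_DIV path above emits for an unsigned divide by 7 in 32-bit
   arithmetic, where choose_multiplier needs a 33-bit multiplier
   (ml = 0x24924925, post_shift = 3).  Assumes a 32-bit unsigned int
   and a 64-bit unsigned long long.  */
#if 0
static unsigned int
udiv7_sketch (unsigned int n)
{
  /* High half of the 32x32->64 product, as expand_mult_highpart
     would compute it.  */
  unsigned int t = (unsigned long long) n * 0x24924925u >> 32;
  /* Add-and-shift fixup for the multiplier's 33rd bit, then shift by
     post_shift - 1 == 2.  */
  return (((n - t) >> 1) + t) >> 2;
}
#endif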
4885 /* Return a tree node with data type TYPE, describing the value of X.
Usually this is a VAR_DECL, if there is no obvious better choice.
X may be an expression; however, we only support those expressions
generated by loop.c.  */
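/* For example, (plus:SI (reg:SI 100) (const_int 4)) becomes a
   PLUS_EXPR of the tree built for the register (a VAR_DECL with its
   rtl set) and an integer constant node.  */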
4891 make_tree (tree type, rtx x)
4895 switch (GET_CODE (x))
4899 HOST_WIDE_INT hi = 0;
4902 && !(TYPE_UNSIGNED (type)
4903 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4904 < HOST_BITS_PER_WIDE_INT)))
4907 t = build_int_cst_wide (type, INTVAL (x), hi);
4913 if (GET_MODE (x) == VOIDmode)
4914 t = build_int_cst_wide (type,
4915 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4920 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4921 t = build_real (type, d);
4928 int units = CONST_VECTOR_NUNITS (x);
4929 tree itype = TREE_TYPE (type);
4934 /* Build a tree with vector elements. */
4935 for (i = units - 1; i >= 0; --i)
4937 rtx elt = CONST_VECTOR_ELT (x, i);
4938 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4941 return build_vector (type, t);
4945 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4946 make_tree (type, XEXP (x, 1)));
4949 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4950 make_tree (type, XEXP (x, 1)));
4953 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4956 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4957 make_tree (type, XEXP (x, 1)));
4960 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4961 make_tree (type, XEXP (x, 1)));
4964 t = unsigned_type_for (type);
4965 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4966 make_tree (t, XEXP (x, 0)),
4967 make_tree (type, XEXP (x, 1))));
4970 t = signed_type_for (type);
4971 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4972 make_tree (t, XEXP (x, 0)),
4973 make_tree (type, XEXP (x, 1))));
4976 if (TREE_CODE (type) != REAL_TYPE)
4977 t = signed_type_for (type);
4981 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4982 make_tree (t, XEXP (x, 0)),
4983 make_tree (t, XEXP (x, 1))));
4985 t = unsigned_type_for (type);
4986 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4987 make_tree (t, XEXP (x, 0)),
4988 make_tree (t, XEXP (x, 1))));
4992 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4993 GET_CODE (x) == ZERO_EXTEND);
4994 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4997 return make_tree (type, XEXP (x, 0));
5000 t = SYMBOL_REF_DECL (x);
5002 return fold_convert (type, build_fold_addr_expr (t));
5003 /* else fall through. */
5006 t = build_decl (VAR_DECL, NULL_TREE, type);
5008 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5009 ptr_mode. So convert. */
5010 if (POINTER_TYPE_P (type))
5011 x = convert_memory_address (TYPE_MODE (type), x);
5013 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5014 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5015 t->decl_with_rtl.rtl = x;
5021 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5022 and returning TARGET.
5024 If TARGET is 0, a pseudo-register or constant is returned. */
5027 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5031 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5032 tem = simplify_binary_operation (AND, mode, op0, op1);
5034 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5038 else if (tem != target)
5039 emit_move_insn (target, tem);
5043 /* Helper function for emit_store_flag. */
5045 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5049 enum machine_mode target_mode = GET_MODE (target);
5051 /* If we are converting to a wider mode, first convert to
5052 TARGET_MODE, then normalize. This produces better combining
5053 opportunities on machines that have a SIGN_EXTRACT when we are
5054 testing a single bit. This mostly benefits the 68k.
5056 If STORE_FLAG_VALUE does not have the sign bit set when
5057 interpreted in MODE, we can do this conversion as unsigned, which
5058 is usually more efficient. */
5059 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5061 convert_move (target, subtarget,
5062 (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5063 && 0 == (STORE_FLAG_VALUE
5064 & ((HOST_WIDE_INT) 1
5065 << (GET_MODE_BITSIZE (mode) -1))));
/* If we want to keep subexpressions around, don't reuse our last
target.  */
5077 /* Now normalize to the proper value in MODE. Sometimes we don't
5078 have to do anything. */
5079 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5081 /* STORE_FLAG_VALUE might be the most negative number, so write
the comparison this way to avoid a compile-time warning.  */
5083 else if (- normalizep == STORE_FLAG_VALUE)
5084 op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5086 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5087 it hard to use a value of just the sign bit due to ANSI integer
5088 constant typing rules. */
5089 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5090 && (STORE_FLAG_VALUE
5091 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5092 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5093 size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5097 gcc_assert (STORE_FLAG_VALUE & 1);
5099 op0 = expand_and (mode, op0, const1_rtx, subtarget);
5100 if (normalizep == -1)
5101 op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5104 /* If we were converting to a smaller mode, do the conversion now. */
5105 if (target_mode != mode)
5107 convert_move (target, op0, 0);
5114 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5115 and storing in TARGET. Normally return TARGET.
5116 Return 0 if that cannot be done.
5118 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5119 it is VOIDmode, they cannot both be CONST_INT.
5121 UNSIGNEDP is for the case where we have to widen the operands
5122 to perform the operation. It says to use zero-extension.
5124 NORMALIZEP is 1 if we should convert the result to be either zero
or one.  NORMALIZEP is -1 if we should convert the result to be
5126 either zero or -1. If NORMALIZEP is zero, the result will be left
5127 "raw" out of the scc insn. */
5130 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5131 enum machine_mode mode, int unsignedp, int normalizep)
5134 enum insn_code icode;
5135 enum machine_mode compare_mode;
5136 enum machine_mode target_mode = GET_MODE (target);
5138 rtx last = get_last_insn ();
5139 rtx pattern, comparison;
5142 code = unsigned_condition (code);
5144 /* If one operand is constant, make it the second one. Only do this
5145 if the other operand is not constant as well. */
5147 if (swap_commutative_operands_p (op0, op1))
5152 code = swap_condition (code);
5155 if (mode == VOIDmode)
5156 mode = GET_MODE (op0);
5158 /* For some comparisons with 1 and -1, we can convert this to
5159 comparisons with zero. This will often produce more opportunities for
5160 store-flag insns. */
5165 if (op1 == const1_rtx)
5166 op1 = const0_rtx, code = LE;
5169 if (op1 == constm1_rtx)
5170 op1 = const0_rtx, code = LT;
5173 if (op1 == const1_rtx)
5174 op1 = const0_rtx, code = GT;
5177 if (op1 == constm1_rtx)
5178 op1 = const0_rtx, code = GE;
5181 if (op1 == const1_rtx)
5182 op1 = const0_rtx, code = NE;
5185 if (op1 == const1_rtx)
5186 op1 = const0_rtx, code = EQ;
5192 /* If we are comparing a double-word integer with zero or -1, we can
5193 convert the comparison into one involving a single word. */
5194 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5195 && GET_MODE_CLASS (mode) == MODE_INT
5196 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5198 if ((code == EQ || code == NE)
5199 && (op1 == const0_rtx || op1 == constm1_rtx))
5201 rtx op00, op01, op0both;
/* Do a logical OR or AND of the two words and compare the
result.  */
5205 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5206 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5207 op0both = expand_binop (word_mode,
5208 op1 == const0_rtx ? ior_optab : and_optab,
5209 op00, op01, NULL_RTX, unsignedp,
5213 return emit_store_flag (target, code, op0both, op1, word_mode,
5214 unsignedp, normalizep);
5216 else if ((code == LT || code == GE) && op1 == const0_rtx)
5220 /* If testing the sign bit, can just test on high word. */
5221 op0h = simplify_gen_subreg (word_mode, op0, mode,
5222 subreg_highpart_offset (word_mode,
5224 return emit_store_flag (target, code, op0h, op1, word_mode,
5225 unsignedp, normalizep);
5229 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5230 complement of A (for GE) and shifting the sign bit to the low bit. */
5231 if (op1 == const0_rtx && (code == LT || code == GE)
5232 && GET_MODE_CLASS (mode) == MODE_INT
5233 && (normalizep || STORE_FLAG_VALUE == 1
5234 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5235 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5236 == ((unsigned HOST_WIDE_INT) 1
5237 << (GET_MODE_BITSIZE (mode) - 1))))))
5241 /* If the result is to be wider than OP0, it is best to convert it
first.  If it is to be narrower, it is *incorrect* to convert it
first.  */
5244 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5246 op0 = convert_modes (target_mode, mode, op0, 0);
5250 if (target_mode != mode)
5254 op0 = expand_unop (mode, one_cmpl_optab, op0,
5255 ((STORE_FLAG_VALUE == 1 || normalizep)
5256 ? 0 : subtarget), 0);
5258 if (STORE_FLAG_VALUE == 1 || normalizep)
5259 /* If we are supposed to produce a 0/1 value, we want to do
5260 a logical shift from the sign bit to the low-order bit; for
5261 a -1/0 value, we do an arithmetic shift. */
5262 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5263 size_int (GET_MODE_BITSIZE (mode) - 1),
5264 subtarget, normalizep != -1);
5266 if (mode != target_mode)
5267 op0 = convert_modes (target_mode, mode, op0, 0);
5272 icode = setcc_gen_code[(int) code];
5274 if (icode != CODE_FOR_nothing)
5276 insn_operand_predicate_fn pred;
5278 /* We think we may be able to do this with a scc insn. Emit the
5279 comparison and then the scc insn. */
5281 do_pending_stack_adjust ();
5282 last = get_last_insn ();
5285 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5286 if (CONSTANT_P (comparison))
5288 switch (GET_CODE (comparison))
5291 if (comparison == const0_rtx)
5295 #ifdef FLOAT_STORE_FLAG_VALUE
5297 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5305 if (normalizep == 1)
5307 if (normalizep == -1)
5309 return const_true_rtx;
5312 /* The code of COMPARISON may not match CODE if compare_from_rtx
5313 decided to swap its operands and reverse the original code.
5315 We know that compare_from_rtx returns either a CONST_INT or
5316 a new comparison code, so it is safe to just extract the
5317 code from COMPARISON. */
5318 code = GET_CODE (comparison);
5320 /* Get a reference to the target in the proper mode for this insn. */
5321 compare_mode = insn_data[(int) icode].operand[0].mode;
5323 pred = insn_data[(int) icode].operand[0].predicate;
5324 if (optimize || ! (*pred) (subtarget, compare_mode))
5325 subtarget = gen_reg_rtx (compare_mode);
5327 pattern = GEN_FCN (icode) (subtarget);
5330 emit_insn (pattern);
5331 return emit_store_flag_1 (target, subtarget, compare_mode,
5337 /* We don't have an scc insn, so try a cstore insn. */
5339 for (compare_mode = mode; compare_mode != VOIDmode;
5340 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5342 icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5343 if (icode != CODE_FOR_nothing)
5347 if (icode != CODE_FOR_nothing)
5349 enum machine_mode result_mode
5350 = insn_data[(int) icode].operand[0].mode;
5351 rtx cstore_op0 = op0;
5352 rtx cstore_op1 = op1;
5354 do_pending_stack_adjust ();
5355 last = get_last_insn ();
5357 if (compare_mode != mode)
5359 cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5361 cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5365 if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5367 cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5369 if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5371 cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5373 comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5377 if (optimize || !(insn_data[(int) icode].operand[0].predicate
5378 (subtarget, result_mode)))
5379 subtarget = gen_reg_rtx (result_mode);
5381 pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5386 emit_insn (pattern);
5387 return emit_store_flag_1 (target, subtarget, result_mode,
5393 delete_insns_since (last);
5395 /* If optimizing, use different pseudo registers for each insn, instead
5396 of reusing the same pseudo. This leads to better CSE, but slows
down the compiler, since there are more pseudos.  */
5398 subtarget = (!optimize
5399 && (target_mode == mode)) ? target : NULL_RTX;
5401 /* If we reached here, we can't do this with a scc insn. However, there
5402 are some comparisons that can be done directly. For example, if
5403 this is an equality comparison of integers, we can try to exclusive-or
5404 (or subtract) the two operands and use a recursive call to try the
comparison with zero.  Don't do any of these cases if branches are
expensive.  */
5408 if (BRANCH_COST (optimize_insn_for_speed_p (),
5410 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5411 && op1 != const0_rtx)
5413 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5417 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5420 tem = emit_store_flag (target, code, tem, const0_rtx,
5421 mode, unsignedp, normalizep);
5423 delete_insns_since (last);
5427 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5428 the constant zero. Reject all other comparisons at this point. Only
5429 do LE and GT if branches are expensive since they are expensive on
5430 2-operand machines. */
5432 if (BRANCH_COST (optimize_insn_for_speed_p (),
5434 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5435 || (code != EQ && code != NE
5436 && (BRANCH_COST (optimize_insn_for_speed_p (),
5437 false) <= 1 || (code != LE && code != GT))))
/* See what we need to return.  We can only return a 1, -1, or the
sign bit.  */
5443 if (normalizep == 0)
5445 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5446 normalizep = STORE_FLAG_VALUE;
5448 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5449 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5450 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5456 /* Try to put the result of the comparison in the sign bit. Assume we can't
5457 do the necessary operation below. */
5461 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5462 the sign bit set. */
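/* E.g. A = 0 gives 0 | -1 = -1 (sign bit set); A = 5 gives
   5 | 4 = 5 (clear); A = -3 gives -3 | -4 = -3 (set).  */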
5466 /* This is destructive, so SUBTARGET can't be OP0. */
5467 if (rtx_equal_p (subtarget, op0))
5470 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5473 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
/* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5478 number of bits in the mode of OP0, minus one. */
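/* E.g. A = 5 gives 0 - 5 = -5 (sign bit set); A = -3 gives
   -1 - (-3) = 2 (clear); A = 0 gives 0 - 0 = 0 (clear).  */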
5482 if (rtx_equal_p (subtarget, op0))
5485 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5486 size_int (GET_MODE_BITSIZE (mode) - 1),
5488 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5492 if (code == EQ || code == NE)
5494 /* For EQ or NE, one way to do the comparison is to apply an operation
5495 that converts the operand into a positive number if it is nonzero
5496 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5497 for NE we negate. This puts the result in the sign bit. Then we
5498 normalize with a shift, if needed.
5500 Two operations that can do the above actions are ABS and FFS, so try
5501 them. If that doesn't work, and MODE is smaller than a full word,
5502 we can use zero-extension to the wider mode (an unsigned conversion)
5503 as the operation. */
5505 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5506 that is compensated by the subsequent overflow when subtracting
5509 if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5510 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5511 else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5512 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5513 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5515 tem = convert_modes (word_mode, mode, op0, 1);
5522 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5525 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5528 /* If we couldn't do it that way, for NE we can "or" the two's complement
5529 of the value with itself. For EQ, we take the one's complement of
5530 that "or", which is an extra insn, so we only handle EQ if branches
5535 || BRANCH_COST (optimize_insn_for_speed_p (),
5538 if (rtx_equal_p (subtarget, op0))
5541 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5542 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5545 if (tem && code == EQ)
5546 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5550 if (tem && normalizep)
5551 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5552 size_int (GET_MODE_BITSIZE (mode) - 1),
5553 subtarget, normalizep == 1);
5557 if (GET_MODE (tem) != target_mode)
5559 convert_move (target, tem, 0);
5562 else if (!subtarget)
5564 emit_move_insn (target, tem);
5569 delete_insns_since (last);
5574 /* Like emit_store_flag, but always succeeds. */
5577 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5578 enum machine_mode mode, int unsignedp, int normalizep)
5582 /* First see if emit_store_flag can do the job. */
5583 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5587 if (normalizep == 0)
5590 /* If this failed, we have to do this with set/compare/jump/set code. */
5593 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5594 target = gen_reg_rtx (GET_MODE (target));
5596 emit_move_insn (target, const1_rtx);
5597 label = gen_label_rtx ();
5598 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5601 emit_move_insn (target, const0_rtx);
5607 /* Perform possibly multi-word comparison and conditional jump to LABEL
if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5609 now a thin wrapper around do_compare_rtx_and_jump. */
5612 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5615 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5616 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5617 NULL_RTX, NULL_RTX, label);