/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
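/* For example (illustrative only): EXACT_POWER_OF_2_OR_ZERO_P (8) is
   (8 & 7) == 0, i.e. true; EXACT_POWER_OF_2_OR_ZERO_P (6) is
   (6 & 5) == 4 != 0, i.e. false; and EXACT_POWER_OF_2_OR_ZERO_P (0)
   is (0 & -1) == 0, i.e. true, which is why callers that need a true
   power of two must check for zero separately.  */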
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;         rtunion reg_fld[2];
    struct rtx_def plus;        rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;        rtunion mult_fld1;
    struct rtx_def sdiv;        rtunion sdiv_fld1;
    struct rtx_def udiv;        rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
    struct rtx_def smod_32;     rtunion smod_32_fld1;
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;       rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;   rtunion shift_add_fld1;
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
           mode != VOIDmode;
           mode = GET_MODE_WIDER_MODE (mode))
        {
          PUT_MODE (&all.reg, mode);
          PUT_MODE (&all.plus, mode);
          PUT_MODE (&all.neg, mode);
          PUT_MODE (&all.mult, mode);
          PUT_MODE (&all.sdiv, mode);
          PUT_MODE (&all.udiv, mode);
          PUT_MODE (&all.sdiv_32, mode);
          PUT_MODE (&all.smod_32, mode);
          PUT_MODE (&all.wide_trunc, mode);
          PUT_MODE (&all.shift, mode);
          PUT_MODE (&all.shift_mult, mode);
          PUT_MODE (&all.shift_add, mode);
          PUT_MODE (&all.shift_sub0, mode);
          PUT_MODE (&all.shift_sub1, mode);

          add_cost[speed][mode] = set_src_cost (&all.plus, speed);
          neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
          mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
          sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
          udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

          sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
                                          <= 2 * add_cost[speed][mode]);
          smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
                                          <= 4 * add_cost[speed][mode]);

          wider_mode = GET_MODE_WIDER_MODE (mode);
          if (wider_mode != VOIDmode)
            {
              PUT_MODE (&all.zext, wider_mode);
              PUT_MODE (&all.wide_mult, wider_mode);
              PUT_MODE (&all.wide_lshr, wider_mode);
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

              mul_widen_cost[speed][wider_mode]
                = set_src_cost (&all.wide_mult, speed);
              mul_highpart_cost[speed][mode]
                = set_src_cost (&all.wide_trunc, speed);
            }

          shift_cost[speed][mode][0] = 0;
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
          for (m = 1; m < n; m++)
            {
              XEXP (&all.shift, 1) = cint[m];
              XEXP (&all.shift_mult, 1) = pow2[m];

              shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
              shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
                                                            speed);
              shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
                                                             speed);
              shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
                                                             speed);
            }
        }
    }
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}
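/* Illustrative note (not from the original source): the tables filled in
   above drive the multiply and divide synthesis code later in this file.
   For instance, on a hypothetical target where a shift costs 1 unit and a
   multiply costs 4, init_expmed records

     shift_cost[speed][SImode][3] == 1   and   mul_cost[speed][SImode] == 4,

   so the multiply expander will prefer "x << 3" over "x * 8"; likewise
   sdiv_pow2_cheap[speed][SImode] is true only if the target expands
   "x / 32" at a cost of at most two additions.  */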
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
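/* For example (illustrative only): negate_rtx (SImode, const1_rtx) folds
   immediately to a CONST_INT of -1 via simplify_unary_operation, while
   negating a register takes the expand_unop path and emits a negation
   instruction (or its equivalent) into the insn stream.  */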
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   enum machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }
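  /* Illustrative example (not from the original source): on a
     WORDS_BIG_ENDIAN target with 4-byte words, a paradoxical
     (subreg:DI (reg:SI) 0) has inner_mode_size 4 and outer_mode_size 8,
     so difference is -4 and byte_offset becomes -4; BITNUM is therefore
     adjusted down by 32 bits, placing the bit position relative to where
     the narrow register's bytes actually live within the wider
     paradoxical value.  */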
  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
             || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
                 || validate_subreg (fieldmode, GET_MODE (op0), op0,
                                     byte_offset)))
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         will result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                    - i - 1
                                  : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true, OPTAB_DIRECT);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }
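  /* Worked example for the loop above (illustrative only): storing a
     64-bit field with 32-bit words on a WORDS_BIG_ENDIAN target gives
     backwards = 1 and nwords = 2.  Iteration i = 0 picks wordnum 1 (the
     value's low-order subword in big-endian layout) and bit_offset 32;
     iteration i = 1 picks wordnum 0 and bit_offset 0.  The least
     significant word is therefore stored first, so a partially-filled
     most significant word is handled last.  */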
  /* From here on we can assume that the field to be stored fits within
     a single word; wider fields were handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */

  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }
  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
         truncate to the inner mode, perform the insertion on a temporary and
         truncate the result to the original destination.  Note that we can't
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
         X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
          && REG_P (SUBREG_REG (xop0))
          && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                              op_mode)))
        {
          rtx tem = gen_reg_rtx (op_mode);
          emit_move_insn (tem, xop0);
          xop0 = tem;
          copy_back = true;
        }
      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;
      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */

      value1 = value;
      if (GET_MODE (value) != op_mode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (op_mode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (op_mode, value1);
            }
          else if (CONST_INT_P (value))
            value1 = gen_int_mode (INTVAL (value), op_mode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
        {
          if (copy_back)
            convert_move (op0, xop0, true);
          return true;
        }
      delete_insns_since (last);
    }
  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
         BLKmode, get the smallest mode consistent with the alignment.  If
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
         mode.  Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
          || (op_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
        bestmode = get_best_mode (bitsize, bitnum,
                                  bitregion_start, bitregion_end,
                                  MEM_ALIGN (op0),
                                  (op_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : op_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          rtx last, tempreg, xop0;
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          last = get_last_insn ();

          /* Adjust address to point to the containing unit of
             that mode.  Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;
          xop0 = adjust_address (op0, bestmode, xoffset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
                         bitregion_start, bitregion_end, value);
  return true;
}
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 enum machine_mode fieldmode,
                 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
        op_mode = VOIDmode;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx),
                                op_mode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, true))
    gcc_unreachable ();
}
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 value);
          return;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;

      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
          && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
          && flag_strict_volatile_bitfields > 0)
        mode = GET_MODE (op0);
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                              bitregion_start, bitregion_end,
                              MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 bitregion_start, bitregion_end, value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }
  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;
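  /* Worked example (illustrative only): with total_bits == 32,
     bitsize == 5 and a big-endian bitpos of 3 (counted from the msb),
     the line above yields 32 - 5 - 3 == 24, i.e. the field occupies
     bits 24..28 counted from the lsb, so its lsb sits at bit 24.  */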
  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitpos, NULL_RTX, 1);
    }
  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
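/* Worked example (illustrative only): storing the 3-bit value 5 at
   little-endian bitpos 4 of a 32-bit word expands to the familiar
   read-modify-write sequence

     temp = op0 & ~0x70;       mask_rtx (mode, 4, 3, 1) == ~0x70
     temp = temp | 0x50;       lshift_value (mode, 5, 4, 3) == 0x50
     op0  = temp;

   with the AND skipped when the constant is all-ones and the IOR
   skipped when it is zero.  */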
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
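      /* Worked example (illustrative only): with unit == 32,
         bitpos == 28 and bitsize == 8, the first iteration stores a
         4-bit piece at thispos 28 of word 0 (thissize is clipped by
         unit - thispos), and the second iteration stores the remaining
         4 bits at thispos 0 of word 1.  */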
      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }
      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
          if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
            word = word_offset ? const0_rtx : op0;
          else
            word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                          GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          enum machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                               thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                     unsigned HOST_WIDE_INT bitnum,
                     int unsignedp, bool packedp, rtx target,
                     enum machine_mode mode, enum machine_mode tmode,
                     bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }
  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
        new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
        new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
        new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
        new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
        new_mode = MIN_MODE_VECTOR_UACCUM;
      else
        new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
        if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
            && targetm.vector_mode_supported_p (new_mode))
          break;
      if (new_mode != VOIDmode)
        op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        {
          target = ops[0].value;
          if (GET_MODE (target) != mode)
            return gen_lowpart (tmode, target);
          return target;
        }
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          {
            op0 = gen_lowpart (imode, op0);

            /* If we got a SUBREG, force it into a register since we
               aren't going to be able to do another SUBREG on it.  */
            if (GET_CODE (op0) == SUBREG)
              op0 = force_reg (imode, op0);
          }
        else if (REG_P (op0))
          {
            rtx reg, subreg;
            imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
                                            MODE_INT);
            reg = gen_reg_rtx (imode);
            subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
            emit_move_insn (subreg, op0);
            op0 = reg;
            bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
          }
        else
          {
            rtx mem = assign_stack_temp (GET_MODE (op0),
                                         GET_MODE_SIZE (GET_MODE (op0)), 0);
            emit_move_insn (mem, op0);
            op0 = adjust_address (mem, BLKmode, 0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }
  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (MEM_P (op0)
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;
  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
        {
          rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
                                         byte_offset);
          if (sub == NULL)
            goto no_subreg_mode_swap;
          op0 = sub;
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, false, target_part,
                                 mode, word_mode);

          gcc_assert (target_part);

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }
  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
  if (ext_mode != MAX_MACHINE_MODE
      && bitsize > 0
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
      /* Do not use extv/extzv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
           && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
    {
      struct expand_operand ops[4];
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
      rtx xop0 = op0;
      rtx xtarget = target;
      rtx xspec_target = target;
      rtx xspec_target_subreg = 0;

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
        xop0 = gen_lowpart_SUBREG (ext_mode, xop0);

      /* Get ref to first byte containing part of the field.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, xoffset);

      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;

      unit = GET_MODE_BITSIZE (ext_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are extracting from.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      if (xtarget == 0)
        xtarget = xspec_target = gen_reg_rtx (tmode);

      if (GET_MODE (xtarget) != ext_mode)
        {
          /* Don't use LHS paradoxical subreg if explicit truncation is needed
             between the mode of the extraction (word_mode) and the target
             mode.  Instead, create a temporary and use convert_move to set
             the target.  */
          if (REG_P (xtarget)
              && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
            {
              xtarget = gen_lowpart (ext_mode, xtarget);
              if (GET_MODE_PRECISION (ext_mode)
                  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
                xspec_target_subreg = xtarget;
            }
          else
            xtarget = gen_reg_rtx (ext_mode);
        }

      create_output_operand (&ops[0], xtarget, ext_mode);
      create_fixed_operand (&ops[1], xop0);
      create_integer_operand (&ops[2], bitsize);
      create_integer_operand (&ops[3], xbitpos);
      if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
                             4, ops))
        {
          xtarget = ops[0].value;
          if (xtarget == xspec_target)
            return xtarget;
          if (xtarget == xspec_target_subreg)
            return xspec_target;
          return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
        }
    }
  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If
         OP0 is BLKmode, get the smallest mode consistent with the
         alignment.  If OP0 is a non-BLKmode object that is no
         wider than EXT_MODE, use its mode.  Otherwise, use the
         smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || (ext_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
        bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
                                  (ext_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : ext_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          /* Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;

          /* Make sure the register is big enough for the whole field.  */
          if (xoffset * BITS_PER_UNIT + unit
              >= offset * BITS_PER_UNIT + bitsize)
            {
              rtx last, result, xop0;

              last = get_last_insn ();

              /* Fetch it to a register in that size.  */
              xop0 = adjust_address (op0, bestmode, xoffset);
              xop0 = force_reg (bestmode, xop0);
              result = extract_bit_field_1 (xop0, bitsize, xbitpos,
                                            unsignedp, packedp, target,
                                            mode, tmode, false);
              if (result)
                return result;

              delete_insns_since (last);
            }
        }
    }

  if (!fallback_p)
    return NULL;

  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                    bitpos, target, unsignedp, packedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   PACKEDP is nonzero if the field has the packed attribute.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
                   rtx target, enum machine_mode mode, enum machine_mode tmode)
{
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
                              target, mode, tmode, true);
}
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
   it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   PACKEDP is true if the field has the packed attribute.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT offset,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, rtx target,
                         int unsignedp, bool packedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;
  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      if (MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0)
        {
          if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
            mode = GET_MODE (op0);
          else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
            mode = GET_MODE (target);
          else
            mode = tmode;
        }
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
                              MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize,
                                        bitpos + offset * BITS_PER_UNIT,
                                        unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }
      /* If we're accessing a volatile MEM, we can't do the next
         alignment step if it results in a multi-word access where we
         otherwise wouldn't have one.  So, check for that case
         here.  */
      if (MEM_P (op0)
          && MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0
          && bitpos + bitsize <= total_bits
          && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
        {
          if (STRICT_ALIGNMENT)
            {
              static bool informed_about_misalignment = false;
              bool warned;

              if (packedp)
                {
                  if (bitsize == total_bits)
                    warned = warning_at (input_location,
                                         OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure member"
                                         " because of packed attribute");
                  else
                    warned = warning_at (input_location,
                                         OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure bitfield"
                                         " because of packed attribute");

                  return extract_split_bit_field (op0, bitsize,
                                                  bitpos + offset * BITS_PER_UNIT,
                                                  unsignedp);
                }

              if (bitsize == total_bits)
                warned = warning_at (input_location,
                                     OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure member");
              else
                warned = warning_at (input_location,
                                     OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure bitfield");

              if (! informed_about_misalignment && warned)
                {
                  informed_about_misalignment = true;
                  inform (input_location,
                          "when a volatile object spans multiple type-sized locations,"
                          " the compiler must choose between using a single mis-aligned access to"
                          " preserve the volatility, or using multiple aligned accesses to avoid"
                          " runtime faults; this code may fail at runtime if the hardware does"
                          " not allow this access");
                }
            }
        }
      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (unsignedp)
    {
      if (bitpos)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          /* Maybe propagate the target for the shift.  */
          /* But not if we will return it--could confuse integrate.c.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode)
            subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);

      return op0;
    }
1871 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1872 then arithmetic-shift its lsb to the lsb of the word. */
1873 op0 = force_reg (mode, op0);
1875 /* Find the narrowest integer mode that contains the field. */
1877 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1878 mode = GET_MODE_WIDER_MODE (mode))
1879 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1880 break;
1881 op0 = convert_to_mode (mode, op0, 0);
1888 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1890 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1891 /* Maybe propagate the target for the shift. */
1892 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1893 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1896 return expand_shift (RSHIFT_EXPR, mode, op0,
1897 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1900 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1901 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1902 complement of that if COMPLEMENT. The mask is truncated if
1903 necessary to the width of mode MODE. The mask is zero-extended if
1904 BITSIZE+BITPOS is too small for MODE. */
1906 static rtx
1907 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1908 {
1909 double_int mask;
1911 mask = double_int_mask (bitsize);
1912 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1914 if (complement)
1915 mask = double_int_not (mask);
1917 return immed_double_int_const (mask, mode);
1918 }
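/* As a concrete illustration of the mask shape described above (a
   sketch added for exposition, not part of the build): for a 32-bit
   mode with BITPOS = 4 and BITSIZE = 8 the mask is

       ((1 << 8) - 1) << 4  ==  0x00000ff0

   and its complement, 0xfffff00f, when COMPLEMENT is nonzero.  */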
1920 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1921 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1923 static rtx
1924 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1925 {
1926 double_int val;
1928 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1929 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1931 return immed_double_int_const (val, mode);
1934 /* Extract a bit field that is split across two words
1935 and return an RTX for the result.
1937 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1938 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1939 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1942 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1943 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1944 {
1945 unsigned int unit;
1946 unsigned int bitsdone = 0;
1947 rtx result = NULL_RTX;
1950 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1951 much at a time. */
1952 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1953 unit = BITS_PER_WORD;
1954 else
1955 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1957 while (bitsdone < bitsize)
1959 unsigned HOST_WIDE_INT thissize;
1960 rtx part, word;
1961 unsigned HOST_WIDE_INT thispos;
1962 unsigned HOST_WIDE_INT offset;
1964 offset = (bitpos + bitsdone) / unit;
1965 thispos = (bitpos + bitsdone) % unit;
1967 /* THISSIZE must not overrun a word boundary. Otherwise,
1968 extract_fixed_bit_field will call us again, and we will mutually
1969 recurse forever. */
1970 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1971 thissize = MIN (thissize, unit - thispos);
1973 /* If OP0 is a register, then handle OFFSET here.
1975 When handling multiword bitfields, extract_bit_field may pass
1976 down a word_mode SUBREG of a larger REG for a bitfield that actually
1977 crosses a word boundary. Thus, for a SUBREG, we must find
1978 the current word starting from the base register. */
1979 if (GET_CODE (op0) == SUBREG)
1981 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1982 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1983 GET_MODE (SUBREG_REG (op0)));
1986 else if (REG_P (op0))
1988 word = operand_subword_force (op0, offset, GET_MODE (op0));
1990 else
1991 word = op0;
1994 /* Extract the parts in bit-counting order,
1995 whose meaning is determined by BYTES_PER_UNIT.
1996 OFFSET is in UNITs, and UNIT is in bits.
1997 extract_fixed_bit_field wants offset in bytes. */
1998 part = extract_fixed_bit_field (word_mode, word,
1999 offset * unit / BITS_PER_UNIT,
2000 thissize, thispos, 0, 1, false);
2001 bitsdone += thissize;
2003 /* Shift this part into place for the result. */
2004 if (BYTES_BIG_ENDIAN)
2006 if (bitsize != bitsdone)
2007 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2008 bitsize - bitsdone, 0, 1);
2009 }
2010 else
2011 {
2012 if (bitsdone != thissize)
2013 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2014 bitsdone - thissize, 0, 1);
2015 }
2016 if (result == NULL_RTX)
2017 result = part;
2018 else
2020 /* Combine the parts with bitwise or. This works
2021 because we extracted each part as an unsigned bit field. */
2022 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2023 OPTAB_LIB_WIDEN);
2028 /* Unsigned bit field: we are done. */
2029 if (unsignedp)
2030 return result;
2031 /* Signed bit field: sign-extend with two arithmetic shifts. */
2032 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2033 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2034 return expand_shift (RSHIFT_EXPR, word_mode, result,
2035 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2036 }
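/* The shift/IOR schema above, written out as plain C for one common
   case -- an illustrative sketch only, assuming 32-bit words,
   little-endian bit numbering, and a field that genuinely spans the
   word boundary (BITPOS + BITSIZE > 32).  */
#if 0
static unsigned int
extract_split_example (unsigned int lo, unsigned int hi,
                       unsigned int bitpos, unsigned int bitsize)
{
  unsigned int lopart_bits = 32 - bitpos;             /* bits taken from LO */
  unsigned int part0 = lo >> bitpos;                  /* first part, at the lsb */
  unsigned int part1 = hi & ((1u << (bitsize - lopart_bits)) - 1);
  return part0 | (part1 << lopart_bits);              /* combine with IOR */
}
#endif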
2038 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2039 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2040 MODE, fill the upper bits with zeros. Fail if the layout of either
2041 mode is unknown (as for CC modes) or if the extraction would involve
2042 unprofitable mode punning. Return the value on success, otherwise
2043 return null.
2045 This is different from gen_lowpart* in these respects:
2047 - the returned value must always be considered an rvalue
2049 - when MODE is wider than SRC_MODE, the extraction involves
2050 a zero extension
2052 - when MODE is smaller than SRC_MODE, the extraction involves
2053 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2055 In other words, this routine performs a computation, whereas the
2056 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2057 operations. */
2060 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2062 enum machine_mode int_mode, src_int_mode;
2064 if (mode == src_mode)
2065 return src;
2067 if (CONSTANT_P (src))
2069 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2070 fails, it will happily create (subreg (symbol_ref)) or similar
2071 invalid SUBREGs. */
2072 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2073 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2074 if (ret)
2075 return ret;
2077 if (GET_MODE (src) == VOIDmode
2078 || !validate_subreg (mode, src_mode, src, byte))
2079 return NULL_RTX;
2081 src = force_reg (GET_MODE (src), src);
2082 return gen_rtx_SUBREG (mode, src, byte);
2085 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2086 return NULL_RTX;
2088 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2089 && MODES_TIEABLE_P (mode, src_mode))
2091 rtx x = gen_lowpart_common (mode, src);
2092 if (x)
2093 return x;
2096 src_int_mode = int_mode_for_mode (src_mode);
2097 int_mode = int_mode_for_mode (mode);
2098 if (src_int_mode == BLKmode || int_mode == BLKmode)
2099 return NULL_RTX;
2101 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2102 return NULL_RTX;
2103 if (!MODES_TIEABLE_P (int_mode, mode))
2104 return NULL_RTX;
2106 src = gen_lowpart (src_int_mode, src);
2107 src = convert_modes (int_mode, src_int_mode, src, true);
2108 src = gen_lowpart (mode, src);
2109 return src;
2112 /* Add INC into TARGET. */
2115 expand_inc (rtx target, rtx inc)
2117 rtx value = expand_binop (GET_MODE (target), add_optab,
2118 target, inc,
2119 target, 0, OPTAB_LIB_WIDEN);
2120 if (value != target)
2121 emit_move_insn (target, value);
2124 /* Subtract DEC from TARGET. */
2127 expand_dec (rtx target, rtx dec)
2129 rtx value = expand_binop (GET_MODE (target), sub_optab,
2130 target, dec,
2131 target, 0, OPTAB_LIB_WIDEN);
2132 if (value != target)
2133 emit_move_insn (target, value);
2136 /* Output a shift instruction for expression code CODE,
2137 with SHIFTED being the rtx for the value to shift,
2138 and AMOUNT the rtx for the amount to shift by.
2139 Store the result in the rtx TARGET, if that is convenient.
2140 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2141 Return the rtx for where the value is. */
2144 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2145 rtx amount, rtx target, int unsignedp)
2146 {
2147 rtx op1, temp = 0;
2148 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2149 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2150 optab lshift_optab = ashl_optab;
2151 optab rshift_arith_optab = ashr_optab;
2152 optab rshift_uns_optab = lshr_optab;
2153 optab lrotate_optab = rotl_optab;
2154 optab rrotate_optab = rotr_optab;
2155 enum machine_mode op1_mode;
2157 bool speed = optimize_insn_for_speed_p ();
2159 op1 = amount;
2160 op1_mode = GET_MODE (op1);
2162 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2163 shift amount is a vector, use the vector/vector shift patterns. */
2164 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2166 lshift_optab = vashl_optab;
2167 rshift_arith_optab = vashr_optab;
2168 rshift_uns_optab = vlshr_optab;
2169 lrotate_optab = vrotl_optab;
2170 rrotate_optab = vrotr_optab;
2173 /* Previously detected shift-counts computed by NEGATE_EXPR
2174 and shifted in the other direction; but that does not work
2175 on all machines. */
2177 if (SHIFT_COUNT_TRUNCATED)
2179 if (CONST_INT_P (op1)
2180 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2181 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2182 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2183 % GET_MODE_BITSIZE (mode));
2184 else if (GET_CODE (op1) == SUBREG
2185 && subreg_lowpart_p (op1)
2186 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2187 op1 = SUBREG_REG (op1);
2190 if (op1 == const0_rtx)
2191 return shifted;
2193 /* Check whether it's cheaper to implement a left shift by a constant
2194 bit count by a sequence of additions. */
2195 if (code == LSHIFT_EXPR
2196 && CONST_INT_P (op1)
2197 && INTVAL (op1) > 0
2198 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2199 && INTVAL (op1) < MAX_BITS_PER_WORD
2200 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2201 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2203 int i;
2204 for (i = 0; i < INTVAL (op1); i++)
2206 temp = force_reg (mode, shifted);
2207 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2208 unsignedp, OPTAB_LIB_WIDEN);
2210 return shifted;
2213 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2215 enum optab_methods methods;
2217 if (attempt == 0)
2218 methods = OPTAB_DIRECT;
2219 else if (attempt == 1)
2220 methods = OPTAB_WIDEN;
2222 methods = OPTAB_LIB_WIDEN;
2224 if (rotate)
2225 {
2226 /* Widening does not work for rotation. */
2227 if (methods == OPTAB_WIDEN)
2228 continue;
2229 else if (methods == OPTAB_LIB_WIDEN)
2231 /* If we have been unable to open-code this by a rotation,
2232 do it as the IOR of two shifts. I.e., to rotate A
2233 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2234 where C is the bitsize of A.
2236 It is theoretically possible that the target machine might
2237 not be able to perform either shift and hence we would
2238 be making two libcalls rather than just the one for the
2239 shift (similarly if IOR could not be done). We will allow
2240 this extremely unlikely lossage to avoid complicating the
2241 code. */
2243 rtx subtarget = target == shifted ? 0 : target;
2244 rtx new_amount, other_amount;
2245 rtx temp1;
2247 new_amount = op1;
2248 if (CONST_INT_P (op1))
2249 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2250 - INTVAL (op1));
2251 else
2252 other_amount
2253 = simplify_gen_binary (MINUS, GET_MODE (op1),
2254 GEN_INT (GET_MODE_PRECISION (mode)),
2255 op1);
2257 shifted = force_reg (mode, shifted);
2259 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2260 mode, shifted, new_amount, 0, 1);
2261 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2262 mode, shifted, other_amount,
2263 subtarget, 1);
2264 return expand_binop (mode, ior_optab, temp, temp1, target,
2265 unsignedp, methods);
2268 temp = expand_binop (mode,
2269 left ? lrotate_optab : rrotate_optab,
2270 shifted, op1, target, unsignedp, methods);
2271 }
2272 else if (unsignedp)
2273 temp = expand_binop (mode,
2274 left ? lshift_optab : rshift_uns_optab,
2275 shifted, op1, target, unsignedp, methods);
2277 /* Do arithmetic shifts.
2278 Also, if we are going to widen the operand, we can just as well
2279 use an arithmetic right-shift instead of a logical one. */
2280 if (temp == 0 && ! rotate
2281 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2283 enum optab_methods methods1 = methods;
2285 /* If trying to widen a log shift to an arithmetic shift,
2286 don't accept an arithmetic shift of the same size. */
2287 if (unsignedp)
2288 methods1 = OPTAB_MUST_WIDEN;
2290 /* Arithmetic shift */
2292 temp = expand_binop (mode,
2293 left ? lshift_optab : rshift_arith_optab,
2294 shifted, op1, target, unsignedp, methods1);
2297 /* We used to try extzv here for logical right shifts, but that was
2298 only useful for one machine, the VAX, and caused poor code
2299 generation there for lshrdi3, so the code was deleted and a
2300 define_expand for lshrsi3 was added to vax.md. */
2303 gcc_assert (temp);
2304 return temp;
2305 }
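/* The IOR-of-two-shifts fallback for rotation, written out as plain C
   for a 32-bit value -- an illustrative sketch only, assuming
   0 < N < 32 so that neither shift count is out of range.  */
#if 0
static unsigned int
rotate_left_example (unsigned int a, unsigned int n)
{
  return (a << n) | (a >> (32 - n));
}
#endif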
2307 /* Output a shift instruction for expression code CODE,
2308 with SHIFTED being the rtx for the value to shift,
2309 and AMOUNT the amount to shift by.
2310 Store the result in the rtx TARGET, if that is convenient.
2311 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312 Return the rtx for where the value is. */
2315 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2316 int amount, rtx target, int unsignedp)
2318 return expand_shift_1 (code, mode,
2319 shifted, GEN_INT (amount), target, unsignedp);
2322 /* Output a shift instruction for expression code CODE,
2323 with SHIFTED being the rtx for the value to shift,
2324 and AMOUNT the tree for the amount to shift by.
2325 Store the result in the rtx TARGET, if that is convenient.
2326 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2327 Return the rtx for where the value is. */
2330 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2331 tree amount, rtx target, int unsignedp)
2333 return expand_shift_1 (code, mode,
2334 shifted, expand_normal (amount), target, unsignedp);
2338 /* Indicates the type of fixup needed after a constant multiplication.
2339 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2340 the result should be negated, and ADD_VARIANT means that the
2341 multiplicand should be added to the result. */
2342 enum mult_variant {basic_variant, negate_variant, add_variant};
2344 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2345 const struct mult_cost *, enum machine_mode mode);
2346 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2347 struct algorithm *, enum mult_variant *, int);
2348 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2349 const struct algorithm *, enum mult_variant);
2350 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2351 int, rtx *, int *, int *);
2352 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2353 static rtx extract_high_half (enum machine_mode, rtx);
2354 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2355 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2356 int, int);
2357 /* Compute and return the best algorithm for multiplying by T.
2358 The algorithm must cost less than COST_LIMIT.
2359 If retval.cost >= COST_LIMIT, no algorithm was found and all
2360 other fields of the returned struct are undefined.
2361 MODE is the machine mode of the multiplication. */
2363 static void
2364 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2365 const struct mult_cost *cost_limit, enum machine_mode mode)
2366 {
2367 int m;
2368 struct algorithm *alg_in, *best_alg;
2369 struct mult_cost best_cost;
2370 struct mult_cost new_limit;
2371 int op_cost, op_latency;
2372 unsigned HOST_WIDE_INT orig_t = t;
2373 unsigned HOST_WIDE_INT q;
2374 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2375 int hash_index;
2376 bool cache_hit = false;
2377 enum alg_code cache_alg = alg_zero;
2378 bool speed = optimize_insn_for_speed_p ();
2380 /* Indicate that no algorithm is yet found. If no algorithm
2381 is found, this value will be returned and indicate failure. */
2382 alg_out->cost.cost = cost_limit->cost + 1;
2383 alg_out->cost.latency = cost_limit->latency + 1;
2385 if (cost_limit->cost < 0
2386 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2387 return;
2389 /* Restrict the bits of "t" to the multiplication's mode. */
2390 t &= GET_MODE_MASK (mode);
2392 /* t == 1 can be done in zero cost. */
2393 if (t == 1)
2394 {
2395 alg_out->ops = 1;
2396 alg_out->cost.cost = 0;
2397 alg_out->cost.latency = 0;
2398 alg_out->op[0] = alg_m;
2399 return;
2400 }
2402 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2403 fail now. */
2404 if (t == 0)
2405 {
2406 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2407 return;
2408 else
2409 {
2410 alg_out->ops = 1;
2411 alg_out->cost.cost = zero_cost[speed];
2412 alg_out->cost.latency = zero_cost[speed];
2413 alg_out->op[0] = alg_zero;
2414 return;
2415 }
2416 }
2418 /* We'll be needing a couple extra algorithm structures now. */
2420 alg_in = XALLOCA (struct algorithm);
2421 best_alg = XALLOCA (struct algorithm);
2422 best_cost = *cost_limit;
2424 /* Compute the hash index. */
2425 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2427 /* See if we already know what to do for T. */
2428 if (alg_hash[hash_index].t == t
2429 && alg_hash[hash_index].mode == mode
2431 && alg_hash[hash_index].speed == speed
2432 && alg_hash[hash_index].alg != alg_unknown)
2434 cache_alg = alg_hash[hash_index].alg;
2436 if (cache_alg == alg_impossible)
2438 /* The cache tells us that it's impossible to synthesize
2439 multiplication by T within alg_hash[hash_index].cost. */
2440 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2441 /* COST_LIMIT is at least as restrictive as the one
2442 recorded in the hash table, in which case we have no
2443 hope of synthesizing a multiplication. Just
2444 return. */
2445 return;
2447 /* If we get here, COST_LIMIT is less restrictive than the
2448 one recorded in the hash table, so we may be able to
2449 synthesize a multiplication. Proceed as if we didn't
2450 have the cache entry. */
2454 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2455 /* The cached algorithm shows that this multiplication
2456 requires more cost than COST_LIMIT. Just return. This
2457 way, we don't clobber this cache entry with
2458 alg_impossible but retain useful information. */
2459 return;
2461 cache_hit = true;
2463 switch (cache_alg)
2464 {
2465 case alg_shift:
2466 goto do_alg_shift;
2468 case alg_add_t_m2:
2469 case alg_sub_t_m2:
2470 goto do_alg_addsub_t_m2;
2472 case alg_add_factor:
2473 case alg_sub_factor:
2474 goto do_alg_addsub_factor;
2476 case alg_add_t2_m:
2477 goto do_alg_add_t2_m;
2479 case alg_sub_t2_m:
2480 goto do_alg_sub_t2_m;
2482 default:
2483 gcc_unreachable ();
2484 }
2488 /* If we have a group of zero bits at the low-order part of T, try
2489 multiplying by the remaining bits and then doing a shift. */
2491 if ((t & 1) == 0)
2492 {
2493 do_alg_shift:
2494 m = floor_log2 (t & -t); /* m = number of low zero bits */
2495 if (m < maxm)
2496 {
2497 q = t >> m;
2498 /* The function expand_shift will choose between a shift and
2499 a sequence of additions, so the observed cost is given as
2500 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2501 op_cost = m * add_cost[speed][mode];
2502 if (shift_cost[speed][mode][m] < op_cost)
2503 op_cost = shift_cost[speed][mode][m];
2504 new_limit.cost = best_cost.cost - op_cost;
2505 new_limit.latency = best_cost.latency - op_cost;
2506 synth_mult (alg_in, q, &new_limit, mode);
2508 alg_in->cost.cost += op_cost;
2509 alg_in->cost.latency += op_cost;
2510 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2512 struct algorithm *x;
2513 best_cost = alg_in->cost;
2514 x = alg_in, alg_in = best_alg, best_alg = x;
2515 best_alg->log[best_alg->ops] = m;
2516 best_alg->op[best_alg->ops] = alg_shift;
2519 /* See if treating ORIG_T as a signed number yields a better
2520 sequence. Try this sequence only for a negative ORIG_T
2521 as it would be useless for a non-negative ORIG_T. */
2522 if ((HOST_WIDE_INT) orig_t < 0)
2524 /* Shift ORIG_T as follows because a right shift of a
2525 negative-valued signed type is implementation
2526 defined. */
2527 q = ~(~orig_t >> m);
2528 /* The function expand_shift will choose between a shift
2529 and a sequence of additions, so the observed cost is
2530 given as MIN (m * add_cost[speed][mode],
2531 shift_cost[speed][mode][m]). */
2532 op_cost = m * add_cost[speed][mode];
2533 if (shift_cost[speed][mode][m] < op_cost)
2534 op_cost = shift_cost[speed][mode][m];
2535 new_limit.cost = best_cost.cost - op_cost;
2536 new_limit.latency = best_cost.latency - op_cost;
2537 synth_mult (alg_in, q, &new_limit, mode);
2539 alg_in->cost.cost += op_cost;
2540 alg_in->cost.latency += op_cost;
2541 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2543 struct algorithm *x;
2544 best_cost = alg_in->cost;
2545 x = alg_in, alg_in = best_alg, best_alg = x;
2546 best_alg->log[best_alg->ops] = m;
2547 best_alg->op[best_alg->ops] = alg_shift;
2555 /* If we have an odd number, add or subtract one. */
2558 unsigned HOST_WIDE_INT w;
2561 for (w = 1; (w & t) != 0; w <<= 1)
2563 /* If T was -1, then W will be zero after the loop. This is another
2564 case where T ends with ...111. Handling this with (T + 1) and
2565 subtracting 1 produces slightly better code and results in algorithm
2566 selection much faster than treating it like the ...0111 case
2567 below. */
2570 /* Reject the case where t is 3.
2571 Thus we prefer addition in that case. */
2574 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2576 op_cost = add_cost[speed][mode];
2577 new_limit.cost = best_cost.cost - op_cost;
2578 new_limit.latency = best_cost.latency - op_cost;
2579 synth_mult (alg_in, t + 1, &new_limit, mode);
2581 alg_in->cost.cost += op_cost;
2582 alg_in->cost.latency += op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585 struct algorithm *x;
2586 best_cost = alg_in->cost;
2587 x = alg_in, alg_in = best_alg, best_alg = x;
2588 best_alg->log[best_alg->ops] = 0;
2589 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2594 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2596 op_cost = add_cost[speed][mode];
2597 new_limit.cost = best_cost.cost - op_cost;
2598 new_limit.latency = best_cost.latency - op_cost;
2599 synth_mult (alg_in, t - 1, &new_limit, mode);
2601 alg_in->cost.cost += op_cost;
2602 alg_in->cost.latency += op_cost;
2603 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2605 struct algorithm *x;
2606 best_cost = alg_in->cost;
2607 x = alg_in, alg_in = best_alg, best_alg = x;
2608 best_alg->log[best_alg->ops] = 0;
2609 best_alg->op[best_alg->ops] = alg_add_t_m2;
2613 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2614 quickly with a - a * n for some appropriate constant n. */
2615 m = exact_log2 (-orig_t + 1);
2616 if (m >= 0 && m < maxm)
2618 op_cost = shiftsub1_cost[speed][mode][m];
2619 new_limit.cost = best_cost.cost - op_cost;
2620 new_limit.latency = best_cost.latency - op_cost;
2621 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2623 alg_in->cost.cost += op_cost;
2624 alg_in->cost.latency += op_cost;
2625 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2627 struct algorithm *x;
2628 best_cost = alg_in->cost;
2629 x = alg_in, alg_in = best_alg, best_alg = x;
2630 best_alg->log[best_alg->ops] = m;
2631 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2639 /* Look for factors of t of the form
2640 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2641 If we find such a factor, we can multiply by t using an algorithm that
2642 multiplies by q, shift the result by m and add/subtract it to itself.
2644 We search for large factors first and loop down, even if large factors
2645 are less probable than small; if we find a large factor we will find a
2646 good sequence quickly, and therefore be able to prune (by decreasing
2647 COST_LIMIT) the search. */
2649 do_alg_addsub_factor:
2650 for (m = floor_log2 (t - 1); m >= 2; m--)
2652 unsigned HOST_WIDE_INT d;
2654 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2655 if (t % d == 0 && t > d && m < maxm
2656 && (!cache_hit || cache_alg == alg_add_factor))
2658 /* If the target has a cheap shift-and-add instruction use
2659 that in preference to a shift insn followed by an add insn.
2660 Assume that the shift-and-add is "atomic" with a latency
2661 equal to its cost, otherwise assume that on superscalar
2662 hardware the shift may be executed concurrently with the
2663 earlier steps in the algorithm. */
2664 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2665 if (shiftadd_cost[speed][mode][m] < op_cost)
2667 op_cost = shiftadd_cost[speed][mode][m];
2668 op_latency = op_cost;
2670 else
2671 op_latency = add_cost[speed][mode];
2673 new_limit.cost = best_cost.cost - op_cost;
2674 new_limit.latency = best_cost.latency - op_latency;
2675 synth_mult (alg_in, t / d, &new_limit, mode);
2677 alg_in->cost.cost += op_cost;
2678 alg_in->cost.latency += op_latency;
2679 if (alg_in->cost.latency < op_cost)
2680 alg_in->cost.latency = op_cost;
2681 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683 struct algorithm *x;
2684 best_cost = alg_in->cost;
2685 x = alg_in, alg_in = best_alg, best_alg = x;
2686 best_alg->log[best_alg->ops] = m;
2687 best_alg->op[best_alg->ops] = alg_add_factor;
2689 /* Other factors will have been taken care of in the recursion. */
2690 break;
2693 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2694 if (t % d == 0 && t > d && m < maxm
2695 && (!cache_hit || cache_alg == alg_sub_factor))
2697 /* If the target has a cheap shift-and-subtract insn use
2698 that in preference to a shift insn followed by a sub insn.
2699 Assume that the shift-and-sub is "atomic" with a latency
2700 equal to its cost, otherwise assume that on superscalar
2701 hardware the shift may be executed concurrently with the
2702 earlier steps in the algorithm. */
2703 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2704 if (shiftsub0_cost[speed][mode][m] < op_cost)
2706 op_cost = shiftsub0_cost[speed][mode][m];
2707 op_latency = op_cost;
2709 else
2710 op_latency = add_cost[speed][mode];
2712 new_limit.cost = best_cost.cost - op_cost;
2713 new_limit.latency = best_cost.latency - op_latency;
2714 synth_mult (alg_in, t / d, &new_limit, mode);
2716 alg_in->cost.cost += op_cost;
2717 alg_in->cost.latency += op_latency;
2718 if (alg_in->cost.latency < op_cost)
2719 alg_in->cost.latency = op_cost;
2720 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2722 struct algorithm *x;
2723 best_cost = alg_in->cost;
2724 x = alg_in, alg_in = best_alg, best_alg = x;
2725 best_alg->log[best_alg->ops] = m;
2726 best_alg->op[best_alg->ops] = alg_sub_factor;
2734 /* Try shift-and-add (load effective address) instructions,
2735 i.e. do a*3, a*5, a*9. */
2736 if ((t & 1) != 0)
2737 {
2738 do_alg_add_t2_m:
2739 q = t - 1;
2740 q = q & -q;
2741 m = exact_log2 (q);
2742 if (m >= 0 && m < maxm)
2744 op_cost = shiftadd_cost[speed][mode][m];
2745 new_limit.cost = best_cost.cost - op_cost;
2746 new_limit.latency = best_cost.latency - op_cost;
2747 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2749 alg_in->cost.cost += op_cost;
2750 alg_in->cost.latency += op_cost;
2751 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2753 struct algorithm *x;
2754 best_cost = alg_in->cost;
2755 x = alg_in, alg_in = best_alg, best_alg = x;
2756 best_alg->log[best_alg->ops] = m;
2757 best_alg->op[best_alg->ops] = alg_add_t2_m;
2763 do_alg_sub_t2_m:
2764 q = t + 1;
2765 q = q & -q;
2766 m = exact_log2 (q);
2767 if (m >= 0 && m < maxm)
2769 op_cost = shiftsub0_cost[speed][mode][m];
2770 new_limit.cost = best_cost.cost - op_cost;
2771 new_limit.latency = best_cost.latency - op_cost;
2772 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2774 alg_in->cost.cost += op_cost;
2775 alg_in->cost.latency += op_cost;
2776 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2778 struct algorithm *x;
2779 best_cost = alg_in->cost;
2780 x = alg_in, alg_in = best_alg, best_alg = x;
2781 best_alg->log[best_alg->ops] = m;
2782 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2790 /* If best_cost has not decreased, we have not found any algorithm. */
2791 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2793 /* We failed to find an algorithm. Record alg_impossible for
2794 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2795 we are asked to find an algorithm for T within the same or
2796 lower COST_LIMIT, we can immediately return to the
2797 caller. */
2798 alg_hash[hash_index].t = t;
2799 alg_hash[hash_index].mode = mode;
2800 alg_hash[hash_index].speed = speed;
2801 alg_hash[hash_index].alg = alg_impossible;
2802 alg_hash[hash_index].cost = *cost_limit;
2804 return;
2806 /* Cache the result. */
2809 alg_hash[hash_index].t = t;
2810 alg_hash[hash_index].mode = mode;
2811 alg_hash[hash_index].speed = speed;
2812 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2813 alg_hash[hash_index].cost.cost = best_cost.cost;
2814 alg_hash[hash_index].cost.latency = best_cost.latency;
2817 /* If we are getting a too long sequence for `struct algorithm'
2818 to record, make this search fail. */
2819 if (best_alg->ops == MAX_BITS_PER_WORD)
2820 return;
2822 /* Copy the algorithm from temporary space to the space at alg_out.
2823 We avoid using structure assignment because the majority of
2824 best_alg is normally undefined, and this is a critical function. */
2825 alg_out->ops = best_alg->ops + 1;
2826 alg_out->cost = best_cost;
2827 memcpy (alg_out->op, best_alg->op,
2828 alg_out->ops * sizeof *alg_out->op);
2829 memcpy (alg_out->log, best_alg->log,
2830 alg_out->ops * sizeof *alg_out->log);
2831 }
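/* One sequence synth_mult can discover, written out as plain C -- an
   illustrative sketch only.  For t = 10 = ((1 << 2) + 1) << 1 it may
   pick alg_add_t2_m followed by alg_shift on a target where shifts
   and adds are cheap.  */
#if 0
static long
mult_by_10_example (long x)
{
  long t = (x << 2) + x;   /* x * 5, alg_add_t2_m with log = 2 */
  return t << 1;           /* x * 10, alg_shift with log = 1 */
}
#endif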
2833 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2834 Try three variations:
2836 - a shift/add sequence based on VAL itself
2837 - a shift/add sequence based on -VAL, followed by a negation
2838 - a shift/add sequence based on VAL - 1, followed by an addition.
2840 Return true if the cheapest of these cost less than MULT_COST,
2841 describing the algorithm in *ALG and final fixup in *VARIANT. */
2844 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2845 struct algorithm *alg, enum mult_variant *variant,
2846 int mult_cost)
2847 {
2848 struct algorithm alg2;
2849 struct mult_cost limit;
2850 int op_cost;
2851 bool speed = optimize_insn_for_speed_p ();
2853 /* Fail quickly for impossible bounds. */
2854 if (mult_cost < 0)
2855 return false;
2857 /* Ensure that mult_cost provides a reasonable upper bound.
2858 Any constant multiplication can be performed with less
2859 than 2 * bits additions. */
2860 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2861 if (mult_cost > op_cost)
2862 mult_cost = op_cost;
2864 *variant = basic_variant;
2865 limit.cost = mult_cost;
2866 limit.latency = mult_cost;
2867 synth_mult (alg, val, &limit, mode);
2869 /* This works only if the inverted value actually fits in an
2870 `HOST_WIDE_INT'. */
2871 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2873 op_cost = neg_cost[speed][mode];
2874 if (MULT_COST_LESS (&alg->cost, mult_cost))
2876 limit.cost = alg->cost.cost - op_cost;
2877 limit.latency = alg->cost.latency - op_cost;
2881 limit.cost = mult_cost - op_cost;
2882 limit.latency = mult_cost - op_cost;
2885 synth_mult (&alg2, -val, &limit, mode);
2886 alg2.cost.cost += op_cost;
2887 alg2.cost.latency += op_cost;
2888 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2889 *alg = alg2, *variant = negate_variant;
2892 /* This proves very useful for division-by-constant. */
2893 op_cost = add_cost[speed][mode];
2894 if (MULT_COST_LESS (&alg->cost, mult_cost))
2896 limit.cost = alg->cost.cost - op_cost;
2897 limit.latency = alg->cost.latency - op_cost;
2901 limit.cost = mult_cost - op_cost;
2902 limit.latency = mult_cost - op_cost;
2905 synth_mult (&alg2, val - 1, &limit, mode);
2906 alg2.cost.cost += op_cost;
2907 alg2.cost.latency += op_cost;
2908 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2909 *alg = alg2, *variant = add_variant;
2911 return MULT_COST_LESS (&alg->cost, mult_cost);
2912 }
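/* The negate_variant and add_variant fixups, written out as plain C
   for two sample constants -- an illustrative sketch only; which
   variant is actually chosen depends on the target's cost tables.  */
#if 0
static long
mult_by_minus_5_example (long x)  /* negate_variant: -(x * 5) */
{
  return -((x << 2) + x);
}

static long
mult_by_7_example (long x)        /* add_variant: x * 6 + x */
{
  return (((x << 1) + x) << 1) + x;
}
#endif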
2914 /* A subroutine of expand_mult, used for constant multiplications.
2915 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2916 convenient. Use the shift/add sequence described by ALG and apply
2917 the final fixup specified by VARIANT. */
2920 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2921 rtx target, const struct algorithm *alg,
2922 enum mult_variant variant)
2924 HOST_WIDE_INT val_so_far;
2925 rtx insn, accum, tem;
2926 int opno;
2927 enum machine_mode nmode;
2929 /* Avoid referencing memory over and over and invalid sharing
2930 of rtl. */
2931 op0 = force_reg (mode, op0);
2933 /* ACCUM starts out either as OP0 or as a zero, depending on
2934 the first operation. */
2936 if (alg->op[0] == alg_zero)
2938 accum = copy_to_mode_reg (mode, const0_rtx);
2939 val_so_far = 0;
2940 }
2941 else if (alg->op[0] == alg_m)
2943 accum = copy_to_mode_reg (mode, op0);
2944 val_so_far = 1;
2945 }
2946 else
2947 gcc_unreachable ();
2949 for (opno = 1; opno < alg->ops; opno++)
2951 int log = alg->log[opno];
2952 rtx shift_subtarget = optimize ? 0 : accum;
2953 rtx add_target
2954 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2955 && !optimize)
2956 ? target : 0;
2957 rtx accum_target = optimize ? 0 : accum;
2960 switch (alg->op[opno])
2961 {
2962 case alg_shift:
2963 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2964 /* REG_EQUAL note will be attached to the following insn. */
2965 emit_move_insn (accum, tem);
2966 val_so_far <<= log;
2967 break;
2969 case alg_add_t_m2:
2970 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2971 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2972 add_target ? add_target : accum_target);
2973 val_so_far += (HOST_WIDE_INT) 1 << log;
2974 break;
2976 case alg_sub_t_m2:
2977 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2978 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2979 add_target ? add_target : accum_target);
2980 val_so_far -= (HOST_WIDE_INT) 1 << log;
2981 break;
2983 case alg_add_t2_m:
2984 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2985 log, shift_subtarget, 0);
2986 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2987 add_target ? add_target : accum_target);
2988 val_so_far = (val_so_far << log) + 1;
2989 break;
2991 case alg_sub_t2_m:
2992 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2993 log, shift_subtarget, 0);
2994 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2995 add_target ? add_target : accum_target);
2996 val_so_far = (val_so_far << log) - 1;
2997 break;
2999 case alg_add_factor:
3000 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3001 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3002 add_target ? add_target : accum_target);
3003 val_so_far += val_so_far << log;
3004 break;
3006 case alg_sub_factor:
3007 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3008 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3010 ? add_target : (optimize ? 0 : tem)));
3011 val_so_far = (val_so_far << log) - val_so_far;
3012 break;
3014 default:
3015 gcc_unreachable ();
3016 }
3018 /* Write a REG_EQUAL note on the last insn so that we can cse
3019 multiplication sequences. Note that if ACCUM is a SUBREG,
3020 we've set the inner register and must properly indicate
3021 that. */
3023 tem = op0, nmode = mode;
3024 accum_inner = accum;
3025 if (GET_CODE (accum) == SUBREG)
3027 accum_inner = SUBREG_REG (accum);
3028 nmode = GET_MODE (accum_inner);
3029 tem = gen_lowpart (nmode, op0);
3032 insn = get_last_insn ();
3033 set_dst_reg_note (insn, REG_EQUAL,
3034 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3035 accum_inner);
3038 if (variant == negate_variant)
3040 val_so_far = -val_so_far;
3041 accum = expand_unop (mode, neg_optab, accum, target, 0);
3043 else if (variant == add_variant)
3045 val_so_far = val_so_far + 1;
3046 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3049 /* Compare only the bits of val and val_so_far that are significant
3050 in the result mode, to avoid sign-/zero-extension confusion. */
3051 val &= GET_MODE_MASK (mode);
3052 val_so_far &= GET_MODE_MASK (mode);
3053 gcc_assert (val == val_so_far);
3055 return accum;
3056 }
3058 /* Perform a multiplication and return an rtx for the result.
3059 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3060 TARGET is a suggestion for where to store the result (an rtx).
3062 We check specially for a constant integer as OP1.
3063 If you want this check for OP0 as well, then before calling
3064 you should swap the two operands if OP0 would be constant. */
3066 rtx
3067 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3068 int unsignedp)
3069 {
3070 enum mult_variant variant;
3071 struct algorithm algorithm;
3073 bool speed = optimize_insn_for_speed_p ();
3075 /* Handling const0_rtx here allows us to use zero as a rogue value for
3076 coeff below. */
3077 if (op1 == const0_rtx)
3078 return const0_rtx;
3079 if (op1 == const1_rtx)
3080 return op0;
3081 if (op1 == constm1_rtx)
3082 return expand_unop (mode,
3083 GET_MODE_CLASS (mode) == MODE_INT
3084 && !unsignedp && flag_trapv
3085 ? negv_optab : neg_optab,
3086 op0, target, 0);
3088 /* These are the operations that are potentially turned into a sequence
3089 of shifts and additions. */
3090 if (SCALAR_INT_MODE_P (mode)
3091 && (unsignedp || !flag_trapv))
3093 HOST_WIDE_INT coeff = 0;
3094 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3096 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3097 less than or equal in size to `unsigned int' this doesn't matter.
3098 If the mode is larger than `unsigned int', then synth_mult works
3099 only if the constant value exactly fits in an `unsigned int' without
3100 any truncation. This means that multiplying by negative values does
3101 not work; results are off by 2^32 on a 32 bit machine. */
3103 if (CONST_INT_P (op1))
3105 /* Attempt to handle multiplication of DImode values by negative
3106 coefficients, by performing the multiplication by a positive
3107 multiplier and then inverting the result. */
3108 if (INTVAL (op1) < 0
3109 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3111 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3112 result is interpreted as an unsigned coefficient.
3113 Exclude cost of op0 from max_cost to match the cost
3114 calculation of the synth_mult. */
3115 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3116 speed)
3117 - neg_cost[speed][mode]);
3118 if (max_cost > 0
3119 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3120 &variant, max_cost))
3122 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3123 NULL_RTX, &algorithm,
3124 variant);
3125 return expand_unop (mode, neg_optab, temp, target, 0);
3128 else coeff = INTVAL (op1);
3130 else if (GET_CODE (op1) == CONST_DOUBLE)
3132 /* If we are multiplying in DImode, it may still be a win
3133 to try to work with shifts and adds. */
3134 if (CONST_DOUBLE_HIGH (op1) == 0
3135 && CONST_DOUBLE_LOW (op1) > 0)
3136 coeff = CONST_DOUBLE_LOW (op1);
3137 else if (CONST_DOUBLE_LOW (op1) == 0
3138 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3140 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3141 + HOST_BITS_PER_WIDE_INT;
3142 return expand_shift (LSHIFT_EXPR, mode, op0,
3143 shift, target, unsignedp);
3147 /* We used to test optimize here, on the grounds that it's better to
3148 produce a smaller program when -O is not used. But this causes
3149 such a terrible slowdown sometimes that it seems better to always
3150 use synth_mult. */
3151 if (coeff != 0)
3152 {
3153 /* Special case powers of two. */
3154 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3155 return expand_shift (LSHIFT_EXPR, mode, op0,
3156 floor_log2 (coeff), target, unsignedp);
3158 /* Exclude cost of op0 from max_cost to match the cost
3159 calculation of the synth_mult. */
3160 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3161 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3162 max_cost))
3163 return expand_mult_const (mode, op0, coeff, target,
3164 &algorithm, variant);
3168 if (GET_CODE (op0) == CONST_DOUBLE)
3169 {
3170 rtx temp = op0;
3171 op0 = op1;
3172 op1 = temp;
3173 }
3175 /* Expand x*2.0 as x+x. */
3176 if (GET_CODE (op1) == CONST_DOUBLE
3177 && SCALAR_FLOAT_MODE_P (mode))
3179 REAL_VALUE_TYPE d;
3180 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3182 if (REAL_VALUES_EQUAL (d, dconst2))
3184 op0 = force_reg (GET_MODE (op0), op0);
3185 return expand_binop (mode, add_optab, op0, op0,
3186 target, unsignedp, OPTAB_LIB_WIDEN);
3190 /* This used to use umul_optab if unsigned, but for non-widening multiply
3191 there is no difference between signed and unsigned. */
3192 op0 = expand_binop (mode,
3193 ! unsignedp
3194 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3195 ? smulv_optab : smul_optab,
3196 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3197 gcc_assert (op0);
3198 return op0;
3199 }
3201 /* Perform a widening multiplication and return an rtx for the result.
3202 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3203 TARGET is a suggestion for where to store the result (an rtx).
3204 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3205 or smul_widen_optab.
3207 We check specially for a constant integer as OP1, comparing the
3208 cost of a widening multiply against the cost of a sequence of shifts
3209 and adds. */
3212 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3213 int unsignedp, optab this_optab)
3214 {
3215 bool speed = optimize_insn_for_speed_p ();
3216 rtx cop1;
3218 if (CONST_INT_P (op1)
3219 && GET_MODE (op0) != VOIDmode
3220 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3221 this_optab == umul_widen_optab))
3222 && CONST_INT_P (cop1)
3223 && (INTVAL (cop1) >= 0
3224 || HWI_COMPUTABLE_MODE_P (mode)))
3226 HOST_WIDE_INT coeff = INTVAL (cop1);
3227 int max_cost;
3228 enum mult_variant variant;
3229 struct algorithm algorithm;
3231 /* Special case powers of two. */
3232 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3234 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3235 return expand_shift (LSHIFT_EXPR, mode, op0,
3236 floor_log2 (coeff), target, unsignedp);
3239 /* Exclude cost of op0 from max_cost to match the cost
3240 calculation of the synth_mult. */
3241 max_cost = mul_widen_cost[speed][mode];
3242 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3243 max_cost))
3244 {
3245 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3246 return expand_mult_const (mode, op0, coeff, target,
3247 &algorithm, variant);
3250 return expand_binop (mode, this_optab, op0, op1, target,
3251 unsignedp, OPTAB_LIB_WIDEN);
3254 /* Return the smallest n such that 2**n >= X. */
3257 ceil_log2 (unsigned HOST_WIDE_INT x)
3259 return floor_log2 (x - 1) + 1;
3262 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3263 replace division by D, and put the least significant N bits of the result
3264 in *MULTIPLIER_PTR and return the most significant bit.
3266 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3267 needed precision is in PRECISION (should be <= N).
3269 PRECISION should be as small as possible so this function can choose
3270 multiplier more freely.
3272 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3273 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3275 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3276 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3279 unsigned HOST_WIDE_INT
3280 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3281 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3283 HOST_WIDE_INT mhigh_hi, mlow_hi;
3284 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3285 int lgup, post_shift;
3286 int pow, pow2;
3287 unsigned HOST_WIDE_INT nl, dummy1;
3288 HOST_WIDE_INT nh, dummy2;
3290 /* lgup = ceil(log2(divisor)); */
3291 lgup = ceil_log2 (d);
3293 gcc_assert (lgup <= n);
3295 pow = n + lgup;
3296 pow2 = n + lgup - precision;
3298 /* We could handle this with some effort, but this case is much
3299 better handled directly with a scc insn, so rely on caller using
3300 that. */
3301 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3303 /* mlow = 2^(N + lgup)/d */
3304 if (pow >= HOST_BITS_PER_WIDE_INT)
3306 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3307 nl = 0;
3308 }
3309 else
3310 {
3311 nh = 0;
3312 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3314 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3315 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3317 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3318 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3319 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3320 else
3321 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3322 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3323 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3325 gcc_assert (!mhigh_hi || nh - d < d);
3326 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3327 /* Assert that mlow < mhigh. */
3328 gcc_assert (mlow_hi < mhigh_hi
3329 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3331 /* If precision == N, then mlow, mhigh exceed 2^N
3332 (but they do not exceed 2^(N+1)). */
3334 /* Reduce to lowest terms. */
3335 for (post_shift = lgup; post_shift > 0; post_shift--)
3337 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3338 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3339 if (ml_lo >= mh_lo)
3340 break;
3342 mlow_hi = 0;
3343 mlow_lo = ml_lo;
3344 mhigh_hi = 0;
3345 mhigh_lo = mh_lo;
3346 }
3348 *post_shift_ptr = post_shift;
3349 *lgup_ptr = lgup;
3350 if (n < HOST_BITS_PER_WIDE_INT)
3352 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3353 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3354 return mhigh_lo >= mask;
3355 }
3356 else
3357 {
3358 *multiplier_ptr = GEN_INT (mhigh_lo);
3359 return mhigh_hi;
3360 }
3361 }
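/* A worked instance of the multiplier selection above -- an
   illustrative sketch only.  For unsigned 32-bit division by 3 the
   usual choice is the N + 1 bit multiplier 0xAAAAAAAB with a
   post-shift of 1, so x / 3 == (x * 0xAAAAAAAB) >> 33 for all
   32-bit x.  */
#if 0
static unsigned int
udiv_by_3_example (unsigned int x)
{
  return (unsigned int) (((unsigned long long) x * 0xAAAAAAABULL) >> 33);
}
#endif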
3363 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3364 congruent to 1 (mod 2**N). */
3366 static unsigned HOST_WIDE_INT
3367 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3369 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3371 /* The algorithm notes that the choice y = x satisfies
3372 x*y == 1 mod 2^3, since x is assumed odd.
3373 Each iteration doubles the number of bits of significance in y. */
3375 unsigned HOST_WIDE_INT mask;
3376 unsigned HOST_WIDE_INT y = x;
3377 int nbit = 3;
3379 mask = (n == HOST_BITS_PER_WIDE_INT
3380 ? ~(unsigned HOST_WIDE_INT) 0
3381 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3383 while (nbit < n)
3384 {
3385 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3386 nbit *= 2;
3387 }
3388 return y;
3389 }
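/* A worked instance of the Newton iteration above -- an illustrative
   sketch only.  Inverting x = 7 mod 2^8, starting from y = x:

       7 * 7 = 49 == 1 (mod 8),  so y = 7 has 3 good low bits;
       y := 7 * (2 - 7 * 7) = -329 == 183 (mod 256)

   and indeed 7 * 183 = 1281 == 1 (mod 256), so 183 is the inverse
   of 7 mod 2^8.  */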
3391 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3392 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3393 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3394 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3395 become signed.
3397 The result is put in TARGET if that is convenient.
3399 MODE is the mode of operation. */
3402 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3403 rtx op1, rtx target, int unsignedp)
3404 {
3405 rtx tem;
3406 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3408 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3409 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3410 tem = expand_and (mode, tem, op1, NULL_RTX);
3411 adj_operand
3412 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3413 adj_operand);
3415 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3416 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3417 tem = expand_and (mode, tem, op0, NULL_RTX);
3418 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419 target);
3421 return target;
3422 }
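/* The identity behind the adjustment above, as a worked equation --
   an illustration added for exposition, not part of the original
   commentary.  For N-bit operands, the unsigned value u and signed
   value s of the same bit pattern satisfy u = s + 2^N * [s < 0], so
   the high parts of the two flavors of product differ by exactly

       highpart_s (a, b) = highpart_u (a, b) - a * [b < 0] - b * [a < 0]

   which is what the two shift/AND/add-or-subtract steps compute.  */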
3424 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3427 extract_high_half (enum machine_mode mode, rtx op)
3429 enum machine_mode wider_mode;
3431 if (mode == word_mode)
3432 return gen_highpart (mode, op);
3434 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3436 wider_mode = GET_MODE_WIDER_MODE (mode);
3437 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3438 GET_MODE_BITSIZE (mode), 0, 1);
3439 return convert_modes (mode, wider_mode, op, 0);
3442 /* Like expand_mult_highpart, but only consider using a multiplication
3443 optab. OP1 is an rtx for the constant operand. */
3446 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3447 rtx target, int unsignedp, int max_cost)
3449 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3450 enum machine_mode wider_mode;
3451 optab moptab;
3452 rtx tem;
3453 int size;
3454 bool speed = optimize_insn_for_speed_p ();
3456 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3458 wider_mode = GET_MODE_WIDER_MODE (mode);
3459 size = GET_MODE_BITSIZE (mode);
3461 /* Firstly, try using a multiplication insn that only generates the needed
3462 high part of the product, and in the sign flavor of unsignedp. */
3463 if (mul_highpart_cost[speed][mode] < max_cost)
3465 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3466 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3467 unsignedp, OPTAB_DIRECT);
3468 if (tem)
3469 return tem;
3470 }
3472 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3473 Need to adjust the result after the multiplication. */
3474 if (size - 1 < BITS_PER_WORD
3475 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3476 + 4 * add_cost[speed][mode] < max_cost))
3478 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3479 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3480 unsignedp, OPTAB_DIRECT);
3481 if (tem)
3482 /* We used the wrong signedness. Adjust the result. */
3483 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3484 target, unsignedp);
3485 }
3487 /* Try widening multiplication. */
3488 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3489 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3490 && mul_widen_cost[speed][wider_mode] < max_cost)
3492 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3493 unsignedp, OPTAB_WIDEN);
3494 if (tem)
3495 return extract_high_half (mode, tem);
3498 /* Try widening the mode and perform a non-widening multiplication. */
3499 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3500 && size - 1 < BITS_PER_WORD
3501 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3503 rtx insns, wop0, wop1;
3505 /* We need to widen the operands, for example to ensure the
3506 constant multiplier is correctly sign or zero extended.
3507 Use a sequence to clean-up any instructions emitted by
3508 the conversions if things don't work out. */
3509 start_sequence ();
3510 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3511 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3512 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3513 unsignedp, OPTAB_WIDEN);
3514 insns = get_insns ();
3515 end_sequence ();
3517 if (tem)
3518 {
3519 emit_insn (insns);
3520 return extract_high_half (mode, tem);
3524 /* Try widening multiplication of opposite signedness, and adjust. */
3525 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3526 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3527 && size - 1 < BITS_PER_WORD
3528 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3529 + 4 * add_cost[speed][mode] < max_cost))
3531 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3532 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3533 if (tem)
3534 {
3535 tem = extract_high_half (mode, tem);
3536 /* We used the wrong signedness. Adjust the result. */
3537 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3538 target, unsignedp);
3539 }
3541 return 0;
3542 }
3545 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3546 putting the high half of the result in TARGET if that is convenient,
3547 and return where the result is. If the operation can not be performed,
3548 0 is returned.
3550 MODE is the mode of operation and result.
3552 UNSIGNEDP nonzero means unsigned multiply.
3554 MAX_COST is the total allowed cost for the expanded RTL. */
3557 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3558 rtx target, int unsignedp, int max_cost)
3560 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3561 unsigned HOST_WIDE_INT cnst1;
3562 int extra_cost;
3563 bool sign_adjust = false;
3564 enum mult_variant variant;
3565 struct algorithm alg;
3566 rtx tem;
3567 bool speed = optimize_insn_for_speed_p ();
3569 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3570 /* We can't support modes wider than HOST_BITS_PER_INT. */
3571 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3573 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3575 /* We can't optimize modes wider than BITS_PER_WORD.
3576 ??? We might be able to perform double-word arithmetic if
3577 mode == word_mode, however all the cost calculations in
3578 synth_mult etc. assume single-word operations. */
3579 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3580 return expand_mult_highpart_optab (mode, op0, op1, target,
3581 unsignedp, max_cost);
3583 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3585 /* Check whether we try to multiply by a negative constant. */
3586 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3587 {
3588 sign_adjust = true;
3589 extra_cost += add_cost[speed][mode];
3590 }
3592 /* See whether shift/add multiplication is cheap enough. */
3593 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3594 max_cost - extra_cost))
3596 /* See whether the specialized multiplication optabs are
3597 cheaper than the shift/add version. */
3598 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3599 alg.cost.cost + extra_cost);
3600 if (tem)
3601 return tem;
3603 tem = convert_to_mode (wider_mode, op0, unsignedp);
3604 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3605 tem = extract_high_half (mode, tem);
3607 /* Adjust result for signedness. */
3608 if (sign_adjust)
3609 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3611 return tem;
3612 }
3613 return expand_mult_highpart_optab (mode, op0, op1, target,
3614 unsignedp, max_cost);
3618 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3620 static rtx
3621 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3622 {
3623 unsigned HOST_WIDE_INT masklow, maskhigh;
3624 rtx result, temp, shift, label;
3625 int logd;
3627 logd = floor_log2 (d);
3628 result = gen_reg_rtx (mode);
3630 /* Avoid conditional branches when they're expensive. */
3631 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3632 && optimize_insn_for_speed_p ())
3634 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3635 mode, 0, 0);
3636 if (signmask)
3637 {
3638 signmask = force_reg (mode, signmask);
3639 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3640 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3642 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3643 which instruction sequence to use. If logical right shifts
3644 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3645 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3647 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3648 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3649 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3650 > COSTS_N_INSNS (2)))
3652 temp = expand_binop (mode, xor_optab, op0, signmask,
3653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 temp = expand_binop (mode, sub_optab, temp, signmask,
3655 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3656 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3657 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658 temp = expand_binop (mode, xor_optab, temp, signmask,
3659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660 temp = expand_binop (mode, sub_optab, temp, signmask,
3661 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3666 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667 signmask = force_reg (mode, signmask);
3669 temp = expand_binop (mode, add_optab, op0, signmask,
3670 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673 temp = expand_binop (mode, sub_optab, temp, signmask,
3674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675 }
3676 return temp;
3677 }
3678 }
3680 /* Mask contains the mode's signbit and the significant bits of the
3681 modulus. By including the signbit in the operation, many targets
3682 can avoid an explicit compare operation in the following comparison
3683 against zero. */
3685 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3686 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3688 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3692 maskhigh = (HOST_WIDE_INT) -1
3693 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3695 temp = expand_binop (mode, and_optab, op0,
3696 immed_double_const (masklow, maskhigh, mode),
3697 result, 1, OPTAB_LIB_WIDEN);
3698 if (temp != result)
3699 emit_move_insn (result, temp);
3701 label = gen_label_rtx ();
3702 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3704 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3705 0, OPTAB_LIB_WIDEN);
3706 masklow = (HOST_WIDE_INT) -1 << logd;
3707 maskhigh = -1;
3708 temp = expand_binop (mode, ior_optab, temp,
3709 immed_double_const (masklow, maskhigh, mode),
3710 result, 1, OPTAB_LIB_WIDEN);
3711 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3712 0, OPTAB_LIB_WIDEN);
3713 if (temp != result)
3714 emit_move_insn (result, temp);
3716 emit_label (label);
3717 return result;
3718 }
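/* The branch-free LSHIFTRT form used above, written out as plain C
   for d = 4 on a 32-bit int -- an illustrative sketch only, assuming
   an arithmetic right shift of negative values (the usual
   two's-complement behaviour).  */
#if 0
static int
smod_4_example (int x)
{
  unsigned int signmask = (unsigned int) (x >> 31) >> (32 - 2); /* 0 or 3 */
  return (int) ((((unsigned int) x + signmask) & 3) - signmask);
}
#endif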
3719 /* Expand signed division of OP0 by a power of two D in mode MODE.
3720 This routine is only called for positive values of D. */
3722 static rtx
3723 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3724 {
3725 rtx temp, label;
3726 int logd;
3728 logd = floor_log2 (d);
3730 if (d == 2
3731 && BRANCH_COST (optimize_insn_for_speed_p (),
3732 false) >= 1)
3733 {
3734 temp = gen_reg_rtx (mode);
3735 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3736 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3737 0, OPTAB_LIB_WIDEN);
3738 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3741 #ifdef HAVE_conditional_move
3742 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3743 >= 2)
3744 {
3745 rtx temp2;
3747 /* ??? emit_conditional_move forces a stack adjustment via
3748 compare_from_rtx so, if the sequence is discarded, it will
3749 be lost. Do it now instead. */
3750 do_pending_stack_adjust ();
3752 start_sequence ();
3753 temp2 = copy_to_mode_reg (mode, op0);
3754 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3755 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3756 temp = force_reg (mode, temp);
3758 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3759 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3760 mode, temp, temp2, mode, 0);
3761 if (temp2)
3762 {
3763 rtx seq = get_insns ();
3764 end_sequence ();
3765 emit_insn (seq);
3766 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3767 }
3768 end_sequence ();
3769 }
3770 #endif
3772 if (BRANCH_COST (optimize_insn_for_speed_p (),
3773 false) >= 2)
3774 {
3775 int ushift = GET_MODE_BITSIZE (mode) - logd;
3777 temp = gen_reg_rtx (mode);
3778 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3779 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3780 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3781 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3782 else
3783 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3784 ushift, NULL_RTX, 1);
3785 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3786 0, OPTAB_LIB_WIDEN);
3787 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3790 label = gen_label_rtx ();
3791 temp = copy_to_mode_reg (mode, op0);
3792 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3793 expand_inc (temp, GEN_INT (d - 1));
3795 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
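/* Editorial sketch (not from the original source) of the branchless
   variant above, for a 32-bit int and d = 8; sdiv8_sketch is a
   hypothetical name.

       static int
       sdiv8_sketch (int n)
       {
         int bias = (n >> 31) & 7;      d - 1 if n < 0, else 0
         return (n + bias) >> 3;        arithmetic shift; -13 gives -1
       }
*/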
3798 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3799 if that is convenient, and returning where the result is.
3800 You may request either the quotient or the remainder as the result;
3801 specify REM_FLAG nonzero to get the remainder.
3803 CODE is the expression code for which kind of division this is;
3804 it controls how rounding is done. MODE is the machine mode to use.
3805 UNSIGNEDP nonzero means do unsigned division. */
3807 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3808 and then correct it by or'ing in missing high bits
3809 if result of ANDI is nonzero.
3810 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3811 This could optimize to a bfexts instruction.
3812 But C doesn't use these operations, so their optimizations are
3813 left for later. */
3814 /* ??? For modulo, we don't actually need the highpart of the first product,
3815 the low part will do nicely. And for small divisors, the second multiply
3816 can also be a low-part only multiply or even be completely left out.
3817 E.g. to calculate the remainder of a division by 3 with a 32 bit
3818 multiply, multiply with 0x55555556 and extract the upper two bits;
3819 the result is exact for inputs up to 0x1fffffff.
3820 The input range can be reduced by using cross-sum rules.
3821 For odd divisors >= 3, the following table gives right shift counts
3822 so that if a number is shifted by an integer multiple of the given
3823 amount, the remainder stays the same:
3824 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3825 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3826 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3827 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3828 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3830 Cross-sum rules for even numbers can be derived by leaving as many bits
3831 to the right alone as the divisor has zeros to the right.
3832 E.g. if x is an unsigned 32 bit number:
3833 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
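/* Editorial sketch (not part of the original source) of the
   multiply-by-0x55555556 remainder trick described above, exact for
   x <= 0x1fffffff; urem3_sketch is a hypothetical name.

       static unsigned int
       urem3_sketch (unsigned int x)
       {
         return (x * 0x55555556u) >> 30;   top two bits of the low product
       }

   e.g. x = 5: the low product is 0xaaaaaaae, whose top two bits give
   2 = 5 % 3.  */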
3837 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3838 rtx op0, rtx op1, rtx target, int unsignedp)
3840 enum machine_mode compute_mode;
3842 rtx quotient = 0, remainder = 0;
3846 optab optab1, optab2;
3847 int op1_is_constant, op1_is_pow2 = 0;
3848 int max_cost, extra_cost;
3849 static HOST_WIDE_INT last_div_const = 0;
3850 static HOST_WIDE_INT ext_op1;
3851 bool speed = optimize_insn_for_speed_p ();
3853 op1_is_constant = CONST_INT_P (op1);
3854 if (op1_is_constant)
3856 ext_op1 = INTVAL (op1);
3858 ext_op1 &= GET_MODE_MASK (mode);
3859 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3860 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3863 /*
3864 This is the structure of expand_divmod:
3866 First comes code to fix up the operands so we can perform the operations
3867 correctly and efficiently.
3869 Second comes a switch statement with code specific for each rounding mode.
3870 For some special operands this code emits all RTL for the desired
3871 operation, for other cases, it generates only a quotient and stores it in
3872 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3873 to indicate that it has not done anything.
3875 Last comes code that finishes the operation. If QUOTIENT is set and
3876 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3877 QUOTIENT is not set, it is computed using trunc rounding.
3879 We try to generate special code for division and remainder when OP1 is a
3880 constant. If |OP1| = 2**n we can use shifts and some other fast
3881 operations. For other values of OP1, we compute a carefully selected
3882 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3883 by m.
3885 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3886 half of the product. Different strategies for generating the product are
3887 implemented in expand_mult_highpart.
3889 If what we actually want is the remainder, we generate that by another
3890 by-constant multiplication and a subtraction. */
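/* Editorial worked example of the final fixup described above: for
   17 / 5 the switch sets QUOTIENT = 3, and with REM_FLAG set the
   remainder is recovered as 17 - 3 * 5 = 2 with a multiply and a
   subtract instead of a second division.  */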
3892 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3893 code below will malfunction if we are, so check here and handle
3894 the special case if so. */
3895 if (op1 == const1_rtx)
3896 return rem_flag ? const0_rtx : op0;
3898 /* When dividing by -1, we could get an overflow.
3899 negv_optab can handle overflows. */
3900 if (! unsignedp && op1 == constm1_rtx)
3904 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3905 ? negv_optab : neg_optab, op0, target, 0);
3909 /* Don't use the function value register as a target
3910 since we have to read it as well as write it,
3911 and function-inlining gets confused by this. */
3912 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3913 /* Don't clobber an operand while doing a multi-step calculation. */
3914 || ((rem_flag || op1_is_constant)
3915 && (reg_mentioned_p (target, op0)
3916 || (MEM_P (op0) && MEM_P (target))))
3917 || reg_mentioned_p (target, op1)
3918 || (MEM_P (op1) && MEM_P (target))))
3921 /* Get the mode in which to perform this computation. Normally it will
3922 be MODE, but sometimes we can't do the desired operation in MODE.
3923 If so, pick a wider mode in which we can do the operation. Convert
3924 to that mode at the start to avoid repeated conversions.
3926 First see what operations we need. These depend on the expression
3927 we are evaluating. (We assume that divxx3 insns exist under the
3928 same conditions as modxx3 insns do, and that these insns don't normally
3929 fail. If these assumptions are not correct, we may generate less
3930 efficient code in some cases.)
3932 Then see if we find a mode in which we can open-code that operation
3933 (either a division, modulus, or shift). Finally, check for the smallest
3934 mode for which we can do the operation with a library call. */
3936 /* We might want to refine this now that we have division-by-constant
3937 optimization. Since expand_mult_highpart tries so many variants, it is
3938 not straightforward to generalize this. Maybe we should make an array
3939 of possible modes in init_expmed? Save this for GCC 2.7. */
3941 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3942 ? (unsignedp ? lshr_optab : ashr_optab)
3943 : (unsignedp ? udiv_optab : sdiv_optab));
3944 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3946 : (unsignedp ? udivmod_optab : sdivmod_optab));
3948 for (compute_mode = mode; compute_mode != VOIDmode;
3949 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3950 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3951 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3954 if (compute_mode == VOIDmode)
3955 for (compute_mode = mode; compute_mode != VOIDmode;
3956 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3957 if (optab_libfunc (optab1, compute_mode)
3958 || optab_libfunc (optab2, compute_mode))
3961 /* If we still couldn't find a mode, use MODE, but expand_binop will
3962 probably die. */
3963 if (compute_mode == VOIDmode)
3964 compute_mode = mode;
3966 if (target && GET_MODE (target) == compute_mode)
3969 tquotient = gen_reg_rtx (compute_mode);
3971 size = GET_MODE_BITSIZE (compute_mode);
3972 #if 0
3973 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3974 (mode), and thereby get better code when OP1 is a constant. Do that
3975 later. It will require going over all usages of SIZE below. */
3976 size = GET_MODE_BITSIZE (mode);
3977 #endif
3979 /* Only deduct something for a REM if the last divide done was
3980 for a different constant. Then set the constant of the last
3981 divide. */
3982 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3983 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3984 && INTVAL (op1) == last_div_const))
3985 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3987 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3989 /* Now convert to the best mode to use. */
3990 if (compute_mode != mode)
3992 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3993 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3995 /* convert_modes may have placed op1 into a register, so we
3996 must recompute the following. */
3997 op1_is_constant = CONST_INT_P (op1);
3998 op1_is_pow2 = (op1_is_constant
3999 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4001 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4004 /* If one of the operands is a volatile MEM, copy it into a register. */
4006 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4007 op0 = force_reg (compute_mode, op0);
4008 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4009 op1 = force_reg (compute_mode, op1);
4011 /* If we need the remainder or if OP1 is constant, we need to
4012 put OP0 in a register in case it has any queued subexpressions. */
4013 if (rem_flag || op1_is_constant)
4014 op0 = force_reg (compute_mode, op0);
4016 last = get_last_insn ();
4018 /* Promote floor rounding to trunc rounding for unsigned operations. */
4021 if (code == FLOOR_DIV_EXPR)
4022 code = TRUNC_DIV_EXPR;
4023 if (code == FLOOR_MOD_EXPR)
4024 code = TRUNC_MOD_EXPR;
4025 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4026 code = TRUNC_DIV_EXPR;
4029 if (op1 != const0_rtx)
4032 case TRUNC_MOD_EXPR:
4033 case TRUNC_DIV_EXPR:
4034 if (op1_is_constant)
4038 unsigned HOST_WIDE_INT mh;
4039 int pre_shift, post_shift;
4042 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4043 & GET_MODE_MASK (compute_mode));
4045 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4047 pre_shift = floor_log2 (d);
4051 = expand_binop (compute_mode, and_optab, op0,
4052 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4056 return gen_lowpart (mode, remainder);
4058 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4059 pre_shift, tquotient, 1);
4061 else if (size <= HOST_BITS_PER_WIDE_INT)
4063 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4065 /* Most significant bit of divisor is set; emit an scc
4066 insn. */
4067 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4068 compute_mode, 1, 1);
4072 /* Find a suitable multiplier and right shift count
4073 instead of multiplying with D. */
4075 mh = choose_multiplier (d, size, size,
4076 &ml, &post_shift, &dummy);
4078 /* If the suggested multiplier is more than SIZE bits,
4079 we can do better for even divisors, using an
4080 initial right shift. */
4081 if (mh != 0 && (d & 1) == 0)
4083 pre_shift = floor_log2 (d & -d);
4084 mh = choose_multiplier (d >> pre_shift, size,
4086 &ml, &post_shift, &dummy);
4096 if (post_shift - 1 >= BITS_PER_WORD)
4100 = (shift_cost[speed][compute_mode][post_shift - 1]
4101 + shift_cost[speed][compute_mode][1]
4102 + 2 * add_cost[speed][compute_mode]);
4103 t1 = expand_mult_highpart (compute_mode, op0, ml,
4105 max_cost - extra_cost);
4108 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4111 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4112 t2, 1, NULL_RTX, 1);
4113 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4116 quotient = expand_shift
4117 (RSHIFT_EXPR, compute_mode, t4,
4118 post_shift - 1, tquotient, 1);
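/* Editorial example of the sequence above for 32-bit division by 7,
   where choose_multiplier yields ml = 0x24924925 and post_shift = 3:
       t1 = high word of (op0 * 0x24924925)
       t2 = (op0 - t1) >> 1
       q  = (t1 + t2) >> 2
   e.g. op0 = 100: t1 = 14, t2 = 43, q = 57 >> 2 = 14.  */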
4124 if (pre_shift >= BITS_PER_WORD
4125 || post_shift >= BITS_PER_WORD)
4129 (RSHIFT_EXPR, compute_mode, op0,
4130 pre_shift, NULL_RTX, 1);
4132 = (shift_cost[speed][compute_mode][pre_shift]
4133 + shift_cost[speed][compute_mode][post_shift]);
4134 t2 = expand_mult_highpart (compute_mode, t1, ml,
4136 max_cost - extra_cost);
4139 quotient = expand_shift
4140 (RSHIFT_EXPR, compute_mode, t2,
4141 post_shift, tquotient, 1);
4145 else /* Too wide mode to use tricky code */
4148 insn = get_last_insn ();
4150 set_dst_reg_note (insn, REG_EQUAL,
4151 gen_rtx_UDIV (compute_mode, op0, op1),
4154 else /* TRUNC_DIV, signed */
4156 unsigned HOST_WIDE_INT ml;
4157 int lgup, post_shift;
4159 HOST_WIDE_INT d = INTVAL (op1);
4160 unsigned HOST_WIDE_INT abs_d;
4162 /* Since d might be INT_MIN, we have to cast to
4163 unsigned HOST_WIDE_INT before negating to avoid
4164 undefined signed overflow. */
4166 ? (unsigned HOST_WIDE_INT) d
4167 : - (unsigned HOST_WIDE_INT) d);
4169 /* n rem d = n rem -d */
4170 if (rem_flag && d < 0)
4173 op1 = gen_int_mode (abs_d, compute_mode);
4179 quotient = expand_unop (compute_mode, neg_optab, op0,
4181 else if (HOST_BITS_PER_WIDE_INT >= size
4182 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4184 /* This case is not handled correctly below. */
4185 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4186 compute_mode, 1, 1);
4190 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4191 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4192 : sdiv_pow2_cheap[speed][compute_mode])
4193 /* We assume that the cheap metric is true if the
4194 optab has an expander for this mode. */
4195 && ((optab_handler ((rem_flag ? smod_optab
4198 != CODE_FOR_nothing)
4199 || (optab_handler (sdivmod_optab,
4201 != CODE_FOR_nothing)))
4203 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4207 remainder = expand_smod_pow2 (compute_mode, op0, d);
4209 return gen_lowpart (mode, remainder);
4212 if (sdiv_pow2_cheap[speed][compute_mode]
4213 && ((optab_handler (sdiv_optab, compute_mode)
4214 != CODE_FOR_nothing)
4215 || (optab_handler (sdivmod_optab, compute_mode)
4216 != CODE_FOR_nothing)))
4217 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4219 gen_int_mode (abs_d,
4223 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4225 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4226 negate the quotient. */
4229 insn = get_last_insn ();
4231 && abs_d < ((unsigned HOST_WIDE_INT) 1
4232 << (HOST_BITS_PER_WIDE_INT - 1)))
4233 set_dst_reg_note (insn, REG_EQUAL,
4234 gen_rtx_DIV (compute_mode, op0,
4240 quotient = expand_unop (compute_mode, neg_optab,
4241 quotient, quotient, 0);
4244 else if (size <= HOST_BITS_PER_WIDE_INT)
4246 choose_multiplier (abs_d, size, size - 1,
4247 &mlr, &post_shift, &lgup);
4248 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4249 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4253 if (post_shift >= BITS_PER_WORD
4254 || size - 1 >= BITS_PER_WORD)
4257 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4258 + shift_cost[speed][compute_mode][size - 1]
4259 + add_cost[speed][compute_mode]);
4260 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4262 max_cost - extra_cost);
4266 (RSHIFT_EXPR, compute_mode, t1,
4267 post_shift, NULL_RTX, 0);
4269 (RSHIFT_EXPR, compute_mode, op0,
4270 size - 1, NULL_RTX, 0);
4273 = force_operand (gen_rtx_MINUS (compute_mode,
4278 = force_operand (gen_rtx_MINUS (compute_mode,
4286 if (post_shift >= BITS_PER_WORD
4287 || size - 1 >= BITS_PER_WORD)
4290 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4291 mlr = gen_int_mode (ml, compute_mode);
4292 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4293 + shift_cost[speed][compute_mode][size - 1]
4294 + 2 * add_cost[speed][compute_mode]);
4295 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4297 max_cost - extra_cost);
4300 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4304 (RSHIFT_EXPR, compute_mode, t2,
4305 post_shift, NULL_RTX, 0);
4307 (RSHIFT_EXPR, compute_mode, op0,
4308 size - 1, NULL_RTX, 0);
4311 = force_operand (gen_rtx_MINUS (compute_mode,
4316 = force_operand (gen_rtx_MINUS (compute_mode,
4321 else /* Too wide mode to use tricky code */
4324 insn = get_last_insn ();
4326 set_dst_reg_note (insn, REG_EQUAL,
4327 gen_rtx_DIV (compute_mode, op0, op1),
4333 delete_insns_since (last);
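/* Editorial example of the signed multiplier path above for 32-bit
   division by 3, where the multiplier is 0x55555556 with
   post_shift = 0:
       t1 = high word of (op0 * 0x55555556)
       q  = t1 - (op0 >> 31)
   e.g. op0 = -7: t1 = -3, op0 >> 31 = -1, so q = -2.  */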
4336 case FLOOR_DIV_EXPR:
4337 case FLOOR_MOD_EXPR:
4338 /* We will come here only for signed operations. */
4339 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4341 unsigned HOST_WIDE_INT mh;
4342 int pre_shift, lgup, post_shift;
4343 HOST_WIDE_INT d = INTVAL (op1);
4348 /* We could just as easily deal with negative constants here,
4349 but it does not seem worth the trouble for GCC 2.6. */
4350 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4352 pre_shift = floor_log2 (d);
4355 remainder = expand_binop (compute_mode, and_optab, op0,
4356 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4357 remainder, 0, OPTAB_LIB_WIDEN);
4359 return gen_lowpart (mode, remainder);
4361 quotient = expand_shift
4362 (RSHIFT_EXPR, compute_mode, op0,
4363 pre_shift, tquotient, 0);
4369 mh = choose_multiplier (d, size, size - 1,
4370 &ml, &post_shift, &lgup);
4373 if (post_shift < BITS_PER_WORD
4374 && size - 1 < BITS_PER_WORD)
4377 (RSHIFT_EXPR, compute_mode, op0,
4378 size - 1, NULL_RTX, 0);
4379 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4380 NULL_RTX, 0, OPTAB_WIDEN);
4381 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4382 + shift_cost[speed][compute_mode][size - 1]
4383 + 2 * add_cost[speed][compute_mode]);
4384 t3 = expand_mult_highpart (compute_mode, t2, ml,
4386 max_cost - extra_cost);
4390 (RSHIFT_EXPR, compute_mode, t3,
4391 post_shift, NULL_RTX, 1);
4392 quotient = expand_binop (compute_mode, xor_optab,
4393 t4, t1, tquotient, 0,
4401 rtx nsign, t1, t2, t3, t4;
4402 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4403 op0, constm1_rtx), NULL_RTX);
4404 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4406 nsign = expand_shift
4407 (RSHIFT_EXPR, compute_mode, t2,
4408 size - 1, NULL_RTX, 0);
4409 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4411 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4416 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4418 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4427 delete_insns_since (last);
4429 /* Try using an instruction that produces both the quotient and
4430 remainder, using truncation. We can easily compensate the quotient
4431 or remainder to get floor rounding, once we have the remainder.
4432 Notice that we also compute the final remainder value here,
4433 and return the result right away. */
4434 if (target == 0 || GET_MODE (target) != compute_mode)
4435 target = gen_reg_rtx (compute_mode);
4440 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4441 quotient = gen_reg_rtx (compute_mode);
4446 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4447 remainder = gen_reg_rtx (compute_mode);
4450 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4451 quotient, remainder, 0))
4453 /* This could be computed with a branch-less sequence.
4454 Save that for later. */
4456 rtx label = gen_label_rtx ();
4457 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4458 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4459 NULL_RTX, 0, OPTAB_WIDEN);
4460 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4461 expand_dec (quotient, const1_rtx);
4462 expand_inc (remainder, op1);
4464 return gen_lowpart (mode, rem_flag ? remainder : quotient);
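/* Editorial worked example of the compensation above: -7 FLOOR/ 2
   truncates to quotient -3, remainder -1; the signs of op0 and op1
   differ and the remainder is nonzero, so the quotient becomes -4
   and the remainder becomes -1 + 2 = 1.  */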
4467 /* No luck with division elimination or divmod. Have to do it
4468 by conditionally adjusting op0 *and* the result. */
4470 rtx label1, label2, label3, label4, label5;
4474 quotient = gen_reg_rtx (compute_mode);
4475 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4476 label1 = gen_label_rtx ();
4477 label2 = gen_label_rtx ();
4478 label3 = gen_label_rtx ();
4479 label4 = gen_label_rtx ();
4480 label5 = gen_label_rtx ();
4481 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4482 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4483 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4484 quotient, 0, OPTAB_LIB_WIDEN);
4485 if (tem != quotient)
4486 emit_move_insn (quotient, tem);
4487 emit_jump_insn (gen_jump (label5));
4489 emit_label (label1);
4490 expand_inc (adjusted_op0, const1_rtx);
4491 emit_jump_insn (gen_jump (label4));
4493 emit_label (label2);
4494 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4495 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4496 quotient, 0, OPTAB_LIB_WIDEN);
4497 if (tem != quotient)
4498 emit_move_insn (quotient, tem);
4499 emit_jump_insn (gen_jump (label5));
4501 emit_label (label3);
4502 expand_dec (adjusted_op0, const1_rtx);
4503 emit_label (label4);
4504 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4505 quotient, 0, OPTAB_LIB_WIDEN);
4506 if (tem != quotient)
4507 emit_move_insn (quotient, tem);
4508 expand_dec (quotient, const1_rtx);
4509 emit_label (label5);
4517 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4520 unsigned HOST_WIDE_INT d = INTVAL (op1);
4521 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4522 floor_log2 (d), tquotient, 1);
4523 t2 = expand_binop (compute_mode, and_optab, op0,
4525 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4526 t3 = gen_reg_rtx (compute_mode);
4527 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4528 compute_mode, 1, 1);
4532 lab = gen_label_rtx ();
4533 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4534 expand_inc (t1, const1_rtx);
4539 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4545 /* Try using an instruction that produces both the quotient and
4546 remainder, using truncation. We can easily compensate the
4547 quotient or remainder to get ceiling rounding, once we have the
4548 remainder. Notice that we also compute the final remainder
4549 value here, and return the result right away. */
4550 if (target == 0 || GET_MODE (target) != compute_mode)
4551 target = gen_reg_rtx (compute_mode);
4555 remainder = (REG_P (target)
4556 ? target : gen_reg_rtx (compute_mode));
4557 quotient = gen_reg_rtx (compute_mode);
4561 quotient = (REG_P (target)
4562 ? target : gen_reg_rtx (compute_mode));
4563 remainder = gen_reg_rtx (compute_mode);
4566 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4569 /* This could be computed with a branch-less sequence.
4570 Save that for later. */
4571 rtx label = gen_label_rtx ();
4572 do_cmp_and_jump (remainder, const0_rtx, EQ,
4573 compute_mode, label);
4574 expand_inc (quotient, const1_rtx);
4575 expand_dec (remainder, op1);
4577 return gen_lowpart (mode, rem_flag ? remainder : quotient);
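/* Editorial worked example: 7 CEIL/ 2 (unsigned) truncates to
   quotient 3, remainder 1; the remainder is nonzero, so the quotient
   becomes 4 and the remainder becomes 1 - 2 = -1 (i.e. 7 - 4*2 in
   the compute mode).  */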
4580 /* No luck with division elimination or divmod. Have to do it
4581 by conditionally adjusting op0 *and* the result. */
4584 rtx adjusted_op0, tem;
4586 quotient = gen_reg_rtx (compute_mode);
4587 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4588 label1 = gen_label_rtx ();
4589 label2 = gen_label_rtx ();
4590 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4591 compute_mode, label1);
4592 emit_move_insn (quotient, const0_rtx);
4593 emit_jump_insn (gen_jump (label2));
4595 emit_label (label1);
4596 expand_dec (adjusted_op0, const1_rtx);
4597 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4598 quotient, 1, OPTAB_LIB_WIDEN);
4599 if (tem != quotient)
4600 emit_move_insn (quotient, tem);
4601 expand_inc (quotient, const1_rtx);
4602 emit_label (label2);
4607 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4608 && INTVAL (op1) >= 0)
4610 /* This is extremely similar to the code for the unsigned case
4611 above. For 2.7 we should merge these variants, but for
4612 2.6.1 I don't want to touch the code for unsigned since that
4613 gets used in C. The signed case will only be used by other
4614 languages (Ada). */
4617 unsigned HOST_WIDE_INT d = INTVAL (op1);
4618 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4619 floor_log2 (d), tquotient, 0);
4620 t2 = expand_binop (compute_mode, and_optab, op0,
4622 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4623 t3 = gen_reg_rtx (compute_mode);
4624 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4625 compute_mode, 1, 1);
4629 lab = gen_label_rtx ();
4630 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4631 expand_inc (t1, const1_rtx);
4636 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4642 /* Try using an instruction that produces both the quotient and
4643 remainder, using truncation. We can easily compensate the
4644 quotient or remainder to get ceiling rounding, once we have the
4645 remainder. Notice that we also compute the final remainder
4646 value here, and return the result right away. */
4647 if (target == 0 || GET_MODE (target) != compute_mode)
4648 target = gen_reg_rtx (compute_mode);
4651 remainder= (REG_P (target)
4652 ? target : gen_reg_rtx (compute_mode));
4653 quotient = gen_reg_rtx (compute_mode);
4657 quotient = (REG_P (target)
4658 ? target : gen_reg_rtx (compute_mode));
4659 remainder = gen_reg_rtx (compute_mode);
4662 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4665 /* This could be computed with a branch-less sequence.
4666 Save that for later. */
4668 rtx label = gen_label_rtx ();
4669 do_cmp_and_jump (remainder, const0_rtx, EQ,
4670 compute_mode, label);
4671 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4672 NULL_RTX, 0, OPTAB_WIDEN);
4673 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4674 expand_inc (quotient, const1_rtx);
4675 expand_dec (remainder, op1);
4677 return gen_lowpart (mode, rem_flag ? remainder : quotient);
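/* Editorial worked example for the signed case: 7 CEIL/ 2 gives
   quotient 3, remainder 1; op0 ^ op1 is nonnegative and the remainder
   is nonzero, so we adjust to quotient 4, remainder -1.  For
   -7 CEIL/ 2 the signs differ, so quotient -3, remainder -1 are
   already correct.  */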
4680 /* No luck with division elimination or divmod. Have to do it
4681 by conditionally adjusting op0 *and* the result. */
4683 rtx label1, label2, label3, label4, label5;
4687 quotient = gen_reg_rtx (compute_mode);
4688 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4689 label1 = gen_label_rtx ();
4690 label2 = gen_label_rtx ();
4691 label3 = gen_label_rtx ();
4692 label4 = gen_label_rtx ();
4693 label5 = gen_label_rtx ();
4694 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4695 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4696 compute_mode, label1);
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 emit_jump_insn (gen_jump (label5));
4703 emit_label (label1);
4704 expand_dec (adjusted_op0, const1_rtx);
4705 emit_jump_insn (gen_jump (label4));
4707 emit_label (label2);
4708 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4709 compute_mode, label3);
4710 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4711 quotient, 0, OPTAB_LIB_WIDEN);
4712 if (tem != quotient)
4713 emit_move_insn (quotient, tem);
4714 emit_jump_insn (gen_jump (label5));
4716 emit_label (label3);
4717 expand_inc (adjusted_op0, const1_rtx);
4718 emit_label (label4);
4719 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4720 quotient, 0, OPTAB_LIB_WIDEN);
4721 if (tem != quotient)
4722 emit_move_insn (quotient, tem);
4723 expand_inc (quotient, const1_rtx);
4724 emit_label (label5);
4729 case EXACT_DIV_EXPR:
4730 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4732 HOST_WIDE_INT d = INTVAL (op1);
4733 unsigned HOST_WIDE_INT ml;
4737 pre_shift = floor_log2 (d & -d);
4738 ml = invert_mod2n (d >> pre_shift, size);
4739 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4740 pre_shift, NULL_RTX, unsignedp);
4741 quotient = expand_mult (compute_mode, t1,
4742 gen_int_mode (ml, compute_mode),
4745 insn = get_last_insn ();
4746 set_dst_reg_note (insn, REG_EQUAL,
4747 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4748 compute_mode, op0, op1),
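/* Editorial example of the modular-inverse trick above: an exact
   32-bit division by 3 multiplies by invert_mod2n (3, 32) =
   0xaaaaaaab, since 3 * 0xaaaaaaab == 1 (mod 2^32); e.g.
   21 * 0xaaaaaaab == 7 (mod 2^32).  */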
4753 case ROUND_DIV_EXPR:
4754 case ROUND_MOD_EXPR:
4759 label = gen_label_rtx ();
4760 quotient = gen_reg_rtx (compute_mode);
4761 remainder = gen_reg_rtx (compute_mode);
4762 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4765 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4766 quotient, 1, OPTAB_LIB_WIDEN);
4767 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4768 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769 remainder, 1, OPTAB_LIB_WIDEN);
4771 tem = plus_constant (op1, -1);
4772 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4773 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4774 expand_inc (quotient, const1_rtx);
4775 expand_dec (remainder, op1);
4780 rtx abs_rem, abs_op1, tem, mask;
4782 label = gen_label_rtx ();
4783 quotient = gen_reg_rtx (compute_mode);
4784 remainder = gen_reg_rtx (compute_mode);
4785 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4788 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4789 quotient, 0, OPTAB_LIB_WIDEN);
4790 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4791 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4792 remainder, 0, OPTAB_LIB_WIDEN);
4794 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4795 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4796 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4798 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4799 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4800 NULL_RTX, 0, OPTAB_WIDEN);
4801 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4802 size - 1, NULL_RTX, 0);
4803 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4804 NULL_RTX, 0, OPTAB_WIDEN);
4805 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806 NULL_RTX, 0, OPTAB_WIDEN);
4807 expand_inc (quotient, tem);
4808 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4809 NULL_RTX, 0, OPTAB_WIDEN);
4810 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4811 NULL_RTX, 0, OPTAB_WIDEN);
4812 expand_dec (remainder, tem);
4815 return gen_lowpart (mode, rem_flag ? remainder : quotient);
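/* Editorial worked example of the round-to-nearest fixup above:
   7 ROUND/ 2 truncates to quotient 3, remainder 1; 2*|1| >= |2|,
   so the quotient becomes 4 and the remainder -1, rounding 3.5
   away from zero.  */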
4823 if (target && GET_MODE (target) != compute_mode)
4828 /* Try to produce the remainder without producing the quotient.
4829 If we seem to have a divmod pattern that does not require widening,
4830 don't try widening here. We should really have a WIDEN argument
4831 to expand_twoval_binop, since what we'd really like to do here is
4832 1) try a mod insn in compute_mode
4833 2) try a divmod insn in compute_mode
4834 3) try a div insn in compute_mode and multiply-subtract to get
4835 remainder
4836 4) try the same things with widening allowed. */
4838 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4841 ((optab_handler (optab2, compute_mode)
4842 != CODE_FOR_nothing)
4843 ? OPTAB_DIRECT : OPTAB_WIDEN));
4846 /* No luck there. Can we do remainder and divide at once
4847 without a library call? */
4848 remainder = gen_reg_rtx (compute_mode);
4849 if (! expand_twoval_binop ((unsignedp
4853 NULL_RTX, remainder, unsignedp))
4858 return gen_lowpart (mode, remainder);
4861 /* Produce the quotient. Try a quotient insn, but not a library call.
4862 If we have a divmod in this mode, use it in preference to widening
4863 the div (for this test we assume it will not fail). Note that optab2
4864 is set to the one of the two optabs that the call below will use. */
4866 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4867 op0, op1, rem_flag ? NULL_RTX : target,
4869 ((optab_handler (optab2, compute_mode)
4870 != CODE_FOR_nothing)
4871 ? OPTAB_DIRECT : OPTAB_WIDEN));
4875 /* No luck there. Try a quotient-and-remainder insn,
4876 keeping the quotient alone. */
4877 quotient = gen_reg_rtx (compute_mode);
4878 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4880 quotient, NULL_RTX, unsignedp))
4884 /* Still no luck. If we are not computing the remainder,
4885 use a library call for the quotient. */
4886 quotient = sign_expand_binop (compute_mode,
4887 udiv_optab, sdiv_optab,
4889 unsignedp, OPTAB_LIB_WIDEN);
4896 if (target && GET_MODE (target) != compute_mode)
4901 /* No divide instruction either. Use library for remainder. */
4902 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4904 unsignedp, OPTAB_LIB_WIDEN);
4905 /* No remainder function. Try a quotient-and-remainder
4906 function, keeping the remainder. */
4909 remainder = gen_reg_rtx (compute_mode);
4910 if (!expand_twoval_binop_libfunc
4911 (unsignedp ? udivmod_optab : sdivmod_optab,
4913 NULL_RTX, remainder,
4914 unsignedp ? UMOD : MOD))
4915 remainder = NULL_RTX;
4920 /* We divided. Now finish doing X - Y * (X / Y). */
4921 remainder = expand_mult (compute_mode, quotient, op1,
4922 NULL_RTX, unsignedp);
4923 remainder = expand_binop (compute_mode, sub_optab, op0,
4924 remainder, target, unsignedp,
4929 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4932 /* Return a tree node with data type TYPE, describing the value of X.
4933 Usually this is a VAR_DECL, if there is no obvious better choice.
4934 X may be an expression; however, we only support those expressions
4935 generated by loop.c. */
4938 make_tree (tree type, rtx x)
4942 switch (GET_CODE (x))
4946 HOST_WIDE_INT hi = 0;
4949 && !(TYPE_UNSIGNED (type)
4950 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4951 < HOST_BITS_PER_WIDE_INT)))
4954 t = build_int_cst_wide (type, INTVAL (x), hi);
4960 if (GET_MODE (x) == VOIDmode)
4961 t = build_int_cst_wide (type,
4962 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4967 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4968 t = build_real (type, d);
4975 int units = CONST_VECTOR_NUNITS (x);
4976 tree itype = TREE_TYPE (type);
4981 /* Build a tree with vector elements. */
4982 for (i = units - 1; i >= 0; --i)
4984 rtx elt = CONST_VECTOR_ELT (x, i);
4985 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4988 return build_vector (type, t);
4992 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993 make_tree (type, XEXP (x, 1)));
4996 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997 make_tree (type, XEXP (x, 1)));
5000 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5003 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004 make_tree (type, XEXP (x, 1)));
5007 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008 make_tree (type, XEXP (x, 1)));
5011 t = unsigned_type_for (type);
5012 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5013 make_tree (t, XEXP (x, 0)),
5014 make_tree (type, XEXP (x, 1))));
5017 t = signed_type_for (type);
5018 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5019 make_tree (t, XEXP (x, 0)),
5020 make_tree (type, XEXP (x, 1))));
5023 if (TREE_CODE (type) != REAL_TYPE)
5024 t = signed_type_for (type);
5028 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029 make_tree (t, XEXP (x, 0)),
5030 make_tree (t, XEXP (x, 1))));
5032 t = unsigned_type_for (type);
5033 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5034 make_tree (t, XEXP (x, 0)),
5035 make_tree (t, XEXP (x, 1))));
5039 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5040 GET_CODE (x) == ZERO_EXTEND);
5041 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5044 return make_tree (type, XEXP (x, 0));
5047 t = SYMBOL_REF_DECL (x);
5049 return fold_convert (type, build_fold_addr_expr (t));
5050 /* else fall through. */
5053 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5055 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5056 address mode to pointer mode. */
5057 if (POINTER_TYPE_P (type))
5058 x = convert_memory_address_addr_space
5059 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5061 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5062 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5063 t->decl_with_rtl.rtl = x;
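/* Editorial example (not from the original source): given
   (plus:SI (reg:SI 60) (const_int 4)), make_tree returns
   PLUS_EXPR <d, 4>, where d is the synthesized VAR_DECL whose
   rtl field was set to (reg:SI 60) just above.  */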
5069 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5070 and returning TARGET.
5072 If TARGET is 0, a pseudo-register or constant is returned. */
5075 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5079 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5080 tem = simplify_binary_operation (AND, mode, op0, op1);
5082 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5086 else if (tem != target)
5087 emit_move_insn (target, tem);
5091 /* Helper function for emit_store_flag. */
5093 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5094 enum machine_mode mode, enum machine_mode compare_mode,
5095 int unsignedp, rtx x, rtx y, int normalizep,
5096 enum machine_mode target_mode)
5098 struct expand_operand ops[4];
5099 rtx op0, last, comparison, subtarget;
5100 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5102 last = get_last_insn ();
5103 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5104 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5107 delete_insns_since (last);
5111 if (target_mode == VOIDmode)
5112 target_mode = result_mode;
5114 target = gen_reg_rtx (target_mode);
5116 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5118 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5119 create_fixed_operand (&ops[1], comparison);
5120 create_fixed_operand (&ops[2], x);
5121 create_fixed_operand (&ops[3], y);
5122 if (!maybe_expand_insn (icode, 4, ops))
5124 delete_insns_since (last);
5127 subtarget = ops[0].value;
5129 /* If we are converting to a wider mode, first convert to
5130 TARGET_MODE, then normalize. This produces better combining
5131 opportunities on machines that have a SIGN_EXTRACT when we are
5132 testing a single bit. This mostly benefits the 68k.
5134 If STORE_FLAG_VALUE does not have the sign bit set when
5135 interpreted in MODE, we can do this conversion as unsigned, which
5136 is usually more efficient. */
5137 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5139 convert_move (target, subtarget,
5140 val_signbit_known_clear_p (result_mode,
5143 result_mode = target_mode;
5148 /* If we want to keep subexpressions around, don't reuse our last
5149 target. */
5153 /* Now normalize to the proper value in MODE. Sometimes we don't
5154 have to do anything. */
5155 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5157 /* STORE_FLAG_VALUE might be the most negative number, so write
5158 the comparison this way to avoid a compile-time warning. */
5159 else if (- normalizep == STORE_FLAG_VALUE)
5160 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5162 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5163 it hard to use a value of just the sign bit due to ANSI integer
5164 constant typing rules. */
5165 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5166 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5167 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5171 gcc_assert (STORE_FLAG_VALUE & 1);
5173 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5174 if (normalizep == -1)
5175 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5178 /* If we were converting to a smaller mode, do the conversion now. */
5179 if (target_mode != result_mode)
5181 convert_move (target, op0, 0);
5189 /* A subroutine of emit_store_flag only including "tricks" that do not
5190 need a recursive call. These are kept separate to avoid infinite
5191 loops. */
5194 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5195 enum machine_mode mode, int unsignedp, int normalizep,
5196 enum machine_mode target_mode)
5199 enum insn_code icode;
5200 enum machine_mode compare_mode;
5201 enum mode_class mclass;
5202 enum rtx_code scode;
5206 code = unsigned_condition (code);
5207 scode = swap_condition (code);
5209 /* If one operand is constant, make it the second one. Only do this
5210 if the other operand is not constant as well. */
5212 if (swap_commutative_operands_p (op0, op1))
5217 code = swap_condition (code);
5220 if (mode == VOIDmode)
5221 mode = GET_MODE (op0);
5223 /* For some comparisons with 1 and -1, we can convert this to
5224 comparisons with zero. This will often produce more opportunities for
5225 store-flag insns. */
5230 if (op1 == const1_rtx)
5231 op1 = const0_rtx, code = LE;
5234 if (op1 == constm1_rtx)
5235 op1 = const0_rtx, code = LT;
5238 if (op1 == const1_rtx)
5239 op1 = const0_rtx, code = GT;
5242 if (op1 == constm1_rtx)
5243 op1 = const0_rtx, code = GE;
5246 if (op1 == const1_rtx)
5247 op1 = const0_rtx, code = NE;
5250 if (op1 == const1_rtx)
5251 op1 = const0_rtx, code = EQ;
5257 /* If we are comparing a double-word integer with zero or -1, we can
5258 convert the comparison into one involving a single word. */
5259 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5260 && GET_MODE_CLASS (mode) == MODE_INT
5261 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5263 if ((code == EQ || code == NE)
5264 && (op1 == const0_rtx || op1 == constm1_rtx))
5268 /* Do a logical OR or AND of the two words and compare the
5269 result. */
5270 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5271 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5272 tem = expand_binop (word_mode,
5273 op1 == const0_rtx ? ior_optab : and_optab,
5274 op00, op01, NULL_RTX, unsignedp,
5278 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5279 unsignedp, normalizep);
5281 else if ((code == LT || code == GE) && op1 == const0_rtx)
5285 /* If testing the sign bit, can just test on high word. */
5286 op0h = simplify_gen_subreg (word_mode, op0, mode,
5287 subreg_highpart_offset (word_mode,
5289 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5290 unsignedp, normalizep);
5297 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5300 target = gen_reg_rtx (target_mode);
5302 convert_move (target, tem,
5303 !val_signbit_known_set_p (word_mode,
5304 (normalizep ? normalizep
5305 : STORE_FLAG_VALUE)));
5310 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5311 complement of A (for GE) and shifting the sign bit to the low bit. */
5312 if (op1 == const0_rtx && (code == LT || code == GE)
5313 && GET_MODE_CLASS (mode) == MODE_INT
5314 && (normalizep || STORE_FLAG_VALUE == 1
5315 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5322 /* If the result is to be wider than OP0, it is best to convert it
5323 first. If it is to be narrower, it is *incorrect* to convert it
5324 first. */
5325 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5327 op0 = convert_modes (target_mode, mode, op0, 0);
5331 if (target_mode != mode)
5335 op0 = expand_unop (mode, one_cmpl_optab, op0,
5336 ((STORE_FLAG_VALUE == 1 || normalizep)
5337 ? 0 : subtarget), 0);
5339 if (STORE_FLAG_VALUE == 1 || normalizep)
5340 /* If we are supposed to produce a 0/1 value, we want to do
5341 a logical shift from the sign bit to the low-order bit; for
5342 a -1/0 value, we do an arithmetic shift. */
5343 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5344 GET_MODE_BITSIZE (mode) - 1,
5345 subtarget, normalizep != -1);
5347 if (mode != target_mode)
5348 op0 = convert_modes (target_mode, mode, op0, 0);
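/* Editorial example of the sign-bit trick above on a 32-bit target
   with STORE_FLAG_VALUE == 1: "x < 0" becomes the logical shift
   "(unsigned) x >> 31", and "x >= 0" first takes the ones complement,
   giving "(unsigned) ~x >> 31".  */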
5353 mclass = GET_MODE_CLASS (mode);
5354 for (compare_mode = mode; compare_mode != VOIDmode;
5355 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5357 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5358 icode = optab_handler (cstore_optab, optab_mode);
5359 if (icode != CODE_FOR_nothing)
5361 do_pending_stack_adjust ();
5362 tem = emit_cstore (target, icode, code, mode, compare_mode,
5363 unsignedp, op0, op1, normalizep, target_mode);
5367 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5369 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5370 unsignedp, op1, op0, normalizep, target_mode);
5381 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5382 and storing in TARGET. Normally return TARGET.
5383 Return 0 if that cannot be done.
5385 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5386 it is VOIDmode, they cannot both be CONST_INT.
5388 UNSIGNEDP is for the case where we have to widen the operands
5389 to perform the operation. It says to use zero-extension.
5391 NORMALIZEP is 1 if we should convert the result to be either zero
5392 or one. NORMALIZEP is -1 if we should convert the result to be
5393 either zero or -1. If NORMALIZEP is zero, the result will be left
5394 "raw" out of the scc insn. */
5397 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5398 enum machine_mode mode, int unsignedp, int normalizep)
5400 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5401 enum rtx_code rcode;
5403 rtx tem, last, trueval;
5405 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5410 /* If we reached here, we can't do this with an scc insn; however, there
5411 are some comparisons that can be done in other ways. Don't do any
5412 of these cases if branches are very cheap. */
5413 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5416 /* See what we need to return. We can only return a 1, -1, or the
5417 sign bit. */
5419 if (normalizep == 0)
5421 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5422 normalizep = STORE_FLAG_VALUE;
5424 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5430 last = get_last_insn ();
5432 /* If optimizing, use different pseudo registers for each insn, instead
5433 of reusing the same pseudo. This leads to better CSE, but slows
5434 down the compiler, since there are more pseudos. */
5435 subtarget = (!optimize
5436 && (target_mode == mode)) ? target : NULL_RTX;
5437 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5439 /* For floating-point comparisons, try the reverse comparison or try
5440 changing the "orderedness" of the comparison. */
5441 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5443 enum rtx_code first_code;
5446 rcode = reverse_condition_maybe_unordered (code);
5447 if (can_compare_p (rcode, mode, ccp_store_flag)
5448 && (code == ORDERED || code == UNORDERED
5449 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5450 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5452 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5453 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5455 /* For the reverse comparison, use either an addition or a XOR. */
5457 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5458 optimize_insn_for_speed_p ()) == 0)
5460 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5461 STORE_FLAG_VALUE, target_mode);
5463 return expand_binop (target_mode, add_optab, tem,
5464 GEN_INT (normalizep),
5465 target, 0, OPTAB_WIDEN);
5468 && rtx_cost (trueval, XOR, 1,
5469 optimize_insn_for_speed_p ()) == 0)
5471 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5472 normalizep, target_mode);
5474 return expand_binop (target_mode, xor_optab, tem, trueval,
5475 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5479 delete_insns_since (last);
5481 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5482 if (code == ORDERED || code == UNORDERED)
5485 and_them = split_comparison (code, mode, &first_code, &code);
5487 /* If there are no NaNs, the first comparison should always fall through.
5488 Effectively change the comparison to the other one. */
5489 if (!HONOR_NANS (mode))
5491 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5492 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5496 #ifdef HAVE_conditional_move
5497 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5498 conditional move. */
5499 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5500 normalizep, target_mode);
5505 tem = emit_conditional_move (target, code, op0, op1, mode,
5506 tem, const0_rtx, GET_MODE (tem), 0);
5508 tem = emit_conditional_move (target, code, op0, op1, mode,
5509 trueval, tem, GET_MODE (tem), 0);
5512 delete_insns_since (last);
5519 /* The remaining tricks only apply to integer comparisons. */
5521 if (GET_MODE_CLASS (mode) != MODE_INT)
5524 /* If this is an equality comparison of integers, we can try to exclusive-or
5525 (or subtract) the two operands and use a recursive call to try the
5526 comparison with zero. Don't do any of these cases if branches are
5527 very cheap. */
5529 if ((code == EQ || code == NE) && op1 != const0_rtx)
5531 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5535 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5538 tem = emit_store_flag (target, code, tem, const0_rtx,
5539 mode, unsignedp, normalizep);
5543 delete_insns_since (last);
5546 /* For integer comparisons, try the reverse comparison. However, for
5547 small X and if we'd have to extend anyway, implementing "X != 0"
5548 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5549 rcode = reverse_condition (code);
5550 if (can_compare_p (rcode, mode, ccp_store_flag)
5551 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5553 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5554 && op1 == const0_rtx))
5556 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5557 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5559 /* Again, for the reverse comparison, use either an addition or a XOR. */
5561 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5562 optimize_insn_for_speed_p ()) == 0)
5564 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5565 STORE_FLAG_VALUE, target_mode);
5567 tem = expand_binop (target_mode, add_optab, tem,
5568 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5571 && rtx_cost (trueval, XOR, 1,
5572 optimize_insn_for_speed_p ()) == 0)
5574 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5575 normalizep, target_mode);
5577 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5578 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5583 delete_insns_since (last);
5586 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5587 the constant zero. Reject all other comparisons at this point. Only
5588 do LE and GT if branches are expensive since they are expensive on
5589 2-operand machines. */
5591 if (op1 != const0_rtx
5592 || (code != EQ && code != NE
5593 && (BRANCH_COST (optimize_insn_for_speed_p (),
5594 false) <= 1 || (code != LE && code != GT))))
5597 /* Try to put the result of the comparison in the sign bit. Assume we can't
5598 do the necessary operation below. */
5602 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5603 the sign bit set. */
5607 /* This is destructive, so SUBTARGET can't be OP0. */
5608 if (rtx_equal_p (subtarget, op0))
5611 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5614 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5618 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5619 number of bits in the mode of OP0, minus one. */
5623 if (rtx_equal_p (subtarget, op0))
5626 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5627 GET_MODE_BITSIZE (mode) - 1,
5629 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
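/* Editorial worked examples for the two tricks above, 32-bit x:
   x <= 0: x = 0 gives (0 | -1) = -1, sign bit set; x = 5 gives
   (5 | 4) = 5, sign bit clear.
   x > 0: x = 5 gives (0 - 5) = -5, sign bit set; x = -5 gives
   (-1 - -5) = 4, sign bit clear.  */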
5633 if (code == EQ || code == NE)
5635 /* For EQ or NE, one way to do the comparison is to apply an operation
5636 that converts the operand into a positive number if it is nonzero
5637 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5638 for NE we negate. This puts the result in the sign bit. Then we
5639 normalize with a shift, if needed.
5641 Two operations that can do the above actions are ABS and FFS, so try
5642 them. If that doesn't work, and MODE is smaller than a full word,
5643 we can use zero-extension to the wider mode (an unsigned conversion)
5644 as the operation. */
5646 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5647 that is compensated by the subsequent overflow when subtracting
5648 one / negating. */
5650 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5651 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5652 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5653 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5654 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5656 tem = convert_modes (word_mode, mode, op0, 1);
5663 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5666 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5669 /* If we couldn't do it that way, for NE we can "or" the two's complement
5670 of the value with itself. For EQ, we take the one's complement of
5671 that "or", which is an extra insn, so we only handle EQ if branches
5676 || BRANCH_COST (optimize_insn_for_speed_p (),
5679 if (rtx_equal_p (subtarget, op0))
5682 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5683 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5686 if (tem && code == EQ)
5687 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
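/* Editorial example of the fallback above: for x != 0 this computes
   (-x | x), whose sign bit is set exactly when x is nonzero (for
   INT_MIN both operands are negative, which still sets it); EQ then
   takes the one's complement, and the final shift below normalizes
   to 0/1 or 0/-1.  */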
5691 if (tem && normalizep)
5692 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5693 GET_MODE_BITSIZE (mode) - 1,
5694 subtarget, normalizep == 1);
5700 else if (GET_MODE (tem) != target_mode)
5702 convert_move (target, tem, 0);
5705 else if (!subtarget)
5707 emit_move_insn (target, tem);
5712 delete_insns_since (last);
5717 /* Like emit_store_flag, but always succeeds. */
5720 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5721 enum machine_mode mode, int unsignedp, int normalizep)
5724 rtx trueval, falseval;
5726 /* First see if emit_store_flag can do the job. */
5727 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5732 target = gen_reg_rtx (word_mode);
5734 /* If this failed, we have to do this with set/compare/jump/set code.
5735 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5736 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5738 && GET_MODE_CLASS (mode) == MODE_INT
5741 && op1 == const0_rtx)
5743 label = gen_label_rtx ();
5744 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5745 mode, NULL_RTX, NULL_RTX, label, -1);
5746 emit_move_insn (target, trueval);
5752 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5753 target = gen_reg_rtx (GET_MODE (target));
5755 /* Jump in the right direction if the target cannot implement CODE
5756 but can jump on its reverse condition. */
5757 falseval = const0_rtx;
5758 if (! can_compare_p (code, mode, ccp_jump)
5759 && (! FLOAT_MODE_P (mode)
5760 || code == ORDERED || code == UNORDERED
5761 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5762 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5764 enum rtx_code rcode;
5765 if (FLOAT_MODE_P (mode))
5766 rcode = reverse_condition_maybe_unordered (code);
5768 rcode = reverse_condition (code);
5770 /* Canonicalize to UNORDERED for the libcall. */
5771 if (can_compare_p (rcode, mode, ccp_jump)
5772 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5775 trueval = const0_rtx;
5780 emit_move_insn (target, trueval);
5781 label = gen_label_rtx ();
5782 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5783 NULL_RTX, label, -1);
5785 emit_move_insn (target, falseval);
5791 /* Perform possibly multi-word comparison and conditional jump to LABEL
5792 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5793 now a thin wrapper around do_compare_rtx_and_jump. */
5796 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5799 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5800 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5801 NULL_RTX, NULL_RTX, label, -1);