gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55
  56 /* Nonzero means divides or modulus operations are relatively cheap for
  57    powers of two, so don't use branches; emit the operation instead.
  58    Usually, this will mean that the MD file will emit non-branch
  59    sequences.  */
  60
  61 static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
  62 static int smod_pow2_cheap[NUM_MACHINE_MODES];
  63
  64 #ifndef SLOW_UNALIGNED_ACCESS
  65 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  66 #endif
  67
  68 /* For compilers that support multiple targets with different word sizes,
  69    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  70    is the H8/300(H) compiler.  */
  71
  72 #ifndef MAX_BITS_PER_WORD
  73 #define MAX_BITS_PER_WORD BITS_PER_WORD
  74 #endif
  75
  76 /* Reduce conditional compilation elsewhere.  */
  77 #ifndef HAVE_insv
  78 #define HAVE_insv       0
  79 #define CODE_FOR_insv   CODE_FOR_nothing
  80 #define gen_insv(a,b,c,d) NULL_RTX
  81 #endif
  82 #ifndef HAVE_extv
  83 #define HAVE_extv       0
  84 #define CODE_FOR_extv   CODE_FOR_nothing
  85 #define gen_extv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extzv
  88 #define HAVE_extzv      0
  89 #define CODE_FOR_extzv  CODE_FOR_nothing
  90 #define gen_extzv(a,b,c,d) NULL_RTX
  91 #endif
  92
  93 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  94    shift count and some by mode.  */
  95 static int zero_cost;
  96 static int add_cost[NUM_MACHINE_MODES];
  97 static int neg_cost[NUM_MACHINE_MODES];
  98 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  99 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 100 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 101 static int mul_cost[NUM_MACHINE_MODES];
 102 static int div_cost[NUM_MACHINE_MODES];
 103 static int mul_widen_cost[NUM_MACHINE_MODES];
 104 static int mul_highpart_cost[NUM_MACHINE_MODES];
 105
 106 void
 107 init_expmed (void)
 108 {
 109   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 110   rtx shift_pat, shiftadd_pat, shiftsub_pat;
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113   int dummy;
 114   int m, n;
 115   enum machine_mode mode, wider_mode;
 116
 117   start_sequence ();
 118
 119   zero_cost = rtx_cost (const0_rtx, 0);
 120
 121   init_recog ();
 122
 123   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 124     {
 125       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 126       cint[m] = GEN_INT (m);
 127     }
 128
 129   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 130        mode != VOIDmode;
 131        mode = GET_MODE_WIDER_MODE (mode))
 132     {
 133       reg = gen_rtx_REG (mode, 10000);
 134       add_cost[mode] = rtx_cost (gen_rtx_PLUS (mode, reg, reg), SET);
 135       neg_cost[mode] = rtx_cost (gen_rtx_NEG (mode, reg), SET);
 136       div_cost[mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 137       mul_cost[mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 138
 139       sdiv_pow2_cheap[mode]
 140         = (rtx_cost (gen_rtx_DIV (mode, reg, GEN_INT (32)), SET)
 141            <= 2 * add_cost[mode]);
 142       smod_pow2_cheap[mode]
 143         = (rtx_cost (gen_rtx_MOD (mode, reg, GEN_INT (32)), SET)
 144            <= 2 * add_cost[mode]);
 145
 146       wider_mode = GET_MODE_WIDER_MODE (mode);
 147       if (wider_mode != VOIDmode)
 148         {
 149           mul_widen_cost[wider_mode]
 150             = rtx_cost (gen_rtx_MULT (wider_mode,
 151                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 152                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 153                         SET);
 154           mul_highpart_cost[mode]
 155             = rtx_cost (gen_rtx_TRUNCATE
 156                         (mode,
 157                          gen_rtx_LSHIFTRT (wider_mode,
 158                                            gen_rtx_MULT (wider_mode,
 159                                                          gen_rtx_ZERO_EXTEND
 160                                                          (wider_mode, reg),
 161                                                          gen_rtx_ZERO_EXTEND
 162                                                          (wider_mode, reg)),
 163                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 164                         SET);
 165         }
 166
 167         shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 168                                              gen_rtx_ASHIFT (mode, reg,
 169                                                              const0_rtx)));
 170
 171         shiftadd_insn
 172           = emit_insn (gen_rtx_SET (VOIDmode, reg,
 173                                     gen_rtx_PLUS (mode,
 174                                                   gen_rtx_MULT (mode,
 175                                                                 reg,
 176                                                                 const0_rtx),
 177                                                   reg)));
 178
 179         shiftsub_insn
 180           = emit_insn (gen_rtx_SET (VOIDmode, reg,
 181                                     gen_rtx_MINUS (mode,
 182                                                    gen_rtx_MULT (mode,
 183                                                                  reg,
 184                                                                  const0_rtx),
 185                                                    reg)));
 186
 187         shift_pat = PATTERN (shift_insn);
 188         shiftadd_pat = PATTERN (shiftadd_insn);
 189         shiftsub_pat = PATTERN (shiftsub_insn);
 190
 191         shift_cost[mode][0] = 0;
 192         shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 193
 194         n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 195         for (m = 1; m < n; m++)
 196           {
 197             shift_cost[mode][m] = 32000;
 198             XEXP (SET_SRC (shift_pat), 1) = cint[m];
 199             if (recog (shift_pat, shift_insn, &dummy) >= 0)
 200               shift_cost[mode][m] = rtx_cost (SET_SRC (shift_pat), SET);
 201
 202             shiftadd_cost[mode][m] = 32000;
 203             XEXP (XEXP (SET_SRC (shiftadd_pat), 0), 1) = pow2[m];
 204             if (recog (shiftadd_pat, shiftadd_insn, &dummy) >= 0)
 205               shiftadd_cost[mode][m] = rtx_cost (SET_SRC (shiftadd_pat), SET);
 206
 207             shiftsub_cost[mode][m] = 32000;
 208             XEXP (XEXP (SET_SRC (shiftsub_pat), 0), 1) = pow2[m];
 209             if (recog (shiftsub_pat, shiftsub_insn, &dummy) >= 0)
 210               shiftsub_cost[mode][m] = rtx_cost (SET_SRC (shiftsub_pat), SET);
 211           }
 212     }
 213
 214   end_sequence ();
 215 }
 216
 217 /* Return an rtx representing minus the value of X.
 218    MODE is the intended mode of the result,
 219    useful if X is a CONST_INT.  */
 220
 221 rtx
 222 negate_rtx (enum machine_mode mode, rtx x)
 223 {
 224   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 225
 226   if (result == 0)
 227     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 228
 229   return result;
 230 }
 231
 232 /* Report on the availability of insv/extv/extzv and the desired mode
 233    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 234    is false; else the mode of the specified operand.  If OPNO is -1,
 235    all the caller cares about is whether the insn is available.  */
 236 enum machine_mode
 237 mode_for_extraction (enum extraction_pattern pattern, int opno)
 238 {
 239   const struct insn_data *data;
 240
 241   switch (pattern)
 242     {
 243     case EP_insv:
 244       if (HAVE_insv)
 245         {
 246           data = &insn_data[CODE_FOR_insv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     case EP_extv:
 252       if (HAVE_extv)
 253         {
 254           data = &insn_data[CODE_FOR_extv];
 255           break;
 256         }
 257       return MAX_MACHINE_MODE;
 258
 259     case EP_extzv:
 260       if (HAVE_extzv)
 261         {
 262           data = &insn_data[CODE_FOR_extzv];
 263           break;
 264         }
 265       return MAX_MACHINE_MODE;
 266
 267     default:
 268       abort ();
 269     }
 270
 271   if (opno == -1)
 272     return VOIDmode;
 273
 274   /* Everyone who uses this function used to follow it with
 275      if (result == VOIDmode) result = word_mode; */
 276   if (data->operand[opno].mode == VOIDmode)
 277     return word_mode;
 278   return data->operand[opno].mode;
 279 }
 280
 281 \f
 282 /* Generate code to store value from rtx VALUE
 283    into a bit-field within structure STR_RTX
 284    containing BITSIZE bits starting at bit BITNUM.
 285    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 286    ALIGN is the alignment that STR_RTX is known to have.
 287    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 288
 289 /* ??? Note that there are two different ideas here for how
 290    to determine the size to count bits within, for a register.
 291    One is BITS_PER_WORD, and the other is the size of operand 3
 292    of the insv pattern.
 293
 294    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 295    else, we use the mode of operand 3.  */
 296
 297 rtx
 298 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 299                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 300                  rtx value, HOST_WIDE_INT total_size)
 301 {
 302   unsigned int unit
 303     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 304   unsigned HOST_WIDE_INT offset = bitnum / unit;
 305   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 306   rtx op0 = str_rtx;
 307   int byte_offset;
 308
 309   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 310
 311   /* Discount the part of the structure before the desired byte.
 312      We need to know how many bytes are safe to reference after it.  */
 313   if (total_size >= 0)
 314     total_size -= (bitpos / BIGGEST_ALIGNMENT
 315                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 316
 317   while (GET_CODE (op0) == SUBREG)
 318     {
 319       /* The following line once was done only if WORDS_BIG_ENDIAN,
 320          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 321          meaningful at a much higher level; when structures are copied
 322          between memory and regs, the higher-numbered regs
 323          always get higher addresses.  */
 324       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 325       /* We used to adjust BITPOS here, but now we do the whole adjustment
 326          right after the loop.  */
 327       op0 = SUBREG_REG (op0);
 328     }
 329
 330   value = protect_from_queue (value, 0);
 331
 332   /* Use vec_extract patterns for extracting parts of vectors whenever
 333      available.  */
 334   if (VECTOR_MODE_P (GET_MODE (op0))
 335       && !MEM_P (op0)
 336       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 337           != CODE_FOR_nothing)
 338       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 339       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 340       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 341     {
 342       enum machine_mode outermode = GET_MODE (op0);
 343       enum machine_mode innermode = GET_MODE_INNER (outermode);
 344       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 345       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 346       rtx rtxpos = GEN_INT (pos);
 347       rtx src = value;
 348       rtx dest = op0;
 349       rtx pat, seq;
 350       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 351       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 352       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 353
 354       start_sequence ();
 355
 356       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 357         src = copy_to_mode_reg (mode1, src);
 358
 359       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 360         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 361
 362       /* We could handle this, but we should always be called with a pseudo
 363          for our targets and all insns should take them as outputs.  */
 364       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
 365           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
 366           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 367         abort ();
 368       pat = GEN_FCN (icode) (dest, src, rtxpos);
 369       seq = get_insns ();
 370       end_sequence ();
 371       if (pat)
 372         {
 373           emit_insn (seq);
 374           emit_insn (pat);
 375           return dest;
 376         }
 377     }
 378
 379   if (flag_force_mem)
 380     {
 381       int old_generating_concat_p = generating_concat_p;
 382       generating_concat_p = 0;
 383       value = force_not_mem (value);
 384       generating_concat_p = old_generating_concat_p;
 385     }
 386
 387   /* If the target is a register, overwriting the entire object, or storing
 388      a full-word or multi-word field can be done with just a SUBREG.
 389
 390      If the target is memory, storing any naturally aligned field can be
 391      done with a simple store.  For targets that support fast unaligned
 392      memory, any naturally sized, unit aligned field can be done directly.  */
 393
 394   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 395                 + (offset * UNITS_PER_WORD);
 396
 397   if (bitpos == 0
 398       && bitsize == GET_MODE_BITSIZE (fieldmode)
 399       && (!MEM_P (op0)
 400           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 401              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 402              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 403           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 404              || (offset * BITS_PER_UNIT % bitsize == 0
 405                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 406     {
 407       if (GET_MODE (op0) != fieldmode)
 408         {
 409           if (GET_CODE (op0) == SUBREG)
 410             {
 411               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 412                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 413                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 414                 op0 = SUBREG_REG (op0);
 415               else
 416                 /* Else we've got some float mode source being extracted into
 417                    a different float mode destination -- this combination of
 418                    subregs results in Severe Tire Damage.  */
 419                 abort ();
 420             }
 421           if (REG_P (op0))
 422             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 423           else
 424             op0 = adjust_address (op0, fieldmode, offset);
 425         }
 426       emit_move_insn (op0, value);
 427       return value;
 428     }
 429
 430   /* Make sure we are playing with integral modes.  Pun with subregs
 431      if we aren't.  This must come after the entire register case above,
 432      since that case is valid for any mode.  The following cases are only
 433      valid for integral modes.  */
 434   {
 435     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 436     if (imode != GET_MODE (op0))
 437       {
 438         if (MEM_P (op0))
 439           op0 = adjust_address (op0, imode, 0);
 440         else if (imode != BLKmode)
 441           op0 = gen_lowpart (imode, op0);
 442         else
 443           abort ();
 444       }
 445   }
 446
 447   /* We may be accessing data outside the field, which means
 448      we can alias adjacent data.  */
 449   if (MEM_P (op0))
 450     {
 451       op0 = shallow_copy_rtx (op0);
 452       set_mem_alias_set (op0, 0);
 453       set_mem_expr (op0, 0);
 454     }
 455
 456   /* If OP0 is a register, BITPOS must count within a word.
 457      But as we have it, it counts within whatever size OP0 now has.
 458      On a bigendian machine, these are not the same, so convert.  */
 459   if (BYTES_BIG_ENDIAN
 460       && !MEM_P (op0)
 461       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 462     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 463
 464   /* Storing an lsb-aligned field in a register
 465      can be done with a movestrict instruction.  */
 466
 467   if (!MEM_P (op0)
 468       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 469       && bitsize == GET_MODE_BITSIZE (fieldmode)
 470       && (movstrict_optab->handlers[fieldmode].insn_code
 471           != CODE_FOR_nothing))
 472     {
 473       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 474
 475       /* Get appropriate low part of the value being stored.  */
 476       if (GET_CODE (value) == CONST_INT || REG_P (value))
 477         value = gen_lowpart (fieldmode, value);
 478       else if (!(GET_CODE (value) == SYMBOL_REF
 479                  || GET_CODE (value) == LABEL_REF
 480                  || GET_CODE (value) == CONST))
 481         value = convert_to_mode (fieldmode, value, 0);
 482
 483       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 484         value = copy_to_mode_reg (fieldmode, value);
 485
 486       if (GET_CODE (op0) == SUBREG)
 487         {
 488           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 489               || GET_MODE_CLASS (fieldmode) == MODE_INT
 490               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 491             op0 = SUBREG_REG (op0);
 492           else
 493             /* Else we've got some float mode source being extracted into
 494                a different float mode destination -- this combination of
 495                subregs results in Severe Tire Damage.  */
 496             abort ();
 497         }
 498
 499       emit_insn (GEN_FCN (icode)
 500                  (gen_rtx_SUBREG (fieldmode, op0,
 501                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 502                                   + (offset * UNITS_PER_WORD)),
 503                                   value));
 504
 505       return value;
 506     }
 507
 508   /* Handle fields bigger than a word.  */
 509
 510   if (bitsize > BITS_PER_WORD)
 511     {
 512       /* Here we transfer the words of the field
 513          in the order least significant first.
 514          This is because the most significant word is the one which may
 515          be less than full.
 516          However, only do that if the value is not BLKmode.  */
 517
 518       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 519       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 520       unsigned int i;
 521
 522       /* This is the mode we must force value to, so that there will be enough
 523          subwords to extract.  Note that fieldmode will often (always?) be
 524          VOIDmode, because that is what store_field uses to indicate that this
 525          is a bit field, but passing VOIDmode to operand_subword_force will
 526          result in an abort.  */
 527       fieldmode = GET_MODE (value);
 528       if (fieldmode == VOIDmode)
 529         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 530
 531       for (i = 0; i < nwords; i++)
 532         {
 533           /* If I is 0, use the low-order word in both field and target;
 534              if I is 1, use the next to lowest word; and so on.  */
 535           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 536           unsigned int bit_offset = (backwards
 537                                      ? MAX ((int) bitsize - ((int) i + 1)
 538                                             * BITS_PER_WORD,
 539                                             0)
 540                                      : (int) i * BITS_PER_WORD);
 541
 542           store_bit_field (op0, MIN (BITS_PER_WORD,
 543                                      bitsize - i * BITS_PER_WORD),
 544                            bitnum + bit_offset, word_mode,
 545                            operand_subword_force (value, wordnum, fieldmode),
 546                            total_size);
 547         }
 548       return value;
 549     }
 550
 551   /* From here on we can assume that the field to be stored in is
 552      a full-word (whatever type that is), since it is shorter than a word.  */
 553
 554   /* OFFSET is the number of words or bytes (UNIT says which)
 555      from STR_RTX to the first word or byte containing part of the field.  */
 556
 557   if (!MEM_P (op0))
 558     {
 559       if (offset != 0
 560           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 561         {
 562           if (!REG_P (op0))
 563             {
 564               /* Since this is a destination (lvalue), we can't copy it to a
 565                  pseudo.  We can trivially remove a SUBREG that does not
 566                  change the size of the operand.  Such a SUBREG may have been
 567                  added above.  Otherwise, abort.  */
 568               if (GET_CODE (op0) == SUBREG
 569                   && (GET_MODE_SIZE (GET_MODE (op0))
 570                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 571                 op0 = SUBREG_REG (op0);
 572               else
 573                 abort ();
 574             }
 575           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 576                                 op0, (offset * UNITS_PER_WORD));
 577         }
 578       offset = 0;
 579     }
 580   else
 581     op0 = protect_from_queue (op0, 1);
 582
 583   /* If VALUE is a floating-point mode, access it as an integer of the
 584      corresponding size.  This can occur on a machine with 64 bit registers
 585      that uses SFmode for float.  This can also occur for unaligned float
 586      structure fields.  */
 587   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 588       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 589     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 590                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 591                          value);
 592
 593   /* Now OFFSET is nonzero only if OP0 is memory
 594      and is therefore always measured in bytes.  */
 595
 596   if (HAVE_insv
 597       && GET_MODE (value) != BLKmode
 598       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 599       /* Ensure insv's size is wide enough for this field.  */
 600       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 601       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 602             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 603     {
 604       int xbitpos = bitpos;
 605       rtx value1;
 606       rtx xop0 = op0;
 607       rtx last = get_last_insn ();
 608       rtx pat;
 609       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 610       int save_volatile_ok = volatile_ok;
 611
 612       volatile_ok = 1;
 613
 614       /* If this machine's insv can only insert into a register, copy OP0
 615          into a register and save it back later.  */
 616       /* This used to check flag_force_mem, but that was a serious
 617          de-optimization now that flag_force_mem is enabled by -O2.  */
 618       if (MEM_P (op0)
 619           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 620                 (op0, VOIDmode)))
 621         {
 622           rtx tempreg;
 623           enum machine_mode bestmode;
 624
 625           /* Get the mode to use for inserting into this field.  If OP0 is
 626              BLKmode, get the smallest mode consistent with the alignment. If
 627              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 628              mode. Otherwise, use the smallest mode containing the field.  */
 629
 630           if (GET_MODE (op0) == BLKmode
 631               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 632             bestmode
 633               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 634                                MEM_VOLATILE_P (op0));
 635           else
 636             bestmode = GET_MODE (op0);
 637
 638           if (bestmode == VOIDmode
 639               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 640                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 641             goto insv_loses;
 642
 643           /* Adjust address to point to the containing unit of that mode.
 644              Compute offset as multiple of this unit, counting in bytes.  */
 645           unit = GET_MODE_BITSIZE (bestmode);
 646           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 647           bitpos = bitnum % unit;
 648           op0 = adjust_address (op0, bestmode,  offset);
 649
 650           /* Fetch that unit, store the bitfield in it, then store
 651              the unit.  */
 652           tempreg = copy_to_reg (op0);
 653           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 654                            total_size);
 655           emit_move_insn (op0, tempreg);
 656           return value;
 657         }
 658       volatile_ok = save_volatile_ok;
 659
 660       /* Add OFFSET into OP0's address.  */
 661       if (MEM_P (xop0))
 662         xop0 = adjust_address (xop0, byte_mode, offset);
 663
 664       /* If xop0 is a register, we need it in MAXMODE
 665          to make it acceptable to the format of insv.  */
 666       if (GET_CODE (xop0) == SUBREG)
 667         /* We can't just change the mode, because this might clobber op0,
 668            and we will need the original value of op0 if insv fails.  */
 669         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 670       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 671         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 672
 673       /* On big-endian machines, we count bits from the most significant.
 674          If the bit field insn does not, we must invert.  */
 675
 676       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 677         xbitpos = unit - bitsize - xbitpos;
 678
 679       /* We have been counting XBITPOS within UNIT.
 680          Count instead within the size of the register.  */
 681       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 682         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 683
 684       unit = GET_MODE_BITSIZE (maxmode);
 685
 686       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 687       value1 = value;
 688       if (GET_MODE (value) != maxmode)
 689         {
 690           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 691             {
 692               /* Optimization: Don't bother really extending VALUE
 693                  if it has all the bits we will actually use.  However,
 694                  if we must narrow it, be sure we do it correctly.  */
 695
 696               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 697                 {
 698                   rtx tmp;
 699
 700                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 701                   if (! tmp)
 702                     tmp = simplify_gen_subreg (maxmode,
 703                                                force_reg (GET_MODE (value),
 704                                                           value1),
 705                                                GET_MODE (value), 0);
 706                   value1 = tmp;
 707                 }
 708               else
 709                 value1 = gen_lowpart (maxmode, value1);
 710             }
 711           else if (GET_CODE (value) == CONST_INT)
 712             value1 = gen_int_mode (INTVAL (value), maxmode);
 713           else if (!CONSTANT_P (value))
 714             /* Parse phase is supposed to make VALUE's data type
 715                match that of the component reference, which is a type
 716                at least as wide as the field; so VALUE should have
 717                a mode that corresponds to that type.  */
 718             abort ();
 719         }
 720
 721       /* If this machine's insv insists on a register,
 722          get VALUE1 into a register.  */
 723       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 724              (value1, maxmode)))
 725         value1 = force_reg (maxmode, value1);
 726
 727       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 728       if (pat)
 729         emit_insn (pat);
 730       else
 731         {
 732           delete_insns_since (last);
 733           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 734         }
 735     }
 736   else
 737     insv_loses:
 738     /* Insv is not available; store using shifts and boolean ops.  */
 739     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 740   return value;
 741 }
 742 \f
 743 /* Use shifts and boolean operations to store VALUE
 744    into a bit field of width BITSIZE
 745    in a memory location specified by OP0 except offset by OFFSET bytes.
 746      (OFFSET must be 0 if OP0 is a register.)
 747    The field starts at position BITPOS within the byte.
 748     (If OP0 is a register, it may be a full word or a narrower mode,
 749      but BITPOS still counts within a full word,
 750      which is significant on bigendian machines.)
 751
 752    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 753
 754 static void
 755 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 756                        unsigned HOST_WIDE_INT bitsize,
 757                        unsigned HOST_WIDE_INT bitpos, rtx value)
 758 {
 759   enum machine_mode mode;
 760   unsigned int total_bits = BITS_PER_WORD;
 761   rtx subtarget, temp;
 762   int all_zero = 0;
 763   int all_one = 0;
 764
 765   /* There is a case not handled here:
 766      a structure with a known alignment of just a halfword
 767      and a field split across two aligned halfwords within the structure.
 768      Or likewise a structure with a known alignment of just a byte
 769      and a field split across two bytes.
 770      Such cases are not supposed to be able to occur.  */
 771
 772   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 773     {
 774       if (offset != 0)
 775         abort ();
 776       /* Special treatment for a bit field split across two registers.  */
 777       if (bitsize + bitpos > BITS_PER_WORD)
 778         {
 779           store_split_bit_field (op0, bitsize, bitpos, value);
 780           return;
 781         }
 782     }
 783   else
 784     {
 785       /* Get the proper mode to use for this field.  We want a mode that
 786          includes the entire field.  If such a mode would be larger than
 787          a word, we won't be doing the extraction the normal way.
 788          We don't want a mode bigger than the destination.  */
 789
 790       mode = GET_MODE (op0);
 791       if (GET_MODE_BITSIZE (mode) == 0
 792           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 793         mode = word_mode;
 794       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 795                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 796
 797       if (mode == VOIDmode)
 798         {
 799           /* The only way this should occur is if the field spans word
 800              boundaries.  */
 801           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 802                                  value);
 803           return;
 804         }
 805
 806       total_bits = GET_MODE_BITSIZE (mode);
 807
 808       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 809          be in the range 0 to total_bits-1, and put any excess bytes in
 810          OFFSET.  */
 811       if (bitpos >= total_bits)
 812         {
 813           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 814           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 815                      * BITS_PER_UNIT);
 816         }
 817
 818       /* Get ref to an aligned byte, halfword, or word containing the field.
 819          Adjust BITPOS to be position within a word,
 820          and OFFSET to be the offset of that word.
 821          Then alter OP0 to refer to that word.  */
 822       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 823       offset -= (offset % (total_bits / BITS_PER_UNIT));
 824       op0 = adjust_address (op0, mode, offset);
 825     }
 826
 827   mode = GET_MODE (op0);
 828
 829   /* Now MODE is either some integral mode for a MEM as OP0,
 830      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 831      The bit field is contained entirely within OP0.
 832      BITPOS is the starting bit number within OP0.
 833      (OP0's mode may actually be narrower than MODE.)  */
 834
 835   if (BYTES_BIG_ENDIAN)
 836       /* BITPOS is the distance between our msb
 837          and that of the containing datum.
 838          Convert it to the distance from the lsb.  */
 839       bitpos = total_bits - bitsize - bitpos;
 840
 841   /* Now BITPOS is always the distance between our lsb
 842      and that of OP0.  */
 843
 844   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 845      we must first convert its mode to MODE.  */
 846
 847   if (GET_CODE (value) == CONST_INT)
 848     {
 849       HOST_WIDE_INT v = INTVAL (value);
 850
 851       if (bitsize < HOST_BITS_PER_WIDE_INT)
 852         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 853
 854       if (v == 0)
 855         all_zero = 1;
 856       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 857                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 858                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 859         all_one = 1;
 860
 861       value = lshift_value (mode, value, bitpos, bitsize);
 862     }
 863   else
 864     {
 865       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 866                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 867
 868       if (GET_MODE (value) != mode)
 869         {
 870           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 871               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 872             value = gen_lowpart (mode, value);
 873           else
 874             value = convert_to_mode (mode, value, 1);
 875         }
 876
 877       if (must_and)
 878         value = expand_binop (mode, and_optab, value,
 879                               mask_rtx (mode, 0, bitsize, 0),
 880                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 881       if (bitpos > 0)
 882         value = expand_shift (LSHIFT_EXPR, mode, value,
 883                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 884     }
 885
 886   /* Now clear the chosen bits in OP0,
 887      except that if VALUE is -1 we need not bother.  */
 888
 889   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 890
 891   if (! all_one)
 892     {
 893       temp = expand_binop (mode, and_optab, op0,
 894                            mask_rtx (mode, bitpos, bitsize, 1),
 895                            subtarget, 1, OPTAB_LIB_WIDEN);
 896       subtarget = temp;
 897     }
 898   else
 899     temp = op0;
 900
 901   /* Now logical-or VALUE into OP0, unless it is zero.  */
 902
 903   if (! all_zero)
 904     temp = expand_binop (mode, ior_optab, temp, value,
 905                          subtarget, 1, OPTAB_LIB_WIDEN);
 906   if (op0 != temp)
 907     emit_move_insn (op0, temp);
 908 }
 909 \f
 910 /* Store a bit field that is split across multiple accessible memory objects.
 911
 912    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 913    BITSIZE is the field width; BITPOS the position of its first bit
 914    (within the word).
 915    VALUE is the value to store.
 916
 917    This does not yet handle fields wider than BITS_PER_WORD.  */
 918
 919 static void
 920 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 921                        unsigned HOST_WIDE_INT bitpos, rtx value)
 922 {
 923   unsigned int unit;
 924   unsigned int bitsdone = 0;
 925
 926   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 927      much at a time.  */
 928   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 929     unit = BITS_PER_WORD;
 930   else
 931     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 932
 933   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 934      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 935      that VALUE might be a floating-point constant.  */
 936   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 937     {
 938       rtx word = gen_lowpart_common (word_mode, value);
 939
 940       if (word && (value != word))
 941         value = word;
 942       else
 943         value = gen_lowpart_common (word_mode,
 944                                     force_reg (GET_MODE (value) != VOIDmode
 945                                                ? GET_MODE (value)
 946                                                : word_mode, value));
 947     }
 948
 949   while (bitsdone < bitsize)
 950     {
 951       unsigned HOST_WIDE_INT thissize;
 952       rtx part, word;
 953       unsigned HOST_WIDE_INT thispos;
 954       unsigned HOST_WIDE_INT offset;
 955
 956       offset = (bitpos + bitsdone) / unit;
 957       thispos = (bitpos + bitsdone) % unit;
 958
 959       /* THISSIZE must not overrun a word boundary.  Otherwise,
 960          store_fixed_bit_field will call us again, and we will mutually
 961          recurse forever.  */
 962       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 963       thissize = MIN (thissize, unit - thispos);
 964
 965       if (BYTES_BIG_ENDIAN)
 966         {
 967           int total_bits;
 968
 969           /* We must do an endian conversion exactly the same way as it is
 970              done in extract_bit_field, so that the two calls to
 971              extract_fixed_bit_field will have comparable arguments.  */
 972           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 973             total_bits = BITS_PER_WORD;
 974           else
 975             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 976
 977           /* Fetch successively less significant portions.  */
 978           if (GET_CODE (value) == CONST_INT)
 979             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 980                              >> (bitsize - bitsdone - thissize))
 981                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 982           else
 983             /* The args are chosen so that the last part includes the
 984                lsb.  Give extract_bit_field the value it needs (with
 985                endianness compensation) to fetch the piece we want.  */
 986             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 987                                             total_bits - bitsize + bitsdone,
 988                                             NULL_RTX, 1);
 989         }
 990       else
 991         {
 992           /* Fetch successively more significant portions.  */
 993           if (GET_CODE (value) == CONST_INT)
 994             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 995                              >> bitsdone)
 996                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 997           else
 998             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 999                                             bitsdone, NULL_RTX, 1);
1000         }
1001
1002       /* If OP0 is a register, then handle OFFSET here.
1003
1004          When handling multiword bitfields, extract_bit_field may pass
1005          down a word_mode SUBREG of a larger REG for a bitfield that actually
1006          crosses a word boundary.  Thus, for a SUBREG, we must find
1007          the current word starting from the base register.  */
1008       if (GET_CODE (op0) == SUBREG)
1009         {
1010           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1011           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1012                                         GET_MODE (SUBREG_REG (op0)));
1013           offset = 0;
1014         }
1015       else if (REG_P (op0))
1016         {
1017           word = operand_subword_force (op0, offset, GET_MODE (op0));
1018           offset = 0;
1019         }
1020       else
1021         word = op0;
1022
1023       /* OFFSET is in UNITs, and UNIT is in bits.
1024          store_fixed_bit_field wants offset in bytes.  */
1025       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1026                              thispos, part);
1027       bitsdone += thissize;
1028     }
1029 }
1030 \f
1031 /* Generate code to extract a byte-field from STR_RTX
1032    containing BITSIZE bits, starting at BITNUM,
1033    and put it in TARGET if possible (if TARGET is nonzero).
1034    Regardless of TARGET, we return the rtx for where the value is placed.
1035    It may be a QUEUED.
1036
1037    STR_RTX is the structure containing the byte (a REG or MEM).
1038    UNSIGNEDP is nonzero if this is an unsigned bit field.
1039    MODE is the natural mode of the field value once extracted.
1040    TMODE is the mode the caller would like the value to have;
1041    but the value may be returned with type MODE instead.
1042
1043    TOTAL_SIZE is the size in bytes of the containing structure,
1044    or -1 if varying.
1045
1046    If a TARGET is specified and we can store in it at no extra cost,
1047    we do so, and return TARGET.
1048    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1049    if they are equally easy.  */
1050
1051 rtx
1052 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1053                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1054                    enum machine_mode mode, enum machine_mode tmode,
1055                    HOST_WIDE_INT total_size)
1056 {
1057   unsigned int unit
1058     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1059   unsigned HOST_WIDE_INT offset = bitnum / unit;
1060   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1061   rtx op0 = str_rtx;
1062   rtx spec_target = target;
1063   rtx spec_target_subreg = 0;
1064   enum machine_mode int_mode;
1065   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1066   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1067   enum machine_mode mode1;
1068   int byte_offset;
1069
1070   /* Discount the part of the structure before the desired byte.
1071      We need to know how many bytes are safe to reference after it.  */
1072   if (total_size >= 0)
1073     total_size -= (bitpos / BIGGEST_ALIGNMENT
1074                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1075
1076   if (tmode == VOIDmode)
1077     tmode = mode;
1078
1079   while (GET_CODE (op0) == SUBREG)
1080     {
1081       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1082       if (bitpos > unit)
1083         {
1084           offset += (bitpos / unit);
1085           bitpos %= unit;
1086         }
1087       op0 = SUBREG_REG (op0);
1088     }
1089
1090   if (REG_P (op0)
1091       && mode == GET_MODE (op0)
1092       && bitnum == 0
1093       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1094     {
1095       /* We're trying to extract a full register from itself.  */
1096       return op0;
1097     }
1098
1099   /* Use vec_extract patterns for extracting parts of vectors whenever
1100      available.  */
1101   if (VECTOR_MODE_P (GET_MODE (op0))
1102       && !MEM_P (op0)
1103       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1104           != CODE_FOR_nothing)
1105       && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1106           == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1107     {
1108       enum machine_mode outermode = GET_MODE (op0);
1109       enum machine_mode innermode = GET_MODE_INNER (outermode);
1110       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1111       int pos = bitnum / GET_MODE_BITSIZE (innermode);
1112       rtx rtxpos = GEN_INT (pos);
1113       rtx src = op0;
1114       rtx dest = NULL, pat, seq;
1115       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1116       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1117       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1118
1119       if (innermode == tmode || innermode == mode)
1120         dest = target;
1121
1122       if (!dest)
1123         dest = gen_reg_rtx (innermode);
1124
1125       start_sequence ();
1126
1127       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1128         dest = copy_to_mode_reg (mode0, dest);
1129
1130       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1131         src = copy_to_mode_reg (mode1, src);
1132
1133       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1134         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1135
1136       /* We could handle this, but we should always be called with a pseudo
1137          for our targets and all insns should take them as outputs.  */
1138       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1139           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1140           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1141         abort ();
1142
1143       pat = GEN_FCN (icode) (dest, src, rtxpos);
1144       seq = get_insns ();
1145       end_sequence ();
1146       if (pat)
1147         {
1148           emit_insn (seq);
1149           emit_insn (pat);
1150           return dest;
1151         }
1152     }
1153
1154   /* Make sure we are playing with integral modes.  Pun with subregs
1155      if we aren't.  */
1156   {
1157     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1158     if (imode != GET_MODE (op0))
1159       {
1160         if (MEM_P (op0))
1161           op0 = adjust_address (op0, imode, 0);
1162         else if (imode != BLKmode)
1163           op0 = gen_lowpart (imode, op0);
1164         else
1165           abort ();
1166       }
1167   }
1168
1169   /* We may be accessing data outside the field, which means
1170      we can alias adjacent data.  */
1171   if (MEM_P (op0))
1172     {
1173       op0 = shallow_copy_rtx (op0);
1174       set_mem_alias_set (op0, 0);
1175       set_mem_expr (op0, 0);
1176     }
1177
1178   /* Extraction of a full-word or multi-word value from a structure
1179      in a register or aligned memory can be done with just a SUBREG.
1180      A subword value in the least significant part of a register
1181      can also be extracted with a SUBREG.  For this, we need the
1182      byte offset of the value in op0.  */
1183
1184   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1185
1186   /* If OP0 is a register, BITPOS must count within a word.
1187      But as we have it, it counts within whatever size OP0 now has.
1188      On a bigendian machine, these are not the same, so convert.  */
1189   if (BYTES_BIG_ENDIAN
1190       && !MEM_P (op0)
1191       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1192     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1193
1194   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1195      If that's wrong, the solution is to test for it and set TARGET to 0
1196      if needed.  */
1197
1198   /* Only scalar integer modes can be converted via subregs.  There is an
1199      additional problem for FP modes here in that they can have a precision
1200      which is different from the size.  mode_for_size uses precision, but
1201      we want a mode based on the size, so we must avoid calling it for FP
1202      modes.  */
1203   mode1  = (SCALAR_INT_MODE_P (tmode)
1204             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1205             : mode);
1206
1207   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1208         && bitpos % BITS_PER_WORD == 0)
1209        || (mode1 != BLKmode
1210            /* ??? The big endian test here is wrong.  This is correct
1211               if the value is in a register, and if mode_for_size is not
1212               the same mode as op0.  This causes us to get unnecessarily
1213               inefficient code from the Thumb port when -mbig-endian.  */
1214            && (BYTES_BIG_ENDIAN
1215                ? bitpos + bitsize == BITS_PER_WORD
1216                : bitpos == 0)))
1217       && ((!MEM_P (op0)
1218            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1219                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1220            && GET_MODE_SIZE (mode1) != 0
1221            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1222           || (MEM_P (op0)
1223               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1224                   || (offset * BITS_PER_UNIT % bitsize == 0
1225                       && MEM_ALIGN (op0) % bitsize == 0)))))
1226     {
1227       if (mode1 != GET_MODE (op0))
1228         {
1229           if (GET_CODE (op0) == SUBREG)
1230             {
1231               if (GET_MODE (SUBREG_REG (op0)) == mode1
1232                   || GET_MODE_CLASS (mode1) == MODE_INT
1233                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1234                 op0 = SUBREG_REG (op0);
1235               else
1236                 /* Else we've got some float mode source being extracted into
1237                    a different float mode destination -- this combination of
1238                    subregs results in Severe Tire Damage.  */
1239                 goto no_subreg_mode_swap;
1240             }
1241           if (REG_P (op0))
1242             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1243           else
1244             op0 = adjust_address (op0, mode1, offset);
1245         }
1246       if (mode1 != mode)
1247         return convert_to_mode (tmode, op0, unsignedp);
1248       return op0;
1249     }
1250  no_subreg_mode_swap:
1251
1252   /* Handle fields bigger than a word.  */
1253
1254   if (bitsize > BITS_PER_WORD)
1255     {
1256       /* Here we transfer the words of the field
1257          in the order least significant first.
1258          This is because the most significant word is the one which may
1259          be less than full.  */
1260
1261       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1262       unsigned int i;
1263
1264       if (target == 0 || !REG_P (target))
1265         target = gen_reg_rtx (mode);
1266
1267       /* Indicate for flow that the entire target reg is being set.  */
1268       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1269
1270       for (i = 0; i < nwords; i++)
1271         {
1272           /* If I is 0, use the low-order word in both field and target;
1273              if I is 1, use the next to lowest word; and so on.  */
1274           /* Word number in TARGET to use.  */
1275           unsigned int wordnum
1276             = (WORDS_BIG_ENDIAN
1277                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1278                : i);
1279           /* Offset from start of field in OP0.  */
1280           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1281                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1282                                                 * (int) BITS_PER_WORD))
1283                                      : (int) i * BITS_PER_WORD);
1284           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1285           rtx result_part
1286             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1287                                            bitsize - i * BITS_PER_WORD),
1288                                  bitnum + bit_offset, 1, target_part, mode,
1289                                  word_mode, total_size);
1290
1291           if (target_part == 0)
1292             abort ();
1293
1294           if (result_part != target_part)
1295             emit_move_insn (target_part, result_part);
1296         }
1297
1298       if (unsignedp)
1299         {
1300           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1301              need to be zero'd out.  */
1302           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1303             {
1304               unsigned int i, total_words;
1305
1306               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1307               for (i = nwords; i < total_words; i++)
1308                 emit_move_insn
1309                   (operand_subword (target,
1310                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1311                                     1, VOIDmode),
1312                    const0_rtx);
1313             }
1314           return target;
1315         }
1316
1317       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1318       target = expand_shift (LSHIFT_EXPR, mode, target,
1319                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1320                              NULL_RTX, 0);
1321       return expand_shift (RSHIFT_EXPR, mode, target,
1322                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1323                            NULL_RTX, 0);
1324     }
1325
1326   /* From here on we know the desired field is smaller than a word.  */
1327
1328   /* Check if there is a correspondingly-sized integer field, so we can
1329      safely extract it as one size of integer, if necessary; then
1330      truncate or extend to the size that is wanted; then use SUBREGs or
1331      convert_to_mode to get one of the modes we really wanted.  */
1332
1333   int_mode = int_mode_for_mode (tmode);
1334   if (int_mode == BLKmode)
1335     int_mode = int_mode_for_mode (mode);
1336   if (int_mode == BLKmode)
1337     abort ();    /* Should probably push op0 out to memory and then
1338                     do a load.  */
1339
1340   /* OFFSET is the number of words or bytes (UNIT says which)
1341      from STR_RTX to the first word or byte containing part of the field.  */
1342
1343   if (!MEM_P (op0))
1344     {
1345       if (offset != 0
1346           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1347         {
1348           if (!REG_P (op0))
1349             op0 = copy_to_reg (op0);
1350           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1351                                 op0, (offset * UNITS_PER_WORD));
1352         }
1353       offset = 0;
1354     }
1355   else
1356     op0 = protect_from_queue (str_rtx, 1);
1357
1358   /* Now OFFSET is nonzero only for memory operands.  */
1359
1360   if (unsignedp)
1361     {
1362       if (HAVE_extzv
1363           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1364           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1365                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1366         {
1367           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1368           rtx bitsize_rtx, bitpos_rtx;
1369           rtx last = get_last_insn ();
1370           rtx xop0 = op0;
1371           rtx xtarget = target;
1372           rtx xspec_target = spec_target;
1373           rtx xspec_target_subreg = spec_target_subreg;
1374           rtx pat;
1375           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1376
1377           if (MEM_P (xop0))
1378             {
1379               int save_volatile_ok = volatile_ok;
1380               volatile_ok = 1;
1381
1382               /* Is the memory operand acceptable?  */
1383               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1384                      (xop0, GET_MODE (xop0))))
1385                 {
1386                   /* No, load into a reg and extract from there.  */
1387                   enum machine_mode bestmode;
1388
1389                   /* Get the mode to use for inserting into this field.  If
1390                      OP0 is BLKmode, get the smallest mode consistent with the
1391                      alignment. If OP0 is a non-BLKmode object that is no
1392                      wider than MAXMODE, use its mode. Otherwise, use the
1393                      smallest mode containing the field.  */
1394
1395                   if (GET_MODE (xop0) == BLKmode
1396                       || (GET_MODE_SIZE (GET_MODE (op0))
1397                           > GET_MODE_SIZE (maxmode)))
1398                     bestmode = get_best_mode (bitsize, bitnum,
1399                                               MEM_ALIGN (xop0), maxmode,
1400                                               MEM_VOLATILE_P (xop0));
1401                   else
1402                     bestmode = GET_MODE (xop0);
1403
1404                   if (bestmode == VOIDmode
1405                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1406                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1407                     goto extzv_loses;
1408
1409                   /* Compute offset as multiple of this unit,
1410                      counting in bytes.  */
1411                   unit = GET_MODE_BITSIZE (bestmode);
1412                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1413                   xbitpos = bitnum % unit;
1414                   xop0 = adjust_address (xop0, bestmode, xoffset);
1415
1416                   /* Fetch it to a register in that size.  */
1417                   xop0 = force_reg (bestmode, xop0);
1418
1419                   /* XBITPOS counts within UNIT, which is what is expected.  */
1420                 }
1421               else
1422                 /* Get ref to first byte containing part of the field.  */
1423                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1424
1425               volatile_ok = save_volatile_ok;
1426             }
1427
1428           /* If op0 is a register, we need it in MAXMODE (which is usually
1429              SImode). to make it acceptable to the format of extzv.  */
1430           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1431             goto extzv_loses;
1432           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1433             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1434
1435           /* On big-endian machines, we count bits from the most significant.
1436              If the bit field insn does not, we must invert.  */
1437           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1438             xbitpos = unit - bitsize - xbitpos;
1439
1440           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1441           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1442             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1443
1444           unit = GET_MODE_BITSIZE (maxmode);
1445
1446           if (xtarget == 0
1447               || (flag_force_mem && MEM_P (xtarget)))
1448             xtarget = xspec_target = gen_reg_rtx (tmode);
1449
1450           if (GET_MODE (xtarget) != maxmode)
1451             {
1452               if (REG_P (xtarget))
1453                 {
1454                   int wider = (GET_MODE_SIZE (maxmode)
1455                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1456                   xtarget = gen_lowpart (maxmode, xtarget);
1457                   if (wider)
1458                     xspec_target_subreg = xtarget;
1459                 }
1460               else
1461                 xtarget = gen_reg_rtx (maxmode);
1462             }
1463
1464           /* If this machine's extzv insists on a register target,
1465              make sure we have one.  */
1466           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1467                  (xtarget, maxmode)))
1468             xtarget = gen_reg_rtx (maxmode);
1469
1470           bitsize_rtx = GEN_INT (bitsize);
1471           bitpos_rtx = GEN_INT (xbitpos);
1472
1473           pat = gen_extzv (protect_from_queue (xtarget, 1),
1474                            xop0, bitsize_rtx, bitpos_rtx);
1475           if (pat)
1476             {
1477               emit_insn (pat);
1478               target = xtarget;
1479               spec_target = xspec_target;
1480               spec_target_subreg = xspec_target_subreg;
1481             }
1482           else
1483             {
1484               delete_insns_since (last);
1485               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1486                                                 bitpos, target, 1);
1487             }
1488         }
1489       else
1490       extzv_loses:
1491         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1492                                           bitpos, target, 1);
1493     }
1494   else
1495     {
1496       if (HAVE_extv
1497           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1498           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1499                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1500         {
1501           int xbitpos = bitpos, xoffset = offset;
1502           rtx bitsize_rtx, bitpos_rtx;
1503           rtx last = get_last_insn ();
1504           rtx xop0 = op0, xtarget = target;
1505           rtx xspec_target = spec_target;
1506           rtx xspec_target_subreg = spec_target_subreg;
1507           rtx pat;
1508           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1509
1510           if (MEM_P (xop0))
1511             {
1512               /* Is the memory operand acceptable?  */
1513               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1514                      (xop0, GET_MODE (xop0))))
1515                 {
1516                   /* No, load into a reg and extract from there.  */
1517                   enum machine_mode bestmode;
1518
1519                   /* Get the mode to use for inserting into this field.  If
1520                      OP0 is BLKmode, get the smallest mode consistent with the
1521                      alignment. If OP0 is a non-BLKmode object that is no
1522                      wider than MAXMODE, use its mode. Otherwise, use the
1523                      smallest mode containing the field.  */
1524
1525                   if (GET_MODE (xop0) == BLKmode
1526                       || (GET_MODE_SIZE (GET_MODE (op0))
1527                           > GET_MODE_SIZE (maxmode)))
1528                     bestmode = get_best_mode (bitsize, bitnum,
1529                                               MEM_ALIGN (xop0), maxmode,
1530                                               MEM_VOLATILE_P (xop0));
1531                   else
1532                     bestmode = GET_MODE (xop0);
1533
1534                   if (bestmode == VOIDmode
1535                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1536                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1537                     goto extv_loses;
1538
1539                   /* Compute offset as multiple of this unit,
1540                      counting in bytes.  */
1541                   unit = GET_MODE_BITSIZE (bestmode);
1542                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1543                   xbitpos = bitnum % unit;
1544                   xop0 = adjust_address (xop0, bestmode, xoffset);
1545
1546                   /* Fetch it to a register in that size.  */
1547                   xop0 = force_reg (bestmode, xop0);
1548
1549                   /* XBITPOS counts within UNIT, which is what is expected.  */
1550                 }
1551               else
1552                 /* Get ref to first byte containing part of the field.  */
1553                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1554             }
1555
1556           /* If op0 is a register, we need it in MAXMODE (which is usually
1557              SImode) to make it acceptable to the format of extv.  */
1558           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1559             goto extv_loses;
1560           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1561             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1562
1563           /* On big-endian machines, we count bits from the most significant.
1564              If the bit field insn does not, we must invert.  */
1565           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1566             xbitpos = unit - bitsize - xbitpos;
1567
1568           /* XBITPOS counts within a size of UNIT.
1569              Adjust to count within a size of MAXMODE.  */
1570           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1571             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1572
1573           unit = GET_MODE_BITSIZE (maxmode);
1574
1575           if (xtarget == 0
1576               || (flag_force_mem && MEM_P (xtarget)))
1577             xtarget = xspec_target = gen_reg_rtx (tmode);
1578
1579           if (GET_MODE (xtarget) != maxmode)
1580             {
1581               if (REG_P (xtarget))
1582                 {
1583                   int wider = (GET_MODE_SIZE (maxmode)
1584                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1585                   xtarget = gen_lowpart (maxmode, xtarget);
1586                   if (wider)
1587                     xspec_target_subreg = xtarget;
1588                 }
1589               else
1590                 xtarget = gen_reg_rtx (maxmode);
1591             }
1592
1593           /* If this machine's extv insists on a register target,
1594              make sure we have one.  */
1595           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1596                  (xtarget, maxmode)))
1597             xtarget = gen_reg_rtx (maxmode);
1598
1599           bitsize_rtx = GEN_INT (bitsize);
1600           bitpos_rtx = GEN_INT (xbitpos);
1601
1602           pat = gen_extv (protect_from_queue (xtarget, 1),
1603                           xop0, bitsize_rtx, bitpos_rtx);
1604           if (pat)
1605             {
1606               emit_insn (pat);
1607               target = xtarget;
1608               spec_target = xspec_target;
1609               spec_target_subreg = xspec_target_subreg;
1610             }
1611           else
1612             {
1613               delete_insns_since (last);
1614               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1615                                                 bitpos, target, 0);
1616             }
1617         }
1618       else
1619       extv_loses:
1620         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1621                                           bitpos, target, 0);
1622     }
1623   if (target == spec_target)
1624     return target;
1625   if (target == spec_target_subreg)
1626     return spec_target;
1627   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1628     {
1629       /* If the target mode is floating-point, first convert to the
1630          integer mode of that size and then access it as a floating-point
1631          value via a SUBREG.  */
1632       if (GET_MODE_CLASS (tmode) != MODE_INT
1633           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1634         {
1635           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1636                                                    MODE_INT, 0),
1637                                     target, unsignedp);
1638           return gen_lowpart (tmode, target);
1639         }
1640       else
1641         return convert_to_mode (tmode, target, unsignedp);
1642     }
1643   return target;
1644 }
1645 \f
1646 /* Extract a bit field using shifts and boolean operations
1647    Returns an rtx to represent the value.
1648    OP0 addresses a register (word) or memory (byte).
1649    BITPOS says which bit within the word or byte the bit field starts in.
1650    OFFSET says how many bytes farther the bit field starts;
1651     it is 0 if OP0 is a register.
1652    BITSIZE says how many bits long the bit field is.
1653     (If OP0 is a register, it may be narrower than a full word,
1654      but BITPOS still counts within a full word,
1655      which is significant on bigendian machines.)
1656
1657    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1658    If TARGET is nonzero, attempts to store the value there
1659    and return TARGET, but this is not guaranteed.
1660    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1661
1662 static rtx
1663 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1664                          unsigned HOST_WIDE_INT offset,
1665                          unsigned HOST_WIDE_INT bitsize,
1666                          unsigned HOST_WIDE_INT bitpos, rtx target,
1667                          int unsignedp)
1668 {
1669   unsigned int total_bits = BITS_PER_WORD;
1670   enum machine_mode mode;
1671
1672   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1673     {
1674       /* Special treatment for a bit field split across two registers.  */
1675       if (bitsize + bitpos > BITS_PER_WORD)
1676         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1677     }
1678   else
1679     {
1680       /* Get the proper mode to use for this field.  We want a mode that
1681          includes the entire field.  If such a mode would be larger than
1682          a word, we won't be doing the extraction the normal way.  */
1683
1684       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1685                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1686
1687       if (mode == VOIDmode)
1688         /* The only way this should occur is if the field spans word
1689            boundaries.  */
1690         return extract_split_bit_field (op0, bitsize,
1691                                         bitpos + offset * BITS_PER_UNIT,
1692                                         unsignedp);
1693
1694       total_bits = GET_MODE_BITSIZE (mode);
1695
1696       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1697          be in the range 0 to total_bits-1, and put any excess bytes in
1698          OFFSET.  */
1699       if (bitpos >= total_bits)
1700         {
1701           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1702           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1703                      * BITS_PER_UNIT);
1704         }
1705
1706       /* Get ref to an aligned byte, halfword, or word containing the field.
1707          Adjust BITPOS to be position within a word,
1708          and OFFSET to be the offset of that word.
1709          Then alter OP0 to refer to that word.  */
1710       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1711       offset -= (offset % (total_bits / BITS_PER_UNIT));
1712       op0 = adjust_address (op0, mode, offset);
1713     }
1714
1715   mode = GET_MODE (op0);
1716
1717   if (BYTES_BIG_ENDIAN)
1718     /* BITPOS is the distance between our msb and that of OP0.
1719        Convert it to the distance from the lsb.  */
1720     bitpos = total_bits - bitsize - bitpos;
1721
1722   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1723      We have reduced the big-endian case to the little-endian case.  */
1724
1725   if (unsignedp)
1726     {
1727       if (bitpos)
1728         {
1729           /* If the field does not already start at the lsb,
1730              shift it so it does.  */
1731           tree amount = build_int_2 (bitpos, 0);
1732           /* Maybe propagate the target for the shift.  */
1733           /* But not if we will return it--could confuse integrate.c.  */
1734           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1735           if (tmode != mode) subtarget = 0;
1736           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1737         }
1738       /* Convert the value to the desired mode.  */
1739       if (mode != tmode)
1740         op0 = convert_to_mode (tmode, op0, 1);
1741
1742       /* Unless the msb of the field used to be the msb when we shifted,
1743          mask out the upper bits.  */
1744
1745       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1746         return expand_binop (GET_MODE (op0), and_optab, op0,
1747                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1748                              target, 1, OPTAB_LIB_WIDEN);
1749       return op0;
1750     }
1751
1752   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1753      then arithmetic-shift its lsb to the lsb of the word.  */
1754   op0 = force_reg (mode, op0);
1755   if (mode != tmode)
1756     target = 0;
1757
1758   /* Find the narrowest integer mode that contains the field.  */
1759
1760   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1761        mode = GET_MODE_WIDER_MODE (mode))
1762     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1763       {
1764         op0 = convert_to_mode (mode, op0, 0);
1765         break;
1766       }
1767
1768   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1769     {
1770       tree amount
1771         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1772       /* Maybe propagate the target for the shift.  */
1773       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1774       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1775     }
1776
1777   return expand_shift (RSHIFT_EXPR, mode, op0,
1778                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1779                        target, 0);
1780 }
1781 \f
1782 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1783    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1784    complement of that if COMPLEMENT.  The mask is truncated if
1785    necessary to the width of mode MODE.  The mask is zero-extended if
1786    BITSIZE+BITPOS is too small for MODE.  */
1787
1788 static rtx
1789 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1790 {
1791   HOST_WIDE_INT masklow, maskhigh;
1792
1793   if (bitsize == 0)
1794     masklow = 0;
1795   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1796     masklow = (HOST_WIDE_INT) -1 << bitpos;
1797   else
1798     masklow = 0;
1799
1800   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1801     masklow &= ((unsigned HOST_WIDE_INT) -1
1802                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1803
1804   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1805     maskhigh = -1;
1806   else
1807     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1808
1809   if (bitsize == 0)
1810     maskhigh = 0;
1811   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1812     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1813                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1814   else
1815     maskhigh = 0;
1816
1817   if (complement)
1818     {
1819       maskhigh = ~maskhigh;
1820       masklow = ~masklow;
1821     }
1822
1823   return immed_double_const (masklow, maskhigh, mode);
1824 }
1825
1826 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1827    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1828
1829 static rtx
1830 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1831 {
1832   unsigned HOST_WIDE_INT v = INTVAL (value);
1833   HOST_WIDE_INT low, high;
1834
1835   if (bitsize < HOST_BITS_PER_WIDE_INT)
1836     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1837
1838   if (bitpos < HOST_BITS_PER_WIDE_INT)
1839     {
1840       low = v << bitpos;
1841       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1842     }
1843   else
1844     {
1845       low = 0;
1846       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1847     }
1848
1849   return immed_double_const (low, high, mode);
1850 }
1851 \f
1852 /* Extract a bit field that is split across two words
1853    and return an RTX for the result.
1854
1855    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1856    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1857    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1858
1859 static rtx
1860 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1861                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1862 {
1863   unsigned int unit;
1864   unsigned int bitsdone = 0;
1865   rtx result = NULL_RTX;
1866   int first = 1;
1867
1868   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1869      much at a time.  */
1870   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1871     unit = BITS_PER_WORD;
1872   else
1873     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1874
1875   while (bitsdone < bitsize)
1876     {
1877       unsigned HOST_WIDE_INT thissize;
1878       rtx part, word;
1879       unsigned HOST_WIDE_INT thispos;
1880       unsigned HOST_WIDE_INT offset;
1881
1882       offset = (bitpos + bitsdone) / unit;
1883       thispos = (bitpos + bitsdone) % unit;
1884
1885       /* THISSIZE must not overrun a word boundary.  Otherwise,
1886          extract_fixed_bit_field will call us again, and we will mutually
1887          recurse forever.  */
1888       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1889       thissize = MIN (thissize, unit - thispos);
1890
1891       /* If OP0 is a register, then handle OFFSET here.
1892
1893          When handling multiword bitfields, extract_bit_field may pass
1894          down a word_mode SUBREG of a larger REG for a bitfield that actually
1895          crosses a word boundary.  Thus, for a SUBREG, we must find
1896          the current word starting from the base register.  */
1897       if (GET_CODE (op0) == SUBREG)
1898         {
1899           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1900           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1901                                         GET_MODE (SUBREG_REG (op0)));
1902           offset = 0;
1903         }
1904       else if (REG_P (op0))
1905         {
1906           word = operand_subword_force (op0, offset, GET_MODE (op0));
1907           offset = 0;
1908         }
1909       else
1910         word = op0;
1911
1912       /* Extract the parts in bit-counting order,
1913          whose meaning is determined by BYTES_PER_UNIT.
1914          OFFSET is in UNITs, and UNIT is in bits.
1915          extract_fixed_bit_field wants offset in bytes.  */
1916       part = extract_fixed_bit_field (word_mode, word,
1917                                       offset * unit / BITS_PER_UNIT,
1918                                       thissize, thispos, 0, 1);
1919       bitsdone += thissize;
1920
1921       /* Shift this part into place for the result.  */
1922       if (BYTES_BIG_ENDIAN)
1923         {
1924           if (bitsize != bitsdone)
1925             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1926                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1927         }
1928       else
1929         {
1930           if (bitsdone != thissize)
1931             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1932                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1933         }
1934
1935       if (first)
1936         result = part;
1937       else
1938         /* Combine the parts with bitwise or.  This works
1939            because we extracted each part as an unsigned bit field.  */
1940         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1941                                OPTAB_LIB_WIDEN);
1942
1943       first = 0;
1944     }
1945
1946   /* Unsigned bit field: we are done.  */
1947   if (unsignedp)
1948     return result;
1949   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1950   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1951                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1952                          NULL_RTX, 0);
1953   return expand_shift (RSHIFT_EXPR, word_mode, result,
1954                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1955 }
1956 \f
1957 /* Add INC into TARGET.  */
1958
1959 void
1960 expand_inc (rtx target, rtx inc)
1961 {
1962   rtx value = expand_binop (GET_MODE (target), add_optab,
1963                             target, inc,
1964                             target, 0, OPTAB_LIB_WIDEN);
1965   if (value != target)
1966     emit_move_insn (target, value);
1967 }
1968
1969 /* Subtract DEC from TARGET.  */
1970
1971 void
1972 expand_dec (rtx target, rtx dec)
1973 {
1974   rtx value = expand_binop (GET_MODE (target), sub_optab,
1975                             target, dec,
1976                             target, 0, OPTAB_LIB_WIDEN);
1977   if (value != target)
1978     emit_move_insn (target, value);
1979 }
1980 \f
1981 /* Output a shift instruction for expression code CODE,
1982    with SHIFTED being the rtx for the value to shift,
1983    and AMOUNT the tree for the amount to shift by.
1984    Store the result in the rtx TARGET, if that is convenient.
1985    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1986    Return the rtx for where the value is.  */
1987
1988 rtx
1989 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1990               tree amount, rtx target, int unsignedp)
1991 {
1992   rtx op1, temp = 0;
1993   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1994   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1995   int try;
1996
1997   /* Previously detected shift-counts computed by NEGATE_EXPR
1998      and shifted in the other direction; but that does not work
1999      on all machines.  */
2000
2001   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2002
2003   if (SHIFT_COUNT_TRUNCATED)
2004     {
2005       if (GET_CODE (op1) == CONST_INT
2006           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2007               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2008         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2009                        % GET_MODE_BITSIZE (mode));
2010       else if (GET_CODE (op1) == SUBREG
2011                && subreg_lowpart_p (op1))
2012         op1 = SUBREG_REG (op1);
2013     }
2014
2015   if (op1 == const0_rtx)
2016     return shifted;
2017
2018   /* Check whether its cheaper to implement a left shift by a constant
2019      bit count by a sequence of additions.  */
2020   if (code == LSHIFT_EXPR
2021       && GET_CODE (op1) == CONST_INT
2022       && INTVAL (op1) > 0
2023       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2024       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2025     {
2026       int i;
2027       for (i = 0; i < INTVAL (op1); i++)
2028         {
2029           temp = force_reg (mode, shifted);
2030           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2031                                   unsignedp, OPTAB_LIB_WIDEN);
2032         }
2033       return shifted;
2034     }
2035
2036   for (try = 0; temp == 0 && try < 3; try++)
2037     {
2038       enum optab_methods methods;
2039
2040       if (try == 0)
2041         methods = OPTAB_DIRECT;
2042       else if (try == 1)
2043         methods = OPTAB_WIDEN;
2044       else
2045         methods = OPTAB_LIB_WIDEN;
2046
2047       if (rotate)
2048         {
2049           /* Widening does not work for rotation.  */
2050           if (methods == OPTAB_WIDEN)
2051             continue;
2052           else if (methods == OPTAB_LIB_WIDEN)
2053             {
2054               /* If we have been unable to open-code this by a rotation,
2055                  do it as the IOR of two shifts.  I.e., to rotate A
2056                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2057                  where C is the bitsize of A.
2058
2059                  It is theoretically possible that the target machine might
2060                  not be able to perform either shift and hence we would
2061                  be making two libcalls rather than just the one for the
2062                  shift (similarly if IOR could not be done).  We will allow
2063                  this extremely unlikely lossage to avoid complicating the
2064                  code below.  */
2065
2066               rtx subtarget = target == shifted ? 0 : target;
2067               rtx temp1;
2068               tree type = TREE_TYPE (amount);
2069               tree new_amount = make_tree (type, op1);
2070               tree other_amount
2071                 = fold (build (MINUS_EXPR, type,
2072                                convert (type,
2073                                         build_int_2 (GET_MODE_BITSIZE (mode),
2074                                                      0)),
2075                                amount));
2076
2077               shifted = force_reg (mode, shifted);
2078
2079               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2080                                    mode, shifted, new_amount, subtarget, 1);
2081               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2082                                     mode, shifted, other_amount, 0, 1);
2083               return expand_binop (mode, ior_optab, temp, temp1, target,
2084                                    unsignedp, methods);
2085             }
2086
2087           temp = expand_binop (mode,
2088                                left ? rotl_optab : rotr_optab,
2089                                shifted, op1, target, unsignedp, methods);
2090
2091           /* If we don't have the rotate, but we are rotating by a constant
2092              that is in range, try a rotate in the opposite direction.  */
2093
2094           if (temp == 0 && GET_CODE (op1) == CONST_INT
2095               && INTVAL (op1) > 0
2096               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2097             temp = expand_binop (mode,
2098                                  left ? rotr_optab : rotl_optab,
2099                                  shifted,
2100                                  GEN_INT (GET_MODE_BITSIZE (mode)
2101                                           - INTVAL (op1)),
2102                                  target, unsignedp, methods);
2103         }
2104       else if (unsignedp)
2105         temp = expand_binop (mode,
2106                              left ? ashl_optab : lshr_optab,
2107                              shifted, op1, target, unsignedp, methods);
2108
2109       /* Do arithmetic shifts.
2110          Also, if we are going to widen the operand, we can just as well
2111          use an arithmetic right-shift instead of a logical one.  */
2112       if (temp == 0 && ! rotate
2113           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2114         {
2115           enum optab_methods methods1 = methods;
2116
2117           /* If trying to widen a log shift to an arithmetic shift,
2118              don't accept an arithmetic shift of the same size.  */
2119           if (unsignedp)
2120             methods1 = OPTAB_MUST_WIDEN;
2121
2122           /* Arithmetic shift */
2123
2124           temp = expand_binop (mode,
2125                                left ? ashl_optab : ashr_optab,
2126                                shifted, op1, target, unsignedp, methods1);
2127         }
2128
2129       /* We used to try extzv here for logical right shifts, but that was
2130          only useful for one machine, the VAX, and caused poor code
2131          generation there for lshrdi3, so the code was deleted and a
2132          define_expand for lshrsi3 was added to vax.md.  */
2133     }
2134
2135   if (temp == 0)
2136     abort ();
2137   return temp;
2138 }
2139 \f
2140 enum alg_code { alg_zero, alg_m, alg_shift,
2141                   alg_add_t_m2, alg_sub_t_m2,
2142                   alg_add_factor, alg_sub_factor,
2143                   alg_add_t2_m, alg_sub_t2_m,
2144                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2145
2146 /* This structure records a sequence of operations.
2147    `ops' is the number of operations recorded.
2148    `cost' is their total cost.
2149    The operations are stored in `op' and the corresponding
2150    logarithms of the integer coefficients in `log'.
2151
2152    These are the operations:
2153    alg_zero             total := 0;
2154    alg_m                total := multiplicand;
2155    alg_shift            total := total * coeff
2156    alg_add_t_m2         total := total + multiplicand * coeff;
2157    alg_sub_t_m2         total := total - multiplicand * coeff;
2158    alg_add_factor       total := total * coeff + total;
2159    alg_sub_factor       total := total * coeff - total;
2160    alg_add_t2_m         total := total * coeff + multiplicand;
2161    alg_sub_t2_m         total := total * coeff - multiplicand;
2162
2163    The first operand must be either alg_zero or alg_m.  */
2164
2165 struct algorithm
2166 {
2167   short cost;
2168   short ops;
2169   /* The size of the OP and LOG fields are not directly related to the
2170      word size, but the worst-case algorithms will be if we have few
2171      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2172      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2173      in total wordsize operations.  */
2174   enum alg_code op[MAX_BITS_PER_WORD];
2175   char log[MAX_BITS_PER_WORD];
2176 };
2177
2178 /* Indicates the type of fixup needed after a constant multiplication.
2179    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2180    the result should be negated, and ADD_VARIANT means that the
2181    multiplicand should be added to the result.  */
2182 enum mult_variant {basic_variant, negate_variant, add_variant};
2183
2184 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2185                         int, enum machine_mode mode);
2186 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2187                                  struct algorithm *, enum mult_variant *, int);
2188 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2189                               const struct algorithm *, enum mult_variant);
2190 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2191                                                  int, unsigned HOST_WIDE_INT *,
2192                                                  int *, int *);
2193 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2194 static rtx extract_high_half (enum machine_mode, rtx);
2195 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2196                                        int, int);
2197 /* Compute and return the best algorithm for multiplying by T.
2198    The algorithm must cost less than cost_limit
2199    If retval.cost >= COST_LIMIT, no algorithm was found and all
2200    other field of the returned struct are undefined.
2201    MODE is the machine mode of the multiplication.  */
2202
2203 static void
2204 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2205             int cost_limit, enum machine_mode mode)
2206 {
2207   int m;
2208   struct algorithm *alg_in, *best_alg;
2209   int cost;
2210   unsigned HOST_WIDE_INT q;
2211   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2212
2213   /* Indicate that no algorithm is yet found.  If no algorithm
2214      is found, this value will be returned and indicate failure.  */
2215   alg_out->cost = cost_limit;
2216
2217   if (cost_limit <= 0)
2218     return;
2219
2220   /* Restrict the bits of "t" to the multiplication's mode.  */
2221   t &= GET_MODE_MASK (mode);
2222
2223   /* t == 1 can be done in zero cost.  */
2224   if (t == 1)
2225     {
2226       alg_out->ops = 1;
2227       alg_out->cost = 0;
2228       alg_out->op[0] = alg_m;
2229       return;
2230     }
2231
2232   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2233      fail now.  */
2234   if (t == 0)
2235     {
2236       if (zero_cost >= cost_limit)
2237         return;
2238       else
2239         {
2240           alg_out->ops = 1;
2241           alg_out->cost = zero_cost;
2242           alg_out->op[0] = alg_zero;
2243           return;
2244         }
2245     }
2246
2247   /* We'll be needing a couple extra algorithm structures now.  */
2248
2249   alg_in = alloca (sizeof (struct algorithm));
2250   best_alg = alloca (sizeof (struct algorithm));
2251
2252   /* If we have a group of zero bits at the low-order part of T, try
2253      multiplying by the remaining bits and then doing a shift.  */
2254
2255   if ((t & 1) == 0)
2256     {
2257       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2258       if (m < maxm)
2259         {
2260           q = t >> m;
2261           /* The function expand_shift will choose between a shift and
2262              a sequence of additions, so the observed cost is given as
2263              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2264           cost = m * add_cost[mode];
2265           if (shift_cost[mode][m] < cost)
2266             cost = shift_cost[mode][m];
2267           synth_mult (alg_in, q, cost_limit - cost, mode);
2268
2269           cost += alg_in->cost;
2270           if (cost < cost_limit)
2271             {
2272               struct algorithm *x;
2273               x = alg_in, alg_in = best_alg, best_alg = x;
2274               best_alg->log[best_alg->ops] = m;
2275               best_alg->op[best_alg->ops] = alg_shift;
2276               cost_limit = cost;
2277             }
2278         }
2279     }
2280
2281   /* If we have an odd number, add or subtract one.  */
2282   if ((t & 1) != 0)
2283     {
2284       unsigned HOST_WIDE_INT w;
2285
2286       for (w = 1; (w & t) != 0; w <<= 1)
2287         ;
2288       /* If T was -1, then W will be zero after the loop.  This is another
2289          case where T ends with ...111.  Handling this with (T + 1) and
2290          subtract 1 produces slightly better code and results in algorithm
2291          selection much faster than treating it like the ...0111 case
2292          below.  */
2293       if (w == 0
2294           || (w > 2
2295               /* Reject the case where t is 3.
2296                  Thus we prefer addition in that case.  */
2297               && t != 3))
2298         {
2299           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2300
2301           cost = add_cost[mode];
2302           synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2303
2304           cost += alg_in->cost;
2305           if (cost < cost_limit)
2306             {
2307               struct algorithm *x;
2308               x = alg_in, alg_in = best_alg, best_alg = x;
2309               best_alg->log[best_alg->ops] = 0;
2310               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2311               cost_limit = cost;
2312             }
2313         }
2314       else
2315         {
2316           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2317
2318           cost = add_cost[mode];
2319           synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2320
2321           cost += alg_in->cost;
2322           if (cost < cost_limit)
2323             {
2324               struct algorithm *x;
2325               x = alg_in, alg_in = best_alg, best_alg = x;
2326               best_alg->log[best_alg->ops] = 0;
2327               best_alg->op[best_alg->ops] = alg_add_t_m2;
2328               cost_limit = cost;
2329             }
2330         }
2331     }
2332
2333   /* Look for factors of t of the form
2334      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2335      If we find such a factor, we can multiply by t using an algorithm that
2336      multiplies by q, shift the result by m and add/subtract it to itself.
2337
2338      We search for large factors first and loop down, even if large factors
2339      are less probable than small; if we find a large factor we will find a
2340      good sequence quickly, and therefore be able to prune (by decreasing
2341      COST_LIMIT) the search.  */
2342
2343   for (m = floor_log2 (t - 1); m >= 2; m--)
2344     {
2345       unsigned HOST_WIDE_INT d;
2346
2347       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2348       if (t % d == 0 && t > d && m < maxm)
2349         {
2350           cost = add_cost[mode] + shift_cost[mode][m];
2351           if (shiftadd_cost[mode][m] < cost)
2352             cost = shiftadd_cost[mode][m];
2353           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2354
2355           cost += alg_in->cost;
2356           if (cost < cost_limit)
2357             {
2358               struct algorithm *x;
2359               x = alg_in, alg_in = best_alg, best_alg = x;
2360               best_alg->log[best_alg->ops] = m;
2361               best_alg->op[best_alg->ops] = alg_add_factor;
2362               cost_limit = cost;
2363             }
2364           /* Other factors will have been taken care of in the recursion.  */
2365           break;
2366         }
2367
2368       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2369       if (t % d == 0 && t > d && m < maxm)
2370         {
2371           cost = add_cost[mode] + shift_cost[mode][m];
2372           if (shiftsub_cost[mode][m] < cost)
2373             cost = shiftsub_cost[mode][m];
2374           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2375
2376           cost += alg_in->cost;
2377           if (cost < cost_limit)
2378             {
2379               struct algorithm *x;
2380               x = alg_in, alg_in = best_alg, best_alg = x;
2381               best_alg->log[best_alg->ops] = m;
2382               best_alg->op[best_alg->ops] = alg_sub_factor;
2383               cost_limit = cost;
2384             }
2385           break;
2386         }
2387     }
2388
2389   /* Try shift-and-add (load effective address) instructions,
2390      i.e. do a*3, a*5, a*9.  */
2391   if ((t & 1) != 0)
2392     {
2393       q = t - 1;
2394       q = q & -q;
2395       m = exact_log2 (q);
2396       if (m >= 0 && m < maxm)
2397         {
2398           cost = shiftadd_cost[mode][m];
2399           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2400
2401           cost += alg_in->cost;
2402           if (cost < cost_limit)
2403             {
2404               struct algorithm *x;
2405               x = alg_in, alg_in = best_alg, best_alg = x;
2406               best_alg->log[best_alg->ops] = m;
2407               best_alg->op[best_alg->ops] = alg_add_t2_m;
2408               cost_limit = cost;
2409             }
2410         }
2411
2412       q = t + 1;
2413       q = q & -q;
2414       m = exact_log2 (q);
2415       if (m >= 0 && m < maxm)
2416         {
2417           cost = shiftsub_cost[mode][m];
2418           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2419
2420           cost += alg_in->cost;
2421           if (cost < cost_limit)
2422             {
2423               struct algorithm *x;
2424               x = alg_in, alg_in = best_alg, best_alg = x;
2425               best_alg->log[best_alg->ops] = m;
2426               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2427               cost_limit = cost;
2428             }
2429         }
2430     }
2431
2432   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2433      we have not found any algorithm.  */
2434   if (cost_limit == alg_out->cost)
2435     return;
2436
2437   /* If we are getting a too long sequence for `struct algorithm'
2438      to record, make this search fail.  */
2439   if (best_alg->ops == MAX_BITS_PER_WORD)
2440     return;
2441
2442   /* Copy the algorithm from temporary space to the space at alg_out.
2443      We avoid using structure assignment because the majority of
2444      best_alg is normally undefined, and this is a critical function.  */
2445   alg_out->ops = best_alg->ops + 1;
2446   alg_out->cost = cost_limit;
2447   memcpy (alg_out->op, best_alg->op,
2448           alg_out->ops * sizeof *alg_out->op);
2449   memcpy (alg_out->log, best_alg->log,
2450           alg_out->ops * sizeof *alg_out->log);
2451 }
2452 \f
2453 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2454    Try three variations:
2455
2456        - a shift/add sequence based on VAL itself
2457        - a shift/add sequence based on -VAL, followed by a negation
2458        - a shift/add sequence based on VAL - 1, followed by an addition.
2459
2460    Return true if the cheapest of these cost less than MULT_COST,
2461    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2462
2463 static bool
2464 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2465                      struct algorithm *alg, enum mult_variant *variant,
2466                      int mult_cost)
2467 {
2468   struct algorithm alg2;
2469
2470   *variant = basic_variant;
2471   synth_mult (alg, val, mult_cost, mode);
2472
2473   /* This works only if the inverted value actually fits in an
2474      `unsigned int' */
2475   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2476     {
2477       synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2478                   mode);
2479       alg2.cost += neg_cost[mode];
2480       if (alg2.cost < alg->cost)
2481         *alg = alg2, *variant = negate_variant;
2482     }
2483
2484   /* This proves very useful for division-by-constant.  */
2485   synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2486               mode);
2487   alg2.cost += add_cost[mode];
2488   if (alg2.cost < alg->cost)
2489     *alg = alg2, *variant = add_variant;
2490
2491   return alg->cost < mult_cost;
2492 }
2493
2494 /* A subroutine of expand_mult, used for constant multiplications.
2495    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2496    convenient.  Use the shift/add sequence described by ALG and apply
2497    the final fixup specified by VARIANT.  */
2498
2499 static rtx
2500 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2501                    rtx target, const struct algorithm *alg,
2502                    enum mult_variant variant)
2503 {
2504   HOST_WIDE_INT val_so_far;
2505   rtx insn, accum, tem;
2506   int opno;
2507   enum machine_mode nmode;
2508
2509   /* op0 must be register to make mult_cost match the precomputed
2510      shiftadd_cost array.  */
2511   op0 = protect_from_queue (op0, 0);
2512
2513   /* Avoid referencing memory over and over.
2514      For speed, but also for correctness when mem is volatile.  */
2515   if (MEM_P (op0))
2516     op0 = force_reg (mode, op0);
2517
2518   /* ACCUM starts out either as OP0 or as a zero, depending on
2519      the first operation.  */
2520
2521   if (alg->op[0] == alg_zero)
2522     {
2523       accum = copy_to_mode_reg (mode, const0_rtx);
2524       val_so_far = 0;
2525     }
2526   else if (alg->op[0] == alg_m)
2527     {
2528       accum = copy_to_mode_reg (mode, op0);
2529       val_so_far = 1;
2530     }
2531   else
2532     abort ();
2533
2534   for (opno = 1; opno < alg->ops; opno++)
2535     {
2536       int log = alg->log[opno];
2537       int preserve = preserve_subexpressions_p ();
2538       rtx shift_subtarget = preserve ? 0 : accum;
2539       rtx add_target
2540         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2541            && ! preserve)
2542           ? target : 0;
2543       rtx accum_target = preserve ? 0 : accum;
2544
2545       switch (alg->op[opno])
2546         {
2547         case alg_shift:
2548           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2549                                 build_int_2 (log, 0), NULL_RTX, 0);
2550           val_so_far <<= log;
2551           break;
2552
2553         case alg_add_t_m2:
2554           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2555                               build_int_2 (log, 0), NULL_RTX, 0);
2556           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2557                                  add_target ? add_target : accum_target);
2558           val_so_far += (HOST_WIDE_INT) 1 << log;
2559           break;
2560
2561         case alg_sub_t_m2:
2562           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2563                               build_int_2 (log, 0), NULL_RTX, 0);
2564           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2565                                  add_target ? add_target : accum_target);
2566           val_so_far -= (HOST_WIDE_INT) 1 << log;
2567           break;
2568
2569         case alg_add_t2_m:
2570           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2571                                 build_int_2 (log, 0), shift_subtarget,
2572                                 0);
2573           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2574                                  add_target ? add_target : accum_target);
2575           val_so_far = (val_so_far << log) + 1;
2576           break;
2577
2578         case alg_sub_t2_m:
2579           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2580                                 build_int_2 (log, 0), shift_subtarget, 0);
2581           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2582                                  add_target ? add_target : accum_target);
2583           val_so_far = (val_so_far << log) - 1;
2584           break;
2585
2586         case alg_add_factor:
2587           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2588                               build_int_2 (log, 0), NULL_RTX, 0);
2589           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2590                                  add_target ? add_target : accum_target);
2591           val_so_far += val_so_far << log;
2592           break;
2593
2594         case alg_sub_factor:
2595           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2596                               build_int_2 (log, 0), NULL_RTX, 0);
2597           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2598                                  (add_target ? add_target
2599                                   : preserve ? 0 : tem));
2600           val_so_far = (val_so_far << log) - val_so_far;
2601           break;
2602
2603         default:
2604           abort ();
2605         }
2606
2607       /* Write a REG_EQUAL note on the last insn so that we can cse
2608          multiplication sequences.  Note that if ACCUM is a SUBREG,
2609          we've set the inner register and must properly indicate
2610          that.  */
2611
2612       tem = op0, nmode = mode;
2613       if (GET_CODE (accum) == SUBREG)
2614         {
2615           nmode = GET_MODE (SUBREG_REG (accum));
2616           tem = gen_lowpart (nmode, op0);
2617         }
2618
2619       insn = get_last_insn ();
2620       set_unique_reg_note (insn, REG_EQUAL,
2621                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2622     }
2623
2624   if (variant == negate_variant)
2625     {
2626       val_so_far = -val_so_far;
2627       accum = expand_unop (mode, neg_optab, accum, target, 0);
2628     }
2629   else if (variant == add_variant)
2630     {
2631       val_so_far = val_so_far + 1;
2632       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2633     }
2634
2635   /* Compare only the bits of val and val_so_far that are significant
2636      in the result mode, to avoid sign-/zero-extension confusion.  */
2637   val &= GET_MODE_MASK (mode);
2638   val_so_far &= GET_MODE_MASK (mode);
2639   if (val != val_so_far)
2640     abort ();
2641
2642   return accum;
2643 }
2644
2645 /* Perform a multiplication and return an rtx for the result.
2646    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2647    TARGET is a suggestion for where to store the result (an rtx).
2648
2649    We check specially for a constant integer as OP1.
2650    If you want this check for OP0 as well, then before calling
2651    you should swap the two operands if OP0 would be constant.  */
2652
2653 rtx
2654 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2655              int unsignedp)
2656 {
2657   rtx const_op1 = op1;
2658   enum mult_variant variant;
2659   struct algorithm algorithm;
2660
2661   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2662      less than or equal in size to `unsigned int' this doesn't matter.
2663      If the mode is larger than `unsigned int', then synth_mult works only
2664      if the constant value exactly fits in an `unsigned int' without any
2665      truncation.  This means that multiplying by negative values does
2666      not work; results are off by 2^32 on a 32 bit machine.  */
2667
2668   /* If we are multiplying in DImode, it may still be a win
2669      to try to work with shifts and adds.  */
2670   if (GET_CODE (op1) == CONST_DOUBLE
2671       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2672       && HOST_BITS_PER_INT >= BITS_PER_WORD
2673       && CONST_DOUBLE_HIGH (op1) == 0)
2674     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2675   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2676            && GET_CODE (op1) == CONST_INT
2677            && INTVAL (op1) < 0)
2678     const_op1 = 0;
2679
2680   /* We used to test optimize here, on the grounds that it's better to
2681      produce a smaller program when -O is not used.
2682      But this causes such a terrible slowdown sometimes
2683      that it seems better to use synth_mult always.  */
2684
2685   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2686       && (unsignedp || !flag_trapv))
2687     {
2688       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2689
2690       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2691                                mult_cost))
2692         return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2693                                   &algorithm, variant);
2694     }
2695
2696   if (GET_CODE (op0) == CONST_DOUBLE)
2697     {
2698       rtx temp = op0;
2699       op0 = op1;
2700       op1 = temp;
2701     }
2702
2703   /* Expand x*2.0 as x+x.  */
2704   if (GET_CODE (op1) == CONST_DOUBLE
2705       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2706     {
2707       REAL_VALUE_TYPE d;
2708       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2709
2710       if (REAL_VALUES_EQUAL (d, dconst2))
2711         {
2712           op0 = force_reg (GET_MODE (op0), op0);
2713           return expand_binop (mode, add_optab, op0, op0,
2714                                target, unsignedp, OPTAB_LIB_WIDEN);
2715         }
2716     }
2717
2718   /* This used to use umul_optab if unsigned, but for non-widening multiply
2719      there is no difference between signed and unsigned.  */
2720   op0 = expand_binop (mode,
2721                       ! unsignedp
2722                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2723                       ? smulv_optab : smul_optab,
2724                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2725   if (op0 == 0)
2726     abort ();
2727   return op0;
2728 }
2729 \f
2730 /* Return the smallest n such that 2**n >= X.  */
2731
2732 int
2733 ceil_log2 (unsigned HOST_WIDE_INT x)
2734 {
2735   return floor_log2 (x - 1) + 1;
2736 }
2737
2738 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2739    replace division by D, and put the least significant N bits of the result
2740    in *MULTIPLIER_PTR and return the most significant bit.
2741
2742    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2743    needed precision is in PRECISION (should be <= N).
2744
2745    PRECISION should be as small as possible so this function can choose
2746    multiplier more freely.
2747
2748    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2749    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2750
2751    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2752    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2753
2754 static
2755 unsigned HOST_WIDE_INT
2756 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2757                    unsigned HOST_WIDE_INT *multiplier_ptr,
2758                    int *post_shift_ptr, int *lgup_ptr)
2759 {
2760   HOST_WIDE_INT mhigh_hi, mlow_hi;
2761   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2762   int lgup, post_shift;
2763   int pow, pow2;
2764   unsigned HOST_WIDE_INT nl, dummy1;
2765   HOST_WIDE_INT nh, dummy2;
2766
2767   /* lgup = ceil(log2(divisor)); */
2768   lgup = ceil_log2 (d);
2769
2770   if (lgup > n)
2771     abort ();
2772
2773   pow = n + lgup;
2774   pow2 = n + lgup - precision;
2775
2776   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2777     {
2778       /* We could handle this with some effort, but this case is much better
2779          handled directly with a scc insn, so rely on caller using that.  */
2780       abort ();
2781     }
2782
2783   /* mlow = 2^(N + lgup)/d */
2784  if (pow >= HOST_BITS_PER_WIDE_INT)
2785     {
2786       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2787       nl = 0;
2788     }
2789   else
2790     {
2791       nh = 0;
2792       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2793     }
2794   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2795                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2796
2797   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2798   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2799     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2800   else
2801     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2802   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2803                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2804
2805   if (mhigh_hi && nh - d >= d)
2806     abort ();
2807   if (mhigh_hi > 1 || mlow_hi > 1)
2808     abort ();
2809   /* Assert that mlow < mhigh.  */
2810   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2811     abort ();
2812
2813   /* If precision == N, then mlow, mhigh exceed 2^N
2814      (but they do not exceed 2^(N+1)).  */
2815
2816   /* Reduce to lowest terms.  */
2817   for (post_shift = lgup; post_shift > 0; post_shift--)
2818     {
2819       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2820       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2821       if (ml_lo >= mh_lo)
2822         break;
2823
2824       mlow_hi = 0;
2825       mlow_lo = ml_lo;
2826       mhigh_hi = 0;
2827       mhigh_lo = mh_lo;
2828     }
2829
2830   *post_shift_ptr = post_shift;
2831   *lgup_ptr = lgup;
2832   if (n < HOST_BITS_PER_WIDE_INT)
2833     {
2834       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2835       *multiplier_ptr = mhigh_lo & mask;
2836       return mhigh_lo >= mask;
2837     }
2838   else
2839     {
2840       *multiplier_ptr = mhigh_lo;
2841       return mhigh_hi;
2842     }
2843 }
2844
2845 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2846    congruent to 1 (mod 2**N).  */
2847
2848 static unsigned HOST_WIDE_INT
2849 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2850 {
2851   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2852
2853   /* The algorithm notes that the choice y = x satisfies
2854      x*y == 1 mod 2^3, since x is assumed odd.
2855      Each iteration doubles the number of bits of significance in y.  */
2856
2857   unsigned HOST_WIDE_INT mask;
2858   unsigned HOST_WIDE_INT y = x;
2859   int nbit = 3;
2860
2861   mask = (n == HOST_BITS_PER_WIDE_INT
2862           ? ~(unsigned HOST_WIDE_INT) 0
2863           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2864
2865   while (nbit < n)
2866     {
2867       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2868       nbit *= 2;
2869     }
2870   return y;
2871 }
2872
2873 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2874    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2875    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2876    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2877    become signed.
2878
2879    The result is put in TARGET if that is convenient.
2880
2881    MODE is the mode of operation.  */
2882
2883 rtx
2884 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2885                              rtx op1, rtx target, int unsignedp)
2886 {
2887   rtx tem;
2888   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2889
2890   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2891                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2892                       NULL_RTX, 0);
2893   tem = expand_and (mode, tem, op1, NULL_RTX);
2894   adj_operand
2895     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2896                      adj_operand);
2897
2898   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2899                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2900                       NULL_RTX, 0);
2901   tem = expand_and (mode, tem, op0, NULL_RTX);
2902   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2903                           target);
2904
2905   return target;
2906 }
2907
2908 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
2909
2910 static rtx
2911 extract_high_half (enum machine_mode mode, rtx op)
2912 {
2913   enum machine_mode wider_mode;
2914
2915   if (mode == word_mode)
2916     return gen_highpart (mode, op);
2917
2918   wider_mode = GET_MODE_WIDER_MODE (mode);
2919   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2920                      build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
2921   return convert_modes (mode, wider_mode, op, 0);
2922 }
2923
2924 /* Like expand_mult_highpart, but only consider using a multiplication
2925    optab.  OP1 is an rtx for the constant operand.  */
2926
2927 static rtx
2928 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2929                             rtx target, int unsignedp, int max_cost)
2930 {
2931   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2932   enum machine_mode wider_mode;
2933   optab moptab;
2934   rtx tem;
2935   int size;
2936
2937   wider_mode = GET_MODE_WIDER_MODE (mode);
2938   size = GET_MODE_BITSIZE (mode);
2939
2940   /* Firstly, try using a multiplication insn that only generates the needed
2941      high part of the product, and in the sign flavor of unsignedp.  */
2942   if (mul_highpart_cost[mode] < max_cost)
2943     {
2944       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2945       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2946                           unsignedp, OPTAB_DIRECT);
2947       if (tem)
2948         return tem;
2949     }
2950
2951   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2952      Need to adjust the result after the multiplication.  */
2953   if (size - 1 < BITS_PER_WORD
2954       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2955           + 4 * add_cost[mode] < max_cost))
2956     {
2957       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2958       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2959                           unsignedp, OPTAB_DIRECT);
2960       if (tem)
2961         /* We used the wrong signedness.  Adjust the result.  */
2962         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2963                                             tem, unsignedp);
2964     }
2965
2966   /* Try widening multiplication.  */
2967   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2968   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2969       && mul_widen_cost[wider_mode] < max_cost)
2970     {
2971       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2972                           unsignedp, OPTAB_WIDEN);
2973       if (tem)
2974         return extract_high_half (mode, tem);
2975     }
2976
2977   /* Try widening the mode and perform a non-widening multiplication.  */
2978   moptab = smul_optab;
2979   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2980       && size - 1 < BITS_PER_WORD
2981       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
2982     {
2983       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
2984                           unsignedp, OPTAB_WIDEN);
2985       if (tem)
2986         return extract_high_half (mode, tem);
2987     }
2988
2989   /* Try widening multiplication of opposite signedness, and adjust.  */
2990   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2991   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2992       && size - 1 < BITS_PER_WORD
2993       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
2994           + 4 * add_cost[mode] < max_cost))
2995     {
2996       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
2997                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2998       if (tem != 0)
2999         {
3000           tem = extract_high_half (mode, tem);
3001           /* We used the wrong signedness.  Adjust the result.  */
3002           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3003                                               target, unsignedp);
3004         }
3005     }
3006
3007   return 0;
3008 }
3009
3010 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3011    in TARGET if that is convenient, and return where the result is.  If the
3012    operation can not be performed, 0 is returned.
3013
3014    MODE is the mode of operation and result.
3015
3016    UNSIGNEDP nonzero means unsigned multiply.
3017
3018    MAX_COST is the total allowed cost for the expanded RTL.  */
3019
3020 rtx
3021 expand_mult_highpart (enum machine_mode mode, rtx op0,
3022                       unsigned HOST_WIDE_INT cnst1, rtx target,
3023                       int unsignedp, int max_cost)
3024 {
3025   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3026   int extra_cost;
3027   bool sign_adjust = false;
3028   enum mult_variant variant;
3029   struct algorithm alg;
3030   rtx op1, tem;
3031
3032   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3033   if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3034     abort ();
3035
3036   op1 = gen_int_mode (cnst1, wider_mode);
3037   cnst1 &= GET_MODE_MASK (mode);
3038
3039   /* We can't optimize modes wider than BITS_PER_WORD.
3040      ??? We might be able to perform double-word arithmetic if
3041      mode == word_mode, however all the cost calculations in
3042      synth_mult etc. assume single-word operations.  */
3043   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3044     return expand_mult_highpart_optab (mode, op0, op1, target,
3045                                        unsignedp, max_cost);
3046
3047   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3048
3049   /* Check whether we try to multiply by a negative constant.  */
3050   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3051     {
3052       sign_adjust = true;
3053       extra_cost += add_cost[mode];
3054     }
3055
3056   /* See whether shift/add multiplication is cheap enough.  */
3057   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3058                            max_cost - extra_cost))
3059     {
3060       /* See whether the specialized multiplication optabs are
3061          cheaper than the shift/add version.  */
3062       tem = expand_mult_highpart_optab (mode, op0, op1, target,
3063                                         unsignedp, alg.cost + extra_cost);
3064       if (tem)
3065         return tem;
3066
3067       tem = convert_to_mode (wider_mode, op0, unsignedp);
3068       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3069       tem = extract_high_half (mode, tem);
3070
3071       /* Adjust result for signedness.  */
3072       if (sign_adjust)
3073         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3074
3075       return tem;
3076     }
3077   return expand_mult_highpart_optab (mode, op0, op1, target,
3078                                      unsignedp, max_cost);
3079 }
3080
3081
3082 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3083
3084 static rtx
3085 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3086 {
3087   unsigned HOST_WIDE_INT mask;
3088   rtx result, temp, shift, label;
3089   int logd;
3090
3091   logd = floor_log2 (d);
3092   result = gen_reg_rtx (mode);
3093
3094   /* Avoid conditional branches when they're expensive.  */
3095   if (BRANCH_COST >= 2
3096       && !optimize_size)
3097     {
3098       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3099                                       mode, 0, -1);
3100       if (signmask)
3101         {
3102           signmask = force_reg (mode, signmask);
3103           mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3104           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3105
3106           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3107              which instruction sequence to use.  If logical right shifts
3108              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3109              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3110
3111           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3112           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3113               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3114             {
3115               temp = expand_binop (mode, xor_optab, op0, signmask,
3116                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3117               temp = expand_binop (mode, sub_optab, temp, signmask,
3118                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3119               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3120                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3121               temp = expand_binop (mode, xor_optab, temp, signmask,
3122                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3123               temp = expand_binop (mode, sub_optab, temp, signmask,
3124                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3125             }
3126           else
3127             {
3128               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3129                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3130               signmask = force_reg (mode, signmask);
3131
3132               temp = expand_binop (mode, add_optab, op0, signmask,
3133                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3134               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3135                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3136               temp = expand_binop (mode, sub_optab, temp, signmask,
3137                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3138             }
3139           return temp;
3140         }
3141     }
3142
3143   /* Mask contains the mode's signbit and the significant bits of the
3144      modulus.  By including the signbit in the operation, many targets
3145      can avoid an explicit compare operation in the following comparison
3146      against zero.  */
3147
3148   mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3149          | (((HOST_WIDE_INT) 1 << logd) - 1);
3150
3151   temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3152                        1, OPTAB_LIB_WIDEN);
3153   if (temp != result)
3154     emit_move_insn (result, temp);
3155
3156   label = gen_label_rtx ();
3157   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3158
3159   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3160                        0, OPTAB_LIB_WIDEN);
3161   mask = (HOST_WIDE_INT) -1 << logd;
3162   temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3163                        1, OPTAB_LIB_WIDEN);
3164   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3165                        0, OPTAB_LIB_WIDEN);
3166   if (temp != result)
3167     emit_move_insn (result, temp);
3168   emit_label (label);
3169   return result;
3170 }
3171 \f
3172 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3173    if that is convenient, and returning where the result is.
3174    You may request either the quotient or the remainder as the result;
3175    specify REM_FLAG nonzero to get the remainder.
3176
3177    CODE is the expression code for which kind of division this is;
3178    it controls how rounding is done.  MODE is the machine mode to use.
3179    UNSIGNEDP nonzero means do unsigned division.  */
3180
3181 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3182    and then correct it by or'ing in missing high bits
3183    if result of ANDI is nonzero.
3184    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3185    This could optimize to a bfexts instruction.
3186    But C doesn't use these operations, so their optimizations are
3187    left for later.  */
3188 /* ??? For modulo, we don't actually need the highpart of the first product,
3189    the low part will do nicely.  And for small divisors, the second multiply
3190    can also be a low-part only multiply or even be completely left out.
3191    E.g. to calculate the remainder of a division by 3 with a 32 bit
3192    multiply, multiply with 0x55555556 and extract the upper two bits;
3193    the result is exact for inputs up to 0x1fffffff.
3194    The input range can be reduced by using cross-sum rules.
3195    For odd divisors >= 3, the following table gives right shift counts
3196    so that if a number is shifted by an integer multiple of the given
3197    amount, the remainder stays the same:
3198    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3199    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3200    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3201    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3202    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3203
3204    Cross-sum rules for even numbers can be derived by leaving as many bits
3205    to the right alone as the divisor has zeros to the right.
3206    E.g. if x is an unsigned 32 bit number:
3207    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3208    */
3209
3210 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3211
3212 rtx
3213 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3214                rtx op0, rtx op1, rtx target, int unsignedp)
3215 {
3216   enum machine_mode compute_mode;
3217   rtx tquotient;
3218   rtx quotient = 0, remainder = 0;
3219   rtx last;
3220   int size;
3221   rtx insn, set;
3222   optab optab1, optab2;
3223   int op1_is_constant, op1_is_pow2 = 0;
3224   int max_cost, extra_cost;
3225   static HOST_WIDE_INT last_div_const = 0;
3226   static HOST_WIDE_INT ext_op1;
3227
3228   op1_is_constant = GET_CODE (op1) == CONST_INT;
3229   if (op1_is_constant)
3230     {
3231       ext_op1 = INTVAL (op1);
3232       if (unsignedp)
3233         ext_op1 &= GET_MODE_MASK (mode);
3234       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3235                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3236     }
3237
3238   /*
3239      This is the structure of expand_divmod:
3240
3241      First comes code to fix up the operands so we can perform the operations
3242      correctly and efficiently.
3243
3244      Second comes a switch statement with code specific for each rounding mode.
3245      For some special operands this code emits all RTL for the desired
3246      operation, for other cases, it generates only a quotient and stores it in
3247      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3248      to indicate that it has not done anything.
3249
3250      Last comes code that finishes the operation.  If QUOTIENT is set and
3251      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3252      QUOTIENT is not set, it is computed using trunc rounding.
3253
3254      We try to generate special code for division and remainder when OP1 is a
3255      constant.  If |OP1| = 2**n we can use shifts and some other fast
3256      operations.  For other values of OP1, we compute a carefully selected
3257      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3258      by m.
3259
3260      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3261      half of the product.  Different strategies for generating the product are
3262      implemented in expand_mult_highpart.
3263
3264      If what we actually want is the remainder, we generate that by another
3265      by-constant multiplication and a subtraction.  */
3266
3267   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3268      code below will malfunction if we are, so check here and handle
3269      the special case if so.  */
3270   if (op1 == const1_rtx)
3271     return rem_flag ? const0_rtx : op0;
3272
3273     /* When dividing by -1, we could get an overflow.
3274      negv_optab can handle overflows.  */
3275   if (! unsignedp && op1 == constm1_rtx)
3276     {
3277       if (rem_flag)
3278         return const0_rtx;
3279       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3280                           ? negv_optab : neg_optab, op0, target, 0);
3281     }
3282
3283   if (target
3284       /* Don't use the function value register as a target
3285          since we have to read it as well as write it,
3286          and function-inlining gets confused by this.  */
3287       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3288           /* Don't clobber an operand while doing a multi-step calculation.  */
3289           || ((rem_flag || op1_is_constant)
3290               && (reg_mentioned_p (target, op0)
3291                   || (MEM_P (op0) && MEM_P (target))))
3292           || reg_mentioned_p (target, op1)
3293           || (MEM_P (op1) && MEM_P (target))))
3294     target = 0;
3295
3296   /* Get the mode in which to perform this computation.  Normally it will
3297      be MODE, but sometimes we can't do the desired operation in MODE.
3298      If so, pick a wider mode in which we can do the operation.  Convert
3299      to that mode at the start to avoid repeated conversions.
3300
3301      First see what operations we need.  These depend on the expression
3302      we are evaluating.  (We assume that divxx3 insns exist under the
3303      same conditions that modxx3 insns and that these insns don't normally
3304      fail.  If these assumptions are not correct, we may generate less
3305      efficient code in some cases.)
3306
3307      Then see if we find a mode in which we can open-code that operation
3308      (either a division, modulus, or shift).  Finally, check for the smallest
3309      mode for which we can do the operation with a library call.  */
3310
3311   /* We might want to refine this now that we have division-by-constant
3312      optimization.  Since expand_mult_highpart tries so many variants, it is
3313      not straightforward to generalize this.  Maybe we should make an array
3314      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3315
3316   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3317             ? (unsignedp ? lshr_optab : ashr_optab)
3318             : (unsignedp ? udiv_optab : sdiv_optab));
3319   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3320             ? optab1
3321             : (unsignedp ? udivmod_optab : sdivmod_optab));
3322
3323   for (compute_mode = mode; compute_mode != VOIDmode;
3324        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3325     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3326         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3327       break;
3328
3329   if (compute_mode == VOIDmode)
3330     for (compute_mode = mode; compute_mode != VOIDmode;
3331          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3332       if (optab1->handlers[compute_mode].libfunc
3333           || optab2->handlers[compute_mode].libfunc)
3334         break;
3335
3336   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3337      in expand_binop.  */
3338   if (compute_mode == VOIDmode)
3339     compute_mode = mode;
3340
3341   if (target && GET_MODE (target) == compute_mode)
3342     tquotient = target;
3343   else
3344     tquotient = gen_reg_rtx (compute_mode);
3345
3346   size = GET_MODE_BITSIZE (compute_mode);
3347 #if 0
3348   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3349      (mode), and thereby get better code when OP1 is a constant.  Do that
3350      later.  It will require going over all usages of SIZE below.  */
3351   size = GET_MODE_BITSIZE (mode);
3352 #endif
3353
3354   /* Only deduct something for a REM if the last divide done was
3355      for a different constant.   Then set the constant of the last
3356      divide.  */
3357   max_cost = div_cost[compute_mode]
3358     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3359                       && INTVAL (op1) == last_div_const)
3360        ? mul_cost[compute_mode] + add_cost[compute_mode]
3361        : 0);
3362
3363   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3364
3365   /* Now convert to the best mode to use.  */
3366   if (compute_mode != mode)
3367     {
3368       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3369       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3370
3371       /* convert_modes may have placed op1 into a register, so we
3372          must recompute the following.  */
3373       op1_is_constant = GET_CODE (op1) == CONST_INT;
3374       op1_is_pow2 = (op1_is_constant
3375                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3376                           || (! unsignedp
3377                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3378     }
3379
3380   /* If one of the operands is a volatile MEM, copy it into a register.  */
3381
3382   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3383     op0 = force_reg (compute_mode, op0);
3384   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3385     op1 = force_reg (compute_mode, op1);
3386
3387   /* If we need the remainder or if OP1 is constant, we need to
3388      put OP0 in a register in case it has any queued subexpressions.  */
3389   if (rem_flag || op1_is_constant)
3390     op0 = force_reg (compute_mode, op0);
3391
3392   last = get_last_insn ();
3393
3394   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3395   if (unsignedp)
3396     {
3397       if (code == FLOOR_DIV_EXPR)
3398         code = TRUNC_DIV_EXPR;
3399       if (code == FLOOR_MOD_EXPR)
3400         code = TRUNC_MOD_EXPR;
3401       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3402         code = TRUNC_DIV_EXPR;
3403     }
3404
3405   if (op1 != const0_rtx)
3406     switch (code)
3407       {
3408       case TRUNC_MOD_EXPR:
3409       case TRUNC_DIV_EXPR:
3410         if (op1_is_constant)
3411           {
3412             if (unsignedp)
3413               {
3414                 unsigned HOST_WIDE_INT mh, ml;
3415                 int pre_shift, post_shift;
3416                 int dummy;
3417                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3418                                             & GET_MODE_MASK (compute_mode));
3419
3420                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3421                   {
3422                     pre_shift = floor_log2 (d);
3423                     if (rem_flag)
3424                       {
3425                         remainder
3426                           = expand_binop (compute_mode, and_optab, op0,
3427                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3428                                           remainder, 1,
3429                                           OPTAB_LIB_WIDEN);
3430                         if (remainder)
3431                           return gen_lowpart (mode, remainder);
3432                       }
3433                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3434                                              build_int_2 (pre_shift, 0),
3435                                              tquotient, 1);
3436                   }
3437                 else if (size <= HOST_BITS_PER_WIDE_INT)
3438                   {
3439                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3440                       {
3441                         /* Most significant bit of divisor is set; emit an scc
3442                            insn.  */
3443                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3444                                                     compute_mode, 1, 1);
3445                         if (quotient == 0)
3446                           goto fail1;
3447                       }
3448                     else
3449                       {
3450                         /* Find a suitable multiplier and right shift count
3451                            instead of multiplying with D.  */
3452
3453                         mh = choose_multiplier (d, size, size,
3454                                                 &ml, &post_shift, &dummy);
3455
3456                         /* If the suggested multiplier is more than SIZE bits,
3457                            we can do better for even divisors, using an
3458                            initial right shift.  */
3459                         if (mh != 0 && (d & 1) == 0)
3460                           {
3461                             pre_shift = floor_log2 (d & -d);
3462                             mh = choose_multiplier (d >> pre_shift, size,
3463                                                     size - pre_shift,
3464                                                     &ml, &post_shift, &dummy);
3465                             if (mh)
3466                               abort ();
3467                           }
3468                         else
3469                           pre_shift = 0;
3470
3471                         if (mh != 0)
3472                           {
3473                             rtx t1, t2, t3, t4;
3474
3475                             if (post_shift - 1 >= BITS_PER_WORD)
3476                               goto fail1;
3477
3478                             extra_cost
3479                               = (shift_cost[compute_mode][post_shift - 1]
3480                                  + shift_cost[compute_mode][1]
3481                                  + 2 * add_cost[compute_mode]);
3482                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3483                                                        NULL_RTX, 1,
3484                                                        max_cost - extra_cost);
3485                             if (t1 == 0)
3486                               goto fail1;
3487                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3488                                                                op0, t1),
3489                                                 NULL_RTX);
3490                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3491                                                build_int_2 (1, 0), NULL_RTX,1);
3492                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3493                                                               t1, t3),
3494                                                 NULL_RTX);
3495                             quotient
3496                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3497                                               build_int_2 (post_shift - 1, 0),
3498                                               tquotient, 1);
3499                           }
3500                         else
3501                           {
3502                             rtx t1, t2;
3503
3504                             if (pre_shift >= BITS_PER_WORD
3505                                 || post_shift >= BITS_PER_WORD)
3506                               goto fail1;
3507
3508                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3509                                                build_int_2 (pre_shift, 0),
3510                                                NULL_RTX, 1);
3511                             extra_cost
3512                               = (shift_cost[compute_mode][pre_shift]
3513                                  + shift_cost[compute_mode][post_shift]);
3514                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3515                                                        NULL_RTX, 1,
3516                                                        max_cost - extra_cost);
3517                             if (t2 == 0)
3518                               goto fail1;
3519                             quotient
3520                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3521                                               build_int_2 (post_shift, 0),
3522                                               tquotient, 1);
3523                           }
3524                       }
3525                   }
3526                 else            /* Too wide mode to use tricky code */
3527                   break;
3528
3529                 insn = get_last_insn ();
3530                 if (insn != last
3531                     && (set = single_set (insn)) != 0
3532                     && SET_DEST (set) == quotient)
3533                   set_unique_reg_note (insn,
3534                                        REG_EQUAL,
3535                                        gen_rtx_UDIV (compute_mode, op0, op1));
3536               }
3537             else                /* TRUNC_DIV, signed */
3538               {
3539                 unsigned HOST_WIDE_INT ml;
3540                 int lgup, post_shift;
3541                 HOST_WIDE_INT d = INTVAL (op1);
3542                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3543
3544                 /* n rem d = n rem -d */
3545                 if (rem_flag && d < 0)
3546                   {
3547                     d = abs_d;
3548                     op1 = gen_int_mode (abs_d, compute_mode);
3549                   }
3550
3551                 if (d == 1)
3552                   quotient = op0;
3553                 else if (d == -1)
3554                   quotient = expand_unop (compute_mode, neg_optab, op0,
3555                                           tquotient, 0);
3556                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3557                   {
3558                     /* This case is not handled correctly below.  */
3559                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3560                                                 compute_mode, 1, 1);
3561                     if (quotient == 0)
3562                       goto fail1;
3563                   }
3564                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3565                          && (rem_flag ? smod_pow2_cheap[compute_mode]
3566                                       : sdiv_pow2_cheap[compute_mode])
3567                          /* We assume that cheap metric is true if the
3568                             optab has an expander for this mode.  */
3569                          && (((rem_flag ? smod_optab : sdiv_optab)
3570                               ->handlers[compute_mode].insn_code
3571                               != CODE_FOR_nothing)
3572                              || (sdivmod_optab->handlers[compute_mode]
3573                                  .insn_code != CODE_FOR_nothing)))
3574                   ;
3575                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3576                   {
3577                     if (rem_flag)
3578                       {
3579                         remainder = expand_smod_pow2 (compute_mode, op0, d);
3580                         if (remainder)
3581                           return gen_lowpart (mode, remainder);
3582                       }
3583                     lgup = floor_log2 (abs_d);
3584                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3585                       {
3586                         rtx label = gen_label_rtx ();
3587                         rtx t1;
3588
3589                         t1 = copy_to_mode_reg (compute_mode, op0);
3590                         do_cmp_and_jump (t1, const0_rtx, GE,
3591                                          compute_mode, label);
3592                         expand_inc (t1, gen_int_mode (abs_d - 1,
3593                                                       compute_mode));
3594                         emit_label (label);
3595                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3596                                                  build_int_2 (lgup, 0),
3597                                                  tquotient, 0);
3598                       }
3599                     else
3600                       {
3601                         rtx t1, t2, t3;
3602                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3603                                            build_int_2 (size - 1, 0),
3604                                            NULL_RTX, 0);
3605                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3606                                            build_int_2 (size - lgup, 0),
3607                                            NULL_RTX, 1);
3608                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3609                                                           op0, t2),
3610                                             NULL_RTX);
3611                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3612                                                  build_int_2 (lgup, 0),
3613                                                  tquotient, 0);
3614                       }
3615
3616                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
3617                        negate the quotient.  */
3618                     if (d < 0)
3619                       {
3620                         insn = get_last_insn ();
3621                         if (insn != last
3622                             && (set = single_set (insn)) != 0
3623                             && SET_DEST (set) == quotient
3624                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3625                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3626                           set_unique_reg_note (insn,
3627                                                REG_EQUAL,
3628                                                gen_rtx_DIV (compute_mode,
3629                                                             op0,
3630                                                             GEN_INT
3631                                                             (trunc_int_for_mode
3632                                                              (abs_d,
3633                                                               compute_mode))));
3634
3635                         quotient = expand_unop (compute_mode, neg_optab,
3636                                                 quotient, quotient, 0);
3637                       }
3638                   }
3639                 else if (size <= HOST_BITS_PER_WIDE_INT)
3640                   {
3641                     choose_multiplier (abs_d, size, size - 1,
3642                                        &ml, &post_shift, &lgup);
3643                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3644                       {
3645                         rtx t1, t2, t3;
3646
3647                         if (post_shift >= BITS_PER_WORD
3648                             || size - 1 >= BITS_PER_WORD)
3649                           goto fail1;
3650
3651                         extra_cost = (shift_cost[compute_mode][post_shift]
3652                                       + shift_cost[compute_mode][size - 1]
3653                                       + add_cost[compute_mode]);
3654                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3655                                                    NULL_RTX, 0,
3656                                                    max_cost - extra_cost);
3657                         if (t1 == 0)
3658                           goto fail1;
3659                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3660                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3661                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3662                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3663                         if (d < 0)
3664                           quotient
3665                             = force_operand (gen_rtx_MINUS (compute_mode,
3666                                                             t3, t2),
3667                                              tquotient);
3668                         else
3669                           quotient
3670                             = force_operand (gen_rtx_MINUS (compute_mode,
3671                                                             t2, t3),
3672                                              tquotient);
3673                       }
3674                     else
3675                       {
3676                         rtx t1, t2, t3, t4;
3677
3678                         if (post_shift >= BITS_PER_WORD
3679                             || size - 1 >= BITS_PER_WORD)
3680                           goto fail1;
3681
3682                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3683                         extra_cost = (shift_cost[compute_mode][post_shift]
3684                                       + shift_cost[compute_mode][size - 1]
3685                                       + 2 * add_cost[compute_mode]);
3686                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3687                                                    NULL_RTX, 0,
3688                                                    max_cost - extra_cost);
3689                         if (t1 == 0)
3690                           goto fail1;
3691                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3692                                                           t1, op0),
3693                                             NULL_RTX);
3694                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3695                                            build_int_2 (post_shift, 0),
3696                                            NULL_RTX, 0);
3697                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3698                                            build_int_2 (size - 1, 0),
3699                                            NULL_RTX, 0);
3700                         if (d < 0)
3701                           quotient
3702                             = force_operand (gen_rtx_MINUS (compute_mode,
3703                                                             t4, t3),
3704                                              tquotient);
3705                         else
3706                           quotient
3707                             = force_operand (gen_rtx_MINUS (compute_mode,
3708                                                             t3, t4),
3709                                              tquotient);
3710                       }
3711                   }
3712                 else            /* Too wide mode to use tricky code */
3713                   break;
3714
3715                 insn = get_last_insn ();
3716                 if (insn != last
3717                     && (set = single_set (insn)) != 0
3718                     && SET_DEST (set) == quotient)
3719                   set_unique_reg_note (insn,
3720                                        REG_EQUAL,
3721                                        gen_rtx_DIV (compute_mode, op0, op1));
3722               }
3723             break;
3724           }
3725       fail1:
3726         delete_insns_since (last);
3727         break;
3728
3729       case FLOOR_DIV_EXPR:
3730       case FLOOR_MOD_EXPR:
3731       /* We will come here only for signed operations.  */
3732         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3733           {
3734             unsigned HOST_WIDE_INT mh, ml;
3735             int pre_shift, lgup, post_shift;
3736             HOST_WIDE_INT d = INTVAL (op1);
3737
3738             if (d > 0)
3739               {
3740                 /* We could just as easily deal with negative constants here,
3741                    but it does not seem worth the trouble for GCC 2.6.  */
3742                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3743                   {
3744                     pre_shift = floor_log2 (d);
3745                     if (rem_flag)
3746                       {
3747                         remainder = expand_binop (compute_mode, and_optab, op0,
3748                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3749                                                   remainder, 0, OPTAB_LIB_WIDEN);
3750                         if (remainder)
3751                           return gen_lowpart (mode, remainder);
3752                       }
3753                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3754                                              build_int_2 (pre_shift, 0),
3755                                              tquotient, 0);
3756                   }
3757                 else
3758                   {
3759                     rtx t1, t2, t3, t4;
3760
3761                     mh = choose_multiplier (d, size, size - 1,
3762                                             &ml, &post_shift, &lgup);
3763                     if (mh)
3764                       abort ();
3765
3766                     if (post_shift < BITS_PER_WORD
3767                         && size - 1 < BITS_PER_WORD)
3768                       {
3769                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3770                                            build_int_2 (size - 1, 0),
3771                                            NULL_RTX, 0);
3772                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3773                                            NULL_RTX, 0, OPTAB_WIDEN);
3774                         extra_cost = (shift_cost[compute_mode][post_shift]
3775                                       + shift_cost[compute_mode][size - 1]
3776                                       + 2 * add_cost[compute_mode]);
3777                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3778                                                    NULL_RTX, 1,
3779                                                    max_cost - extra_cost);
3780                         if (t3 != 0)
3781                           {
3782                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3783                                                build_int_2 (post_shift, 0),
3784                                                NULL_RTX, 1);
3785                             quotient = expand_binop (compute_mode, xor_optab,
3786                                                      t4, t1, tquotient, 0,
3787                                                      OPTAB_WIDEN);
3788                           }
3789                       }
3790                   }
3791               }
3792             else
3793               {
3794                 rtx nsign, t1, t2, t3, t4;
3795                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3796                                                   op0, constm1_rtx), NULL_RTX);
3797                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3798                                    0, OPTAB_WIDEN);
3799                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3800                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3801                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3802                                     NULL_RTX);
3803                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3804                                     NULL_RTX, 0);
3805                 if (t4)
3806                   {
3807                     rtx t5;
3808                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3809                                       NULL_RTX, 0);
3810                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3811                                                             t4, t5),
3812                                               tquotient);
3813                   }
3814               }
3815           }
3816
3817         if (quotient != 0)
3818           break;
3819         delete_insns_since (last);
3820
3821         /* Try using an instruction that produces both the quotient and
3822            remainder, using truncation.  We can easily compensate the quotient
3823            or remainder to get floor rounding, once we have the remainder.
3824            Notice that we compute also the final remainder value here,
3825            and return the result right away.  */
3826         if (target == 0 || GET_MODE (target) != compute_mode)
3827           target = gen_reg_rtx (compute_mode);
3828
3829         if (rem_flag)
3830           {
3831             remainder
3832               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3833             quotient = gen_reg_rtx (compute_mode);
3834           }
3835         else
3836           {
3837             quotient
3838               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3839             remainder = gen_reg_rtx (compute_mode);
3840           }
3841
3842         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3843                                  quotient, remainder, 0))
3844           {
3845             /* This could be computed with a branch-less sequence.
3846                Save that for later.  */
3847             rtx tem;
3848             rtx label = gen_label_rtx ();
3849             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3850             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3851                                 NULL_RTX, 0, OPTAB_WIDEN);
3852             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3853             expand_dec (quotient, const1_rtx);
3854             expand_inc (remainder, op1);
3855             emit_label (label);
3856             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3857           }
3858
3859         /* No luck with division elimination or divmod.  Have to do it
3860            by conditionally adjusting op0 *and* the result.  */
3861         {
3862           rtx label1, label2, label3, label4, label5;
3863           rtx adjusted_op0;
3864           rtx tem;
3865
3866           quotient = gen_reg_rtx (compute_mode);
3867           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3868           label1 = gen_label_rtx ();
3869           label2 = gen_label_rtx ();
3870           label3 = gen_label_rtx ();
3871           label4 = gen_label_rtx ();
3872           label5 = gen_label_rtx ();
3873           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3874           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3875           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3876                               quotient, 0, OPTAB_LIB_WIDEN);
3877           if (tem != quotient)
3878             emit_move_insn (quotient, tem);
3879           emit_jump_insn (gen_jump (label5));
3880           emit_barrier ();
3881           emit_label (label1);
3882           expand_inc (adjusted_op0, const1_rtx);
3883           emit_jump_insn (gen_jump (label4));
3884           emit_barrier ();
3885           emit_label (label2);
3886           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3887           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3888                               quotient, 0, OPTAB_LIB_WIDEN);
3889           if (tem != quotient)
3890             emit_move_insn (quotient, tem);
3891           emit_jump_insn (gen_jump (label5));
3892           emit_barrier ();
3893           emit_label (label3);
3894           expand_dec (adjusted_op0, const1_rtx);
3895           emit_label (label4);
3896           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3897                               quotient, 0, OPTAB_LIB_WIDEN);
3898           if (tem != quotient)
3899             emit_move_insn (quotient, tem);
3900           expand_dec (quotient, const1_rtx);
3901           emit_label (label5);
3902         }
3903         break;
3904
3905       case CEIL_DIV_EXPR:
3906       case CEIL_MOD_EXPR:
3907         if (unsignedp)
3908           {
3909             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3910               {
3911                 rtx t1, t2, t3;
3912                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3913                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3914                                    build_int_2 (floor_log2 (d), 0),
3915                                    tquotient, 1);
3916                 t2 = expand_binop (compute_mode, and_optab, op0,
3917                                    GEN_INT (d - 1),
3918                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3919                 t3 = gen_reg_rtx (compute_mode);
3920                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3921                                       compute_mode, 1, 1);
3922                 if (t3 == 0)
3923                   {
3924                     rtx lab;
3925                     lab = gen_label_rtx ();
3926                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3927                     expand_inc (t1, const1_rtx);
3928                     emit_label (lab);
3929                     quotient = t1;
3930                   }
3931                 else
3932                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3933                                                           t1, t3),
3934                                             tquotient);
3935                 break;
3936               }
3937
3938             /* Try using an instruction that produces both the quotient and
3939                remainder, using truncation.  We can easily compensate the
3940                quotient or remainder to get ceiling rounding, once we have the
3941                remainder.  Notice that we compute also the final remainder
3942                value here, and return the result right away.  */
3943             if (target == 0 || GET_MODE (target) != compute_mode)
3944               target = gen_reg_rtx (compute_mode);
3945
3946             if (rem_flag)
3947               {
3948                 remainder = (REG_P (target)
3949                              ? target : gen_reg_rtx (compute_mode));
3950                 quotient = gen_reg_rtx (compute_mode);
3951               }
3952             else
3953               {
3954                 quotient = (REG_P (target)
3955                             ? target : gen_reg_rtx (compute_mode));
3956                 remainder = gen_reg_rtx (compute_mode);
3957               }
3958
3959             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3960                                      remainder, 1))
3961               {
3962                 /* This could be computed with a branch-less sequence.
3963                    Save that for later.  */
3964                 rtx label = gen_label_rtx ();
3965                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3966                                  compute_mode, label);
3967                 expand_inc (quotient, const1_rtx);
3968                 expand_dec (remainder, op1);
3969                 emit_label (label);
3970                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3971               }
3972
3973             /* No luck with division elimination or divmod.  Have to do it
3974                by conditionally adjusting op0 *and* the result.  */
3975             {
3976               rtx label1, label2;
3977               rtx adjusted_op0, tem;
3978
3979               quotient = gen_reg_rtx (compute_mode);
3980               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3981               label1 = gen_label_rtx ();
3982               label2 = gen_label_rtx ();
3983               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3984                                compute_mode, label1);
3985               emit_move_insn  (quotient, const0_rtx);
3986               emit_jump_insn (gen_jump (label2));
3987               emit_barrier ();
3988               emit_label (label1);
3989               expand_dec (adjusted_op0, const1_rtx);
3990               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3991                                   quotient, 1, OPTAB_LIB_WIDEN);
3992               if (tem != quotient)
3993                 emit_move_insn (quotient, tem);
3994               expand_inc (quotient, const1_rtx);
3995               emit_label (label2);
3996             }
3997           }
3998         else /* signed */
3999           {
4000             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4001                 && INTVAL (op1) >= 0)
4002               {
4003                 /* This is extremely similar to the code for the unsigned case
4004                    above.  For 2.7 we should merge these variants, but for
4005                    2.6.1 I don't want to touch the code for unsigned since that
4006                    get used in C.  The signed case will only be used by other
4007                    languages (Ada).  */
4008
4009                 rtx t1, t2, t3;
4010                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4011                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4012                                    build_int_2 (floor_log2 (d), 0),
4013                                    tquotient, 0);
4014                 t2 = expand_binop (compute_mode, and_optab, op0,
4015                                    GEN_INT (d - 1),
4016                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017                 t3 = gen_reg_rtx (compute_mode);
4018                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4019                                       compute_mode, 1, 1);
4020                 if (t3 == 0)
4021                   {
4022                     rtx lab;
4023                     lab = gen_label_rtx ();
4024                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4025                     expand_inc (t1, const1_rtx);
4026                     emit_label (lab);
4027                     quotient = t1;
4028                   }
4029                 else
4030                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4031                                                           t1, t3),
4032                                             tquotient);
4033                 break;
4034               }
4035
4036             /* Try using an instruction that produces both the quotient and
4037                remainder, using truncation.  We can easily compensate the
4038                quotient or remainder to get ceiling rounding, once we have the
4039                remainder.  Notice that we compute also the final remainder
4040                value here, and return the result right away.  */
4041             if (target == 0 || GET_MODE (target) != compute_mode)
4042               target = gen_reg_rtx (compute_mode);
4043             if (rem_flag)
4044               {
4045                 remainder= (REG_P (target)
4046                             ? target : gen_reg_rtx (compute_mode));
4047                 quotient = gen_reg_rtx (compute_mode);
4048               }
4049             else
4050               {
4051                 quotient = (REG_P (target)
4052                             ? target : gen_reg_rtx (compute_mode));
4053                 remainder = gen_reg_rtx (compute_mode);
4054               }
4055
4056             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4057                                      remainder, 0))
4058               {
4059                 /* This could be computed with a branch-less sequence.
4060                    Save that for later.  */
4061                 rtx tem;
4062                 rtx label = gen_label_rtx ();
4063                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4064                                  compute_mode, label);
4065                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4066                                     NULL_RTX, 0, OPTAB_WIDEN);
4067                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4068                 expand_inc (quotient, const1_rtx);
4069                 expand_dec (remainder, op1);
4070                 emit_label (label);
4071                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4072               }
4073
4074             /* No luck with division elimination or divmod.  Have to do it
4075                by conditionally adjusting op0 *and* the result.  */
4076             {
4077               rtx label1, label2, label3, label4, label5;
4078               rtx adjusted_op0;
4079               rtx tem;
4080
4081               quotient = gen_reg_rtx (compute_mode);
4082               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4083               label1 = gen_label_rtx ();
4084               label2 = gen_label_rtx ();
4085               label3 = gen_label_rtx ();
4086               label4 = gen_label_rtx ();
4087               label5 = gen_label_rtx ();
4088               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4089               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4090                                compute_mode, label1);
4091               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4092                                   quotient, 0, OPTAB_LIB_WIDEN);
4093               if (tem != quotient)
4094                 emit_move_insn (quotient, tem);
4095               emit_jump_insn (gen_jump (label5));
4096               emit_barrier ();
4097               emit_label (label1);
4098               expand_dec (adjusted_op0, const1_rtx);
4099               emit_jump_insn (gen_jump (label4));
4100               emit_barrier ();
4101               emit_label (label2);
4102               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4103                                compute_mode, label3);
4104               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4105                                   quotient, 0, OPTAB_LIB_WIDEN);
4106               if (tem != quotient)
4107                 emit_move_insn (quotient, tem);
4108               emit_jump_insn (gen_jump (label5));
4109               emit_barrier ();
4110               emit_label (label3);
4111               expand_inc (adjusted_op0, const1_rtx);
4112               emit_label (label4);
4113               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4114                                   quotient, 0, OPTAB_LIB_WIDEN);
4115               if (tem != quotient)
4116                 emit_move_insn (quotient, tem);
4117               expand_inc (quotient, const1_rtx);
4118               emit_label (label5);
4119             }
4120           }
4121         break;
4122
4123       case EXACT_DIV_EXPR:
4124         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4125           {
4126             HOST_WIDE_INT d = INTVAL (op1);
4127             unsigned HOST_WIDE_INT ml;
4128             int pre_shift;
4129             rtx t1;
4130
4131             pre_shift = floor_log2 (d & -d);
4132             ml = invert_mod2n (d >> pre_shift, size);
4133             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4134                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
4135             quotient = expand_mult (compute_mode, t1,
4136                                     gen_int_mode (ml, compute_mode),
4137                                     NULL_RTX, 1);
4138
4139             insn = get_last_insn ();
4140             set_unique_reg_note (insn,
4141                                  REG_EQUAL,
4142                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4143                                                  compute_mode,
4144                                                  op0, op1));
4145           }
4146         break;
4147
4148       case ROUND_DIV_EXPR:
4149       case ROUND_MOD_EXPR:
4150         if (unsignedp)
4151           {
4152             rtx tem;
4153             rtx label;
4154             label = gen_label_rtx ();
4155             quotient = gen_reg_rtx (compute_mode);
4156             remainder = gen_reg_rtx (compute_mode);
4157             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4158               {
4159                 rtx tem;
4160                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4161                                          quotient, 1, OPTAB_LIB_WIDEN);
4162                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4163                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4164                                           remainder, 1, OPTAB_LIB_WIDEN);
4165               }
4166             tem = plus_constant (op1, -1);
4167             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4168                                 build_int_2 (1, 0), NULL_RTX, 1);
4169             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4170             expand_inc (quotient, const1_rtx);
4171             expand_dec (remainder, op1);
4172             emit_label (label);
4173           }
4174         else
4175           {
4176             rtx abs_rem, abs_op1, tem, mask;
4177             rtx label;
4178             label = gen_label_rtx ();
4179             quotient = gen_reg_rtx (compute_mode);
4180             remainder = gen_reg_rtx (compute_mode);
4181             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4182               {
4183                 rtx tem;
4184                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4185                                          quotient, 0, OPTAB_LIB_WIDEN);
4186                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4187                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4188                                           remainder, 0, OPTAB_LIB_WIDEN);
4189               }
4190             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4191             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4192             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4193                                 build_int_2 (1, 0), NULL_RTX, 1);
4194             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4195             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4196                                 NULL_RTX, 0, OPTAB_WIDEN);
4197             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4198                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
4199             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4200                                 NULL_RTX, 0, OPTAB_WIDEN);
4201             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4202                                 NULL_RTX, 0, OPTAB_WIDEN);
4203             expand_inc (quotient, tem);
4204             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4205                                 NULL_RTX, 0, OPTAB_WIDEN);
4206             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4207                                 NULL_RTX, 0, OPTAB_WIDEN);
4208             expand_dec (remainder, tem);
4209             emit_label (label);
4210           }
4211         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4212
4213       default:
4214         abort ();
4215       }
4216
4217   if (quotient == 0)
4218     {
4219       if (target && GET_MODE (target) != compute_mode)
4220         target = 0;
4221
4222       if (rem_flag)
4223         {
4224           /* Try to produce the remainder without producing the quotient.
4225              If we seem to have a divmod pattern that does not require widening,
4226              don't try widening here.  We should really have a WIDEN argument
4227              to expand_twoval_binop, since what we'd really like to do here is
4228              1) try a mod insn in compute_mode
4229              2) try a divmod insn in compute_mode
4230              3) try a div insn in compute_mode and multiply-subtract to get
4231                 remainder
4232              4) try the same things with widening allowed.  */
4233           remainder
4234             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4235                                  op0, op1, target,
4236                                  unsignedp,
4237                                  ((optab2->handlers[compute_mode].insn_code
4238                                    != CODE_FOR_nothing)
4239                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4240           if (remainder == 0)
4241             {
4242               /* No luck there.  Can we do remainder and divide at once
4243                  without a library call?  */
4244               remainder = gen_reg_rtx (compute_mode);
4245               if (! expand_twoval_binop ((unsignedp
4246                                           ? udivmod_optab
4247                                           : sdivmod_optab),
4248                                          op0, op1,
4249                                          NULL_RTX, remainder, unsignedp))
4250                 remainder = 0;
4251             }
4252
4253           if (remainder)
4254             return gen_lowpart (mode, remainder);
4255         }
4256
4257       /* Produce the quotient.  Try a quotient insn, but not a library call.
4258          If we have a divmod in this mode, use it in preference to widening
4259          the div (for this test we assume it will not fail). Note that optab2
4260          is set to the one of the two optabs that the call below will use.  */
4261       quotient
4262         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4263                              op0, op1, rem_flag ? NULL_RTX : target,
4264                              unsignedp,
4265                              ((optab2->handlers[compute_mode].insn_code
4266                                != CODE_FOR_nothing)
4267                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4268
4269       if (quotient == 0)
4270         {
4271           /* No luck there.  Try a quotient-and-remainder insn,
4272              keeping the quotient alone.  */
4273           quotient = gen_reg_rtx (compute_mode);
4274           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4275                                      op0, op1,
4276                                      quotient, NULL_RTX, unsignedp))
4277             {
4278               quotient = 0;
4279               if (! rem_flag)
4280                 /* Still no luck.  If we are not computing the remainder,
4281                    use a library call for the quotient.  */
4282                 quotient = sign_expand_binop (compute_mode,
4283                                               udiv_optab, sdiv_optab,
4284                                               op0, op1, target,
4285                                               unsignedp, OPTAB_LIB_WIDEN);
4286             }
4287         }
4288     }
4289
4290   if (rem_flag)
4291     {
4292       if (target && GET_MODE (target) != compute_mode)
4293         target = 0;
4294
4295       if (quotient == 0)
4296         /* No divide instruction either.  Use library for remainder.  */
4297         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4298                                        op0, op1, target,
4299                                        unsignedp, OPTAB_LIB_WIDEN);
4300       else
4301         {
4302           /* We divided.  Now finish doing X - Y * (X / Y).  */
4303           remainder = expand_mult (compute_mode, quotient, op1,
4304                                    NULL_RTX, unsignedp);
4305           remainder = expand_binop (compute_mode, sub_optab, op0,
4306                                     remainder, target, unsignedp,
4307                                     OPTAB_LIB_WIDEN);
4308         }
4309     }
4310
4311   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4312 }
4313 \f
4314 /* Return a tree node with data type TYPE, describing the value of X.
4315    Usually this is an VAR_DECL, if there is no obvious better choice.
4316    X may be an expression, however we only support those expressions
4317    generated by loop.c.  */
4318
4319 tree
4320 make_tree (tree type, rtx x)
4321 {
4322   tree t;
4323
4324   switch (GET_CODE (x))
4325     {
4326     case CONST_INT:
4327       t = build_int_2 (INTVAL (x),
4328                        (TYPE_UNSIGNED (type)
4329                         && (GET_MODE_BITSIZE (TYPE_MODE (type))
4330                             < HOST_BITS_PER_WIDE_INT))
4331                        || INTVAL (x) >= 0 ? 0 : -1);
4332       TREE_TYPE (t) = type;
4333       return t;
4334
4335     case CONST_DOUBLE:
4336       if (GET_MODE (x) == VOIDmode)
4337         {
4338           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4339           TREE_TYPE (t) = type;
4340         }
4341       else
4342         {
4343           REAL_VALUE_TYPE d;
4344
4345           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4346           t = build_real (type, d);
4347         }
4348
4349       return t;
4350
4351     case CONST_VECTOR:
4352       {
4353         int i, units;
4354         rtx elt;
4355         tree t = NULL_TREE;
4356
4357         units = CONST_VECTOR_NUNITS (x);
4358
4359         /* Build a tree with vector elements.  */
4360         for (i = units - 1; i >= 0; --i)
4361           {
4362             elt = CONST_VECTOR_ELT (x, i);
4363             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4364           }
4365
4366         return build_vector (type, t);
4367       }
4368
4369     case PLUS:
4370       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4371                           make_tree (type, XEXP (x, 1))));
4372
4373     case MINUS:
4374       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4375                           make_tree (type, XEXP (x, 1))));
4376
4377     case NEG:
4378       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4379
4380     case MULT:
4381       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4382                           make_tree (type, XEXP (x, 1))));
4383
4384     case ASHIFT:
4385       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4386                           make_tree (type, XEXP (x, 1))));
4387
4388     case LSHIFTRT:
4389       t = lang_hooks.types.unsigned_type (type);
4390       return fold (convert (type,
4391                             build (RSHIFT_EXPR, t,
4392                                    make_tree (t, XEXP (x, 0)),
4393                                    make_tree (type, XEXP (x, 1)))));
4394
4395     case ASHIFTRT:
4396       t = lang_hooks.types.signed_type (type);
4397       return fold (convert (type,
4398                             build (RSHIFT_EXPR, t,
4399                                    make_tree (t, XEXP (x, 0)),
4400                                    make_tree (type, XEXP (x, 1)))));
4401
4402     case DIV:
4403       if (TREE_CODE (type) != REAL_TYPE)
4404         t = lang_hooks.types.signed_type (type);
4405       else
4406         t = type;
4407
4408       return fold (convert (type,
4409                             build (TRUNC_DIV_EXPR, t,
4410                                    make_tree (t, XEXP (x, 0)),
4411                                    make_tree (t, XEXP (x, 1)))));
4412     case UDIV:
4413       t = lang_hooks.types.unsigned_type (type);
4414       return fold (convert (type,
4415                             build (TRUNC_DIV_EXPR, t,
4416                                    make_tree (t, XEXP (x, 0)),
4417                                    make_tree (t, XEXP (x, 1)))));
4418
4419     case SIGN_EXTEND:
4420     case ZERO_EXTEND:
4421       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4422                                           GET_CODE (x) == ZERO_EXTEND);
4423       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4424
4425     default:
4426       t = build_decl (VAR_DECL, NULL_TREE, type);
4427
4428       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4429          ptr_mode.  So convert.  */
4430       if (POINTER_TYPE_P (type))
4431         x = convert_memory_address (TYPE_MODE (type), x);
4432
4433       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4434          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4435       t->decl.rtl = x;
4436
4437       return t;
4438     }
4439 }
4440
4441 /* Check whether the multiplication X * MULT + ADD overflows.
4442    X, MULT and ADD must be CONST_*.
4443    MODE is the machine mode for the computation.
4444    X and MULT must have mode MODE.  ADD may have a different mode.
4445    So can X (defaults to same as MODE).
4446    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4447
4448 bool
4449 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4450 {
4451   tree type, mult_type, add_type, result;
4452
4453   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4454
4455   /* In order to get a proper overflow indication from an unsigned
4456      type, we have to pretend that it's a sizetype.  */
4457   mult_type = type;
4458   if (unsignedp)
4459     {
4460       mult_type = copy_node (type);
4461       TYPE_IS_SIZETYPE (mult_type) = 1;
4462     }
4463
4464   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4465               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4466
4467   result = fold (build (PLUS_EXPR, mult_type,
4468                         fold (build (MULT_EXPR, mult_type,
4469                                      make_tree (mult_type, x),
4470                                      make_tree (mult_type, mult))),
4471                         make_tree (add_type, add)));
4472
4473   return TREE_CONSTANT_OVERFLOW (result);
4474 }
4475
4476 /* Return an rtx representing the value of X * MULT + ADD.
4477    TARGET is a suggestion for where to store the result (an rtx).
4478    MODE is the machine mode for the computation.
4479    X and MULT must have mode MODE.  ADD may have a different mode.
4480    So can X (defaults to same as MODE).
4481    UNSIGNEDP is nonzero to do unsigned multiplication.
4482    This may emit insns.  */
4483
4484 rtx
4485 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4486                  int unsignedp)
4487 {
4488   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4489   tree add_type = (GET_MODE (add) == VOIDmode
4490                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4491                                                            unsignedp));
4492   tree result =  fold (build (PLUS_EXPR, type,
4493                               fold (build (MULT_EXPR, type,
4494                                            make_tree (type, x),
4495                                            make_tree (type, mult))),
4496                               make_tree (add_type, add)));
4497
4498   return expand_expr (result, target, VOIDmode, 0);
4499 }
4500 \f
4501 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4502    and returning TARGET.
4503
4504    If TARGET is 0, a pseudo-register or constant is returned.  */
4505
4506 rtx
4507 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4508 {
4509   rtx tem = 0;
4510
4511   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4512     tem = simplify_binary_operation (AND, mode, op0, op1);
4513   if (tem == 0)
4514     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4515
4516   if (target == 0)
4517     target = tem;
4518   else if (tem != target)
4519     emit_move_insn (target, tem);
4520   return target;
4521 }
4522 \f
4523 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4524    and storing in TARGET.  Normally return TARGET.
4525    Return 0 if that cannot be done.
4526
4527    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4528    it is VOIDmode, they cannot both be CONST_INT.
4529
4530    UNSIGNEDP is for the case where we have to widen the operands
4531    to perform the operation.  It says to use zero-extension.
4532
4533    NORMALIZEP is 1 if we should convert the result to be either zero
4534    or one.  Normalize is -1 if we should convert the result to be
4535    either zero or -1.  If NORMALIZEP is zero, the result will be left
4536    "raw" out of the scc insn.  */
4537
4538 rtx
4539 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4540                  enum machine_mode mode, int unsignedp, int normalizep)
4541 {
4542   rtx subtarget;
4543   enum insn_code icode;
4544   enum machine_mode compare_mode;
4545   enum machine_mode target_mode = GET_MODE (target);
4546   rtx tem;
4547   rtx last = get_last_insn ();
4548   rtx pattern, comparison;
4549
4550   /* ??? Ok to do this and then fail? */
4551   op0 = protect_from_queue (op0, 0);
4552   op1 = protect_from_queue (op1, 0);
4553
4554   if (unsignedp)
4555     code = unsigned_condition (code);
4556
4557   /* If one operand is constant, make it the second one.  Only do this
4558      if the other operand is not constant as well.  */
4559
4560   if (swap_commutative_operands_p (op0, op1))
4561     {
4562       tem = op0;
4563       op0 = op1;
4564       op1 = tem;
4565       code = swap_condition (code);
4566     }
4567
4568   if (mode == VOIDmode)
4569     mode = GET_MODE (op0);
4570
4571   /* For some comparisons with 1 and -1, we can convert this to
4572      comparisons with zero.  This will often produce more opportunities for
4573      store-flag insns.  */
4574
4575   switch (code)
4576     {
4577     case LT:
4578       if (op1 == const1_rtx)
4579         op1 = const0_rtx, code = LE;
4580       break;
4581     case LE:
4582       if (op1 == constm1_rtx)
4583         op1 = const0_rtx, code = LT;
4584       break;
4585     case GE:
4586       if (op1 == const1_rtx)
4587         op1 = const0_rtx, code = GT;
4588       break;
4589     case GT:
4590       if (op1 == constm1_rtx)
4591         op1 = const0_rtx, code = GE;
4592       break;
4593     case GEU:
4594       if (op1 == const1_rtx)
4595         op1 = const0_rtx, code = NE;
4596       break;
4597     case LTU:
4598       if (op1 == const1_rtx)
4599         op1 = const0_rtx, code = EQ;
4600       break;
4601     default:
4602       break;
4603     }
4604
4605   /* If we are comparing a double-word integer with zero or -1, we can
4606      convert the comparison into one involving a single word.  */
4607   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4608       && GET_MODE_CLASS (mode) == MODE_INT
4609       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4610     {
4611       if ((code == EQ || code == NE)
4612           && (op1 == const0_rtx || op1 == constm1_rtx))
4613         {
4614           rtx op00, op01, op0both;
4615
4616           /* Do a logical OR or AND of the two words and compare the result.  */
4617           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4618           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4619           op0both = expand_binop (word_mode,
4620                                   op1 == const0_rtx ? ior_optab : and_optab,
4621                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4622
4623           if (op0both != 0)
4624             return emit_store_flag (target, code, op0both, op1, word_mode,
4625                                     unsignedp, normalizep);
4626         }
4627       else if ((code == LT || code == GE) && op1 == const0_rtx)
4628         {
4629           rtx op0h;
4630
4631           /* If testing the sign bit, can just test on high word.  */
4632           op0h = simplify_gen_subreg (word_mode, op0, mode,
4633                                       subreg_highpart_offset (word_mode, mode));
4634           return emit_store_flag (target, code, op0h, op1, word_mode,
4635                                   unsignedp, normalizep);
4636         }
4637     }
4638
4639   /* From now on, we won't change CODE, so set ICODE now.  */
4640   icode = setcc_gen_code[(int) code];
4641
4642   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4643      complement of A (for GE) and shifting the sign bit to the low bit.  */
4644   if (op1 == const0_rtx && (code == LT || code == GE)
4645       && GET_MODE_CLASS (mode) == MODE_INT
4646       && (normalizep || STORE_FLAG_VALUE == 1
4647           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4648               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4649                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4650     {
4651       subtarget = target;
4652
4653       /* If the result is to be wider than OP0, it is best to convert it
4654          first.  If it is to be narrower, it is *incorrect* to convert it
4655          first.  */
4656       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4657         {
4658           op0 = protect_from_queue (op0, 0);
4659           op0 = convert_modes (target_mode, mode, op0, 0);
4660           mode = target_mode;
4661         }
4662
4663       if (target_mode != mode)
4664         subtarget = 0;
4665
4666       if (code == GE)
4667         op0 = expand_unop (mode, one_cmpl_optab, op0,
4668                            ((STORE_FLAG_VALUE == 1 || normalizep)
4669                             ? 0 : subtarget), 0);
4670
4671       if (STORE_FLAG_VALUE == 1 || normalizep)
4672         /* If we are supposed to produce a 0/1 value, we want to do
4673            a logical shift from the sign bit to the low-order bit; for
4674            a -1/0 value, we do an arithmetic shift.  */
4675         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4676                             size_int (GET_MODE_BITSIZE (mode) - 1),
4677                             subtarget, normalizep != -1);
4678
4679       if (mode != target_mode)
4680         op0 = convert_modes (target_mode, mode, op0, 0);
4681
4682       return op0;
4683     }
4684
4685   if (icode != CODE_FOR_nothing)
4686     {
4687       insn_operand_predicate_fn pred;
4688
4689       /* We think we may be able to do this with a scc insn.  Emit the
4690          comparison and then the scc insn.
4691
4692          compare_from_rtx may call emit_queue, which would be deleted below
4693          if the scc insn fails.  So call it ourselves before setting LAST.
4694          Likewise for do_pending_stack_adjust.  */
4695
4696       emit_queue ();
4697       do_pending_stack_adjust ();
4698       last = get_last_insn ();
4699
4700       comparison
4701         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4702       if (CONSTANT_P (comparison))
4703         {
4704           if (GET_CODE (comparison) == CONST_INT)
4705             {
4706               if (comparison == const0_rtx)
4707                 return const0_rtx;
4708             }
4709 #ifdef FLOAT_STORE_FLAG_VALUE
4710           else if (GET_CODE (comparison) == CONST_DOUBLE)
4711             {
4712               if (comparison == CONST0_RTX (GET_MODE (comparison)))
4713                 return const0_rtx;
4714             }
4715 #endif
4716           else
4717             abort ();
4718           if (normalizep == 1)
4719             return const1_rtx;
4720           if (normalizep == -1)
4721             return constm1_rtx;
4722           return const_true_rtx;
4723         }
4724
4725       /* The code of COMPARISON may not match CODE if compare_from_rtx
4726          decided to swap its operands and reverse the original code.
4727
4728          We know that compare_from_rtx returns either a CONST_INT or
4729          a new comparison code, so it is safe to just extract the
4730          code from COMPARISON.  */
4731       code = GET_CODE (comparison);
4732
4733       /* Get a reference to the target in the proper mode for this insn.  */
4734       compare_mode = insn_data[(int) icode].operand[0].mode;
4735       subtarget = target;
4736       pred = insn_data[(int) icode].operand[0].predicate;
4737       if (preserve_subexpressions_p ()
4738           || ! (*pred) (subtarget, compare_mode))
4739         subtarget = gen_reg_rtx (compare_mode);
4740
4741       pattern = GEN_FCN (icode) (subtarget);
4742       if (pattern)
4743         {
4744           emit_insn (pattern);
4745
4746           /* If we are converting to a wider mode, first convert to
4747              TARGET_MODE, then normalize.  This produces better combining
4748              opportunities on machines that have a SIGN_EXTRACT when we are
4749              testing a single bit.  This mostly benefits the 68k.
4750
4751              If STORE_FLAG_VALUE does not have the sign bit set when
4752              interpreted in COMPARE_MODE, we can do this conversion as
4753              unsigned, which is usually more efficient.  */
4754           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4755             {
4756               convert_move (target, subtarget,
4757                             (GET_MODE_BITSIZE (compare_mode)
4758                              <= HOST_BITS_PER_WIDE_INT)
4759                             && 0 == (STORE_FLAG_VALUE
4760                                      & ((HOST_WIDE_INT) 1
4761                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4762               op0 = target;
4763               compare_mode = target_mode;
4764             }
4765           else
4766             op0 = subtarget;
4767
4768           /* If we want to keep subexpressions around, don't reuse our
4769              last target.  */
4770
4771           if (preserve_subexpressions_p ())
4772             subtarget = 0;
4773
4774           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4775              we don't have to do anything.  */
4776           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4777             ;
4778           /* STORE_FLAG_VALUE might be the most negative number, so write
4779              the comparison this way to avoid a compiler-time warning.  */
4780           else if (- normalizep == STORE_FLAG_VALUE)
4781             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4782
4783           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4784              makes it hard to use a value of just the sign bit due to
4785              ANSI integer constant typing rules.  */
4786           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4787                    && (STORE_FLAG_VALUE
4788                        & ((HOST_WIDE_INT) 1
4789                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4790             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4791                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4792                                 subtarget, normalizep == 1);
4793           else if (STORE_FLAG_VALUE & 1)
4794             {
4795               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4796               if (normalizep == -1)
4797                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4798             }
4799           else
4800             abort ();
4801
4802           /* If we were converting to a smaller mode, do the
4803              conversion now.  */
4804           if (target_mode != compare_mode)
4805             {
4806               convert_move (target, op0, 0);
4807               return target;
4808             }
4809           else
4810             return op0;
4811         }
4812     }
4813
4814   delete_insns_since (last);
4815
4816   /* If expensive optimizations, use different pseudo registers for each
4817      insn, instead of reusing the same pseudo.  This leads to better CSE,
4818      but slows down the compiler, since there are more pseudos */
4819   subtarget = (!flag_expensive_optimizations
4820                && (target_mode == mode)) ? target : NULL_RTX;
4821
4822   /* If we reached here, we can't do this with a scc insn.  However, there
4823      are some comparisons that can be done directly.  For example, if
4824      this is an equality comparison of integers, we can try to exclusive-or
4825      (or subtract) the two operands and use a recursive call to try the
4826      comparison with zero.  Don't do any of these cases if branches are
4827      very cheap.  */
4828
4829   if (BRANCH_COST > 0
4830       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4831       && op1 != const0_rtx)
4832     {
4833       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4834                           OPTAB_WIDEN);
4835
4836       if (tem == 0)
4837         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4838                             OPTAB_WIDEN);
4839       if (tem != 0)
4840         tem = emit_store_flag (target, code, tem, const0_rtx,
4841                                mode, unsignedp, normalizep);
4842       if (tem == 0)
4843         delete_insns_since (last);
4844       return tem;
4845     }
4846
4847   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4848      the constant zero.  Reject all other comparisons at this point.  Only
4849      do LE and GT if branches are expensive since they are expensive on
4850      2-operand machines.  */
4851
4852   if (BRANCH_COST == 0
4853       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4854       || (code != EQ && code != NE
4855           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4856     return 0;
4857
4858   /* See what we need to return.  We can only return a 1, -1, or the
4859      sign bit.  */
4860
4861   if (normalizep == 0)
4862     {
4863       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4864         normalizep = STORE_FLAG_VALUE;
4865
4866       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4867                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4868                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4869         ;
4870       else
4871         return 0;
4872     }
4873
4874   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4875      do the necessary operation below.  */
4876
4877   tem = 0;
4878
4879   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4880      the sign bit set.  */
4881
4882   if (code == LE)
4883     {
4884       /* This is destructive, so SUBTARGET can't be OP0.  */
4885       if (rtx_equal_p (subtarget, op0))
4886         subtarget = 0;
4887
4888       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4889                           OPTAB_WIDEN);
4890       if (tem)
4891         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4892                             OPTAB_WIDEN);
4893     }
4894
4895   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4896      number of bits in the mode of OP0, minus one.  */
4897
4898   if (code == GT)
4899     {
4900       if (rtx_equal_p (subtarget, op0))
4901         subtarget = 0;
4902
4903       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4904                           size_int (GET_MODE_BITSIZE (mode) - 1),
4905                           subtarget, 0);
4906       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4907                           OPTAB_WIDEN);
4908     }
4909
4910   if (code == EQ || code == NE)
4911     {
4912       /* For EQ or NE, one way to do the comparison is to apply an operation
4913          that converts the operand into a positive number if it is nonzero
4914          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4915          for NE we negate.  This puts the result in the sign bit.  Then we
4916          normalize with a shift, if needed.
4917
4918          Two operations that can do the above actions are ABS and FFS, so try
4919          them.  If that doesn't work, and MODE is smaller than a full word,
4920          we can use zero-extension to the wider mode (an unsigned conversion)
4921          as the operation.  */
4922
4923       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4924          that is compensated by the subsequent overflow when subtracting
4925          one / negating.  */
4926
4927       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4928         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4929       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4930         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4931       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4932         {
4933           op0 = protect_from_queue (op0, 0);
4934           tem = convert_modes (word_mode, mode, op0, 1);
4935           mode = word_mode;
4936         }
4937
4938       if (tem != 0)
4939         {
4940           if (code == EQ)
4941             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4942                                 0, OPTAB_WIDEN);
4943           else
4944             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4945         }
4946
4947       /* If we couldn't do it that way, for NE we can "or" the two's complement
4948          of the value with itself.  For EQ, we take the one's complement of
4949          that "or", which is an extra insn, so we only handle EQ if branches
4950          are expensive.  */
4951
4952       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4953         {
4954           if (rtx_equal_p (subtarget, op0))
4955             subtarget = 0;
4956
4957           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4958           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4959                               OPTAB_WIDEN);
4960
4961           if (tem && code == EQ)
4962             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4963         }
4964     }
4965
4966   if (tem && normalizep)
4967     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4968                         size_int (GET_MODE_BITSIZE (mode) - 1),
4969                         subtarget, normalizep == 1);
4970
4971   if (tem)
4972     {
4973       if (GET_MODE (tem) != target_mode)
4974         {
4975           convert_move (target, tem, 0);
4976           tem = target;
4977         }
4978       else if (!subtarget)
4979         {
4980           emit_move_insn (target, tem);
4981           tem = target;
4982         }
4983     }
4984   else
4985     delete_insns_since (last);
4986
4987   return tem;
4988 }
4989
4990 /* Like emit_store_flag, but always succeeds.  */
4991
4992 rtx
4993 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4994                        enum machine_mode mode, int unsignedp, int normalizep)
4995 {
4996   rtx tem, label;
4997
4998   /* First see if emit_store_flag can do the job.  */
4999   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5000   if (tem != 0)
5001     return tem;
5002
5003   if (normalizep == 0)
5004     normalizep = 1;
5005
5006   /* If this failed, we have to do this with set/compare/jump/set code.  */
5007
5008   if (!REG_P (target)
5009       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5010     target = gen_reg_rtx (GET_MODE (target));
5011
5012   emit_move_insn (target, const1_rtx);
5013   label = gen_label_rtx ();
5014   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5015                            NULL_RTX, label);
5016
5017   emit_move_insn (target, const0_rtx);
5018   emit_label (label);
5019
5020   return target;
5021 }
5022 \f
5023 /* Perform possibly multi-word comparison and conditional jump to LABEL
5024    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5025
5026    The algorithm is based on the code in expr.c:do_jump.
5027
5028    Note that this does not perform a general comparison.  Only variants
5029    generated within expmed.c are correctly handled, others abort (but could
5030    be handled if needed).  */
5031
5032 static void
5033 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5034                  rtx label)
5035 {
5036   /* If this mode is an integer too wide to compare properly,
5037      compare word by word.  Rely on cse to optimize constant cases.  */
5038
5039   if (GET_MODE_CLASS (mode) == MODE_INT
5040       && ! can_compare_p (op, mode, ccp_jump))
5041     {
5042       rtx label2 = gen_label_rtx ();
5043
5044       switch (op)
5045         {
5046         case LTU:
5047           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5048           break;
5049
5050         case LEU:
5051           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5052           break;
5053
5054         case LT:
5055           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5056           break;
5057
5058         case GT:
5059           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5060           break;
5061
5062         case GE:
5063           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5064           break;
5065
5066           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5067              that's the only equality operations we do */
5068         case EQ:
5069           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5070             abort ();
5071           do_jump_by_parts_equality_rtx (arg1, label2, label);
5072           break;
5073
5074         case NE:
5075           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5076             abort ();
5077           do_jump_by_parts_equality_rtx (arg1, label, label2);
5078           break;
5079
5080         default:
5081           abort ();
5082         }
5083
5084       emit_label (label2);
5085     }
5086   else
5087     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5088 }