gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 88, 89, 92-97, 1998 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "flags.h"
  28 #include "insn-flags.h"
  29 #include "insn-codes.h"
  30 #include "insn-config.h"
  31 #include "expr.h"
  32 #include "real.h"
  33 #include "recog.h"
  34
  35 static void store_fixed_bit_field       PROTO((rtx, int, int, int, rtx, int));
  36 static void store_split_bit_field       PROTO((rtx, int, int, rtx, int));
  37 static rtx extract_fixed_bit_field      PROTO((enum machine_mode, rtx, int,
  38                                                int, int, rtx, int, int));
  39 static rtx mask_rtx                     PROTO((enum machine_mode, int,
  40                                                int, int));
  41 static rtx lshift_value                 PROTO((enum machine_mode, rtx,
  42                                                int, int));
  43 static rtx extract_split_bit_field      PROTO((rtx, int, int, int, int));
  44 static void do_cmp_and_jump             PROTO((rtx, rtx, enum rtx_code,
  45                                                enum machine_mode, rtx));
  46
  47 #define CEIL(x,y) (((x) + (y) - 1) / (y))
  48
  49 /* Non-zero means divides or modulus operations are relatively cheap for
  50    powers of two, so don't use branches; emit the operation instead.
  51    Usually, this will mean that the MD file will emit non-branch
  52    sequences.  */
  53
  54 static int sdiv_pow2_cheap, smod_pow2_cheap;
  55
  56 #ifndef SLOW_UNALIGNED_ACCESS
  57 #define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
  58 #endif
  59
  60 /* For compilers that support multiple targets with different word sizes,
  61    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  62    is the H8/300(H) compiler.  */
  63
  64 #ifndef MAX_BITS_PER_WORD
  65 #define MAX_BITS_PER_WORD BITS_PER_WORD
  66 #endif
  67
  68 /* Cost of various pieces of RTL.  Note that some of these are indexed by shift count,
  69    and some by mode.  */
  70 static int add_cost, negate_cost, zero_cost;
  71 static int shift_cost[MAX_BITS_PER_WORD];
  72 static int shiftadd_cost[MAX_BITS_PER_WORD];
  73 static int shiftsub_cost[MAX_BITS_PER_WORD];
  74 static int mul_cost[NUM_MACHINE_MODES];
  75 static int div_cost[NUM_MACHINE_MODES];
  76 static int mul_widen_cost[NUM_MACHINE_MODES];
  77 static int mul_highpart_cost[NUM_MACHINE_MODES];
  78
  79 void
  80 init_expmed ()
  81 {
  82   char *free_point;
  83   /* This is "some random pseudo register" for purposes of calling recog
  84      to see what insns exist.  */
  85   rtx reg = gen_rtx_REG (word_mode, 10000);
  86   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  87   int dummy;
  88   int m;
  89   enum machine_mode mode, wider_mode;
  90
  91   start_sequence ();
  92
  93   /* Since we are on the permanent obstack, we must be sure we save this
  94      spot AFTER we call start_sequence, since it will reuse the rtl it
  95      makes.  */
  96   free_point = (char *) oballoc (0);
  97
  98   reg = gen_rtx (REG, word_mode, 10000);
  99
 100   zero_cost = rtx_cost (const0_rtx, 0);
 101   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 102
 103   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 104                                        gen_rtx_ASHIFT (word_mode, reg,
 105                                                        const0_rtx)));
 106
 107   shiftadd_insn
 108     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 109                               gen_rtx_PLUS (word_mode,
 110                                             gen_rtx_MULT (word_mode,
 111                                                           reg, const0_rtx),
 112                                             reg)));
 113
 114   shiftsub_insn
 115     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 116                               gen_rtx_MINUS (word_mode,
 117                                              gen_rtx_MULT (word_mode,
 118                                                            reg, const0_rtx),
 119                                              reg)));
 120
 121   init_recog ();
 122
 123   shift_cost[0] = 0;
 124   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 125
 126   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 127     {
 128       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 129
 130       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 131       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 132         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 133
 134       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 135         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 136       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 137         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 138
 139       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 140         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 141       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 142         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 143     }
 144
 145   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 146
 147   sdiv_pow2_cheap
 148     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 149        <= 2 * add_cost);
 150   smod_pow2_cheap
 151     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 152        <= 2 * add_cost);
 153
 154   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 155        mode != VOIDmode;
 156        mode = GET_MODE_WIDER_MODE (mode))
 157     {
 158       reg = gen_rtx_REG (mode, 10000);
 159       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 160       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 161       wider_mode = GET_MODE_WIDER_MODE (mode);
 162       if (wider_mode != VOIDmode)
 163         {
 164           mul_widen_cost[(int) wider_mode]
 165             = rtx_cost (gen_rtx_MULT (wider_mode,
 166                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 167                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 168                         SET);
 169           mul_highpart_cost[(int) mode]
 170             = rtx_cost (gen_rtx_TRUNCATE
 171                         (mode,
 172                          gen_rtx_LSHIFTRT
 173                          (wider_mode,
 174                           gen_rtx_MULT (wider_mode,
 175                                         gen_rtx_ZERO_EXTEND (wider_mode, reg),
 176                                         gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 177                           GEN_INT (GET_MODE_BITSIZE (mode)))),
 178                         SET);
 179         }
 180     }
 181
 182   /* Free the objects we just allocated.  */
 183   end_sequence ();
 184   obfree (free_point);
 185 }
 186
 187 /* Return an rtx representing minus the value of X.
 188    MODE is the intended mode of the result,
 189    useful if X is a CONST_INT.  */
 190
 191 rtx
 192 negate_rtx (mode, x)
 193      enum machine_mode mode;
 194      rtx x;
 195 {
 196   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 197
 198   if (result == 0)
 199     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 200
 201   return result;
 202 }
 203 \f
 204 /* Generate code to store value from rtx VALUE
 205    into a bit-field within structure STR_RTX
 206    containing BITSIZE bits starting at bit BITNUM.
 207    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 208    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
 209    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 210
 211 /* ??? Note that there are two different ideas here for how
 212    to determine the size to count bits within, for a register.
 213    One is BITS_PER_WORD, and the other is the size of operand 3
 214    of the insv pattern.
 215
 216    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 217    else, we use the mode of operand 3.  */
 218
 219 rtx
 220 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 221      rtx str_rtx;
 222      register int bitsize;
 223      int bitnum;
 224      enum machine_mode fieldmode;
 225      rtx value;
 226      int align;
 227      int total_size;
 228 {
 229   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 230   register int offset = bitnum / unit;
 231   register int bitpos = bitnum % unit;
 232   register rtx op0 = str_rtx;
 233 #ifdef HAVE_insv
 234   int insv_bitsize;
 235
 236   if (insn_operand_mode[(int) CODE_FOR_insv][3] == VOIDmode)
 237     insv_bitsize = GET_MODE_BITSIZE (word_mode);
 238   else
 239     insv_bitsize = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]);
 240 #endif
 241
 242   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 243     abort ();
 244
 245   /* Discount the part of the structure before the desired byte.
 246      We need to know how many bytes are safe to reference after it.  */
 247   if (total_size >= 0)
 248     total_size -= (bitpos / BIGGEST_ALIGNMENT
 249                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 250
 251   while (GET_CODE (op0) == SUBREG)
 252     {
 253       /* The following line once was done only if WORDS_BIG_ENDIAN,
 254          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 255          meaningful at a much higher level; when structures are copied
 256          between memory and regs, the higher-numbered regs
 257          always get higher addresses.  */
 258       offset += SUBREG_WORD (op0);
 259       /* We used to adjust BITPOS here, but now we do the whole adjustment
 260          right after the loop.  */
 261       op0 = SUBREG_REG (op0);
 262     }
 263
 264   /* Make sure we are playing with integral modes.  Pun with subregs
 265      if we aren't.  */
 266   {
 267     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 268     if (imode != GET_MODE (op0))
 269       {
 270         if (GET_CODE (op0) == MEM)
 271           op0 = change_address (op0, imode, NULL_RTX);
 272         else if (imode != BLKmode)
 273           op0 = gen_lowpart (imode, op0);
 274         else
 275           abort ();
 276       }
 277   }
 278
 279   /* If OP0 is a register, BITPOS must count within a word.
 280      But as we have it, it counts within whatever size OP0 now has.
 281      On a bigendian machine, these are not the same, so convert.  */
 282   if (BYTES_BIG_ENDIAN
 283       && GET_CODE (op0) != MEM
 284       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 285     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 286
 287   value = protect_from_queue (value, 0);
 288
 289   if (flag_force_mem)
 290     value = force_not_mem (value);
 291
 292   /* Note that the adjustment of BITPOS above has no effect on whether
 293      BITPOS is 0 in a REG bigger than a word.  */
 294   if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 295       && (GET_CODE (op0) != MEM
 296           || ! SLOW_UNALIGNED_ACCESS
 297           || (offset * BITS_PER_UNIT % bitsize == 0
 298               && align % GET_MODE_SIZE (fieldmode) == 0))
 299       && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
 300     {
 301       /* Storing in a full-word or multi-word field in a register
 302          can be done with just SUBREG.  */
 303       if (GET_MODE (op0) != fieldmode)
 304         {
 305           if (GET_CODE (op0) == SUBREG)
 306             {
 307               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 308                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 309                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 310                 op0 = SUBREG_REG (op0);
 311               else
 312                 /* Else we've got some float mode source being extracted into
 313                    a different float mode destination -- this combination of
 314                    subregs results in Severe Tire Damage.  */
 315                 abort ();
 316             }
 317           if (GET_CODE (op0) == REG)
 318             op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
 319           else
 320             op0 = change_address (op0, fieldmode,
 321                                   plus_constant (XEXP (op0, 0), offset));
 322         }
 323       emit_move_insn (op0, value);
 324       return value;
 325     }
 326
 327   /* Storing an lsb-aligned field in a register
 328      can be done with a movestrict instruction.  */
 329
 330   if (GET_CODE (op0) != MEM
 331       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 332       && bitsize == GET_MODE_BITSIZE (fieldmode)
 333       && (GET_MODE (op0) == fieldmode
 334           || (movstrict_optab->handlers[(int) fieldmode].insn_code
 335               != CODE_FOR_nothing)))
 336     {
 337       /* Get appropriate low part of the value being stored.  */
 338       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 339         value = gen_lowpart (fieldmode, value);
 340       else if (!(GET_CODE (value) == SYMBOL_REF
 341                  || GET_CODE (value) == LABEL_REF
 342                  || GET_CODE (value) == CONST))
 343         value = convert_to_mode (fieldmode, value, 0);
 344
 345       if (GET_MODE (op0) == fieldmode)
 346         emit_move_insn (op0, value);
 347       else
 348         {
 349           int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 350           if(! (*insn_operand_predicate[icode][1]) (value, fieldmode))
 351             value = copy_to_mode_reg (fieldmode, value);
 352           emit_insn (GEN_FCN (icode)
 353                    (gen_rtx_SUBREG (fieldmode, op0, offset), value));
 354         }
 355       return value;
 356     }
 357
 358   /* Handle fields bigger than a word.  */
 359
 360   if (bitsize > BITS_PER_WORD)
 361     {
 362       /* Here we transfer the words of the field
 363          in the order least significant first.
 364          This is because the most significant word is the one which may
 365          be less than full.
 366          However, only do that if the value is not BLKmode.  */
 367
 368       int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 369
 370       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 371       int i;
 372
 373       /* This is the mode we must force value to, so that there will be enough
 374          subwords to extract.  Note that fieldmode will often (always?) be
 375          VOIDmode, because that is what store_field uses to indicate that this
 376          is a bit field, but passing VOIDmode to operand_subword_force will
 377          result in an abort.  */
 378       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
 379
 380       for (i = 0; i < nwords; i++)
 381         {
 382           /* If I is 0, use the low-order word in both field and target;
 383              if I is 1, use the next to lowest word; and so on.  */
 384           int wordnum = (backwards ? nwords - i - 1 : i);
 385           int bit_offset = (backwards
 386                             ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
 387                             : i * BITS_PER_WORD);
 388           store_bit_field (op0, MIN (BITS_PER_WORD,
 389                                      bitsize - i * BITS_PER_WORD),
 390                            bitnum + bit_offset, word_mode,
 391                            operand_subword_force (value, wordnum,
 392                                                   (GET_MODE (value) == VOIDmode
 393                                                    ? fieldmode
 394                                                    : GET_MODE (value))),
 395                            align, total_size);
 396         }
 397       return value;
 398     }
 399
 400   /* From here on we can assume that the field to be stored in is
 401      a full-word (whatever type that is), since it is shorter than a word.  */
 402
 403   /* OFFSET is the number of words or bytes (UNIT says which)
 404      from STR_RTX to the first word or byte containing part of the field.  */
 405
 406   if (GET_CODE (op0) == REG)
 407     {
 408       if (offset != 0
 409           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 410         op0 = gen_rtx_SUBREG (TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
 411                        op0, offset);
 412       offset = 0;
 413     }
 414   else
 415     {
 416       op0 = protect_from_queue (op0, 1);
 417     }
 418
 419   /* If VALUE is a floating-point mode, access it as an integer of the
 420      corresponding size.  This can occur on a machine with 64 bit registers
 421      that uses SFmode for float.  This can also occur for unaligned float
 422      structure fields.  */
 423   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 424     {
 425       if (GET_CODE (value) != REG)
 426         value = copy_to_reg (value);
 427       value = gen_rtx_SUBREG (word_mode, value, 0);
 428     }
 429
 430   /* Now OFFSET is nonzero only if OP0 is memory
 431      and is therefore always measured in bytes.  */
 432
 433 #ifdef HAVE_insv
 434   if (HAVE_insv
 435       && GET_MODE (value) != BLKmode
 436       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 437       /* Ensure insv's size is wide enough for this field.  */
 438       && (insv_bitsize >= bitsize)
 439       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 440             && (bitsize + bitpos > insv_bitsize)))
 441     {
 442       int xbitpos = bitpos;
 443       rtx value1;
 444       rtx xop0 = op0;
 445       rtx last = get_last_insn ();
 446       rtx pat;
 447       enum machine_mode maxmode;
 448       int save_volatile_ok = volatile_ok;
 449
 450       maxmode = insn_operand_mode[(int) CODE_FOR_insv][3];
 451       if (maxmode == VOIDmode)
 452         maxmode = word_mode;
 453
 454       volatile_ok = 1;
 455
 456       /* If this machine's insv can only insert into a register, copy OP0
 457          into a register and save it back later.  */
 458       /* This used to check flag_force_mem, but that was a serious
 459          de-optimization now that flag_force_mem is enabled by -O2.  */
 460       if (GET_CODE (op0) == MEM
 461           && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
 462                 (op0, VOIDmode)))
 463         {
 464           rtx tempreg;
 465           enum machine_mode bestmode;
 466
 467           /* Get the mode to use for inserting into this field.  If OP0 is
 468              BLKmode, get the smallest mode consistent with the alignment. If
 469              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 470              mode. Otherwise, use the smallest mode containing the field.  */
 471
 472           if (GET_MODE (op0) == BLKmode
 473               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 474             bestmode
 475               = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
 476                                MEM_VOLATILE_P (op0));
 477           else
 478             bestmode = GET_MODE (op0);
 479
 480           if (bestmode == VOIDmode
 481               || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
 482             goto insv_loses;
 483
 484           /* Adjust address to point to the containing unit of that mode.  */
 485           unit = GET_MODE_BITSIZE (bestmode);
 486           /* Compute offset as multiple of this unit, counting in bytes.  */
 487           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 488           bitpos = bitnum % unit;
 489           op0 = change_address (op0, bestmode,
 490                                 plus_constant (XEXP (op0, 0), offset));
 491
 492           /* Fetch that unit, store the bitfield in it, then store the unit.  */
 493           tempreg = copy_to_reg (op0);
 494           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 495                            align, total_size);
 496           emit_move_insn (op0, tempreg);
 497           return value;
 498         }
 499       volatile_ok = save_volatile_ok;
 500
 501       /* Add OFFSET into OP0's address.  */
 502       if (GET_CODE (xop0) == MEM)
 503         xop0 = change_address (xop0, byte_mode,
 504                                plus_constant (XEXP (xop0, 0), offset));
 505
 506       /* If xop0 is a register, we need it in MAXMODE
 507          to make it acceptable to the format of insv.  */
 508       if (GET_CODE (xop0) == SUBREG)
 509         /* We can't just change the mode, because this might clobber op0,
 510            and we will need the original value of op0 if insv fails.  */
 511         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
 512       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 513         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 514
 515       /* On big-endian machines, we count bits from the most significant.
 516          If the bit field insn does not, we must invert.  */
 517
 518       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 519         xbitpos = unit - bitsize - xbitpos;
 520
 521       /* We have been counting XBITPOS within UNIT.
 522          Count instead within the size of the register.  */
 523       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 524         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 525
 526       unit = GET_MODE_BITSIZE (maxmode);
 527
 528       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 529       value1 = value;
 530       if (GET_MODE (value) != maxmode)
 531         {
 532           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 533             {
 534               /* Optimization: Don't bother really extending VALUE
 535                  if it has all the bits we will actually use.  However,
 536                  if we must narrow it, be sure we do it correctly.  */
 537
 538               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 539                 {
 540                   /* Avoid making subreg of a subreg, or of a mem.  */
 541                   if (GET_CODE (value1) != REG)
 542                 value1 = copy_to_reg (value1);
 543                   value1 = gen_rtx_SUBREG (maxmode, value1, 0);
 544                 }
 545               else
 546                 value1 = gen_lowpart (maxmode, value1);
 547             }
 548           else if (!CONSTANT_P (value))
 549             /* Parse phase is supposed to make VALUE's data type
 550                match that of the component reference, which is a type
 551                at least as wide as the field; so VALUE should have
 552                a mode that corresponds to that type.  */
 553             abort ();
 554         }
 555
 556       /* If this machine's insv insists on a register,
 557          get VALUE1 into a register.  */
 558       if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
 559              (value1, maxmode)))
 560         value1 = force_reg (maxmode, value1);
 561
 562       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 563       if (pat)
 564         emit_insn (pat);
 565       else
 566         {
 567           delete_insns_since (last);
 568           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 569         }
 570     }
 571   else
 572     insv_loses:
 573 #endif
 574     /* Insv is not available; store using shifts and boolean ops.  */
 575     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 576   return value;
 577 }
 578 \f
 579 /* Use shifts and boolean operations to store VALUE
 580    into a bit field of width BITSIZE
 581    in a memory location specified by OP0 except offset by OFFSET bytes.
 582      (OFFSET must be 0 if OP0 is a register.)
 583    The field starts at position BITPOS within the byte.
 584     (If OP0 is a register, it may be a full word or a narrower mode,
 585      but BITPOS still counts within a full word,
 586      which is significant on bigendian machines.)
 587    STRUCT_ALIGN is the alignment the structure is known to have (in bytes).
 588
 589    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 590
 591 static void
 592 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 593      register rtx op0;
 594      register int offset, bitsize, bitpos;
 595      register rtx value;
 596      int struct_align;
 597 {
 598   register enum machine_mode mode;
 599   int total_bits = BITS_PER_WORD;
 600   rtx subtarget, temp;
 601   int all_zero = 0;
 602   int all_one = 0;
 603
 604   if (! SLOW_UNALIGNED_ACCESS)
 605     struct_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
 606
 607   /* There is a case not handled here:
 608      a structure with a known alignment of just a halfword
 609      and a field split across two aligned halfwords within the structure.
 610      Or likewise a structure with a known alignment of just a byte
 611      and a field split across two bytes.
 612      Such cases are not supposed to be able to occur.  */
 613
 614   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 615     {
 616       if (offset != 0)
 617         abort ();
 618       /* Special treatment for a bit field split across two registers.  */
 619       if (bitsize + bitpos > BITS_PER_WORD)
 620         {
 621           store_split_bit_field (op0, bitsize, bitpos,
 622                                  value, BITS_PER_WORD);
 623           return;
 624         }
 625     }
 626   else
 627     {
 628       /* Get the proper mode to use for this field.  We want a mode that
 629          includes the entire field.  If such a mode would be larger than
 630          a word, we won't be doing the extraction the normal way.  */
 631
 632       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 633                             struct_align * BITS_PER_UNIT, word_mode,
 634                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 635
 636       if (mode == VOIDmode)
 637         {
 638           /* The only way this should occur is if the field spans word
 639              boundaries.  */
 640           store_split_bit_field (op0,
 641                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 642                                  value, struct_align);
 643           return;
 644         }
 645
 646       total_bits = GET_MODE_BITSIZE (mode);
 647
 648       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 649          be in the range 0 to total_bits-1, and put any excess bytes in
 650          OFFSET.  */
 651       if (bitpos >= total_bits)
 652         {
 653           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 654           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 655                      * BITS_PER_UNIT);
 656         }
 657
 658       /* Get ref to an aligned byte, halfword, or word containing the field.
 659          Adjust BITPOS to be position within a word,
 660          and OFFSET to be the offset of that word.
 661          Then alter OP0 to refer to that word.  */
 662       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 663       offset -= (offset % (total_bits / BITS_PER_UNIT));
 664       op0 = change_address (op0, mode,
 665                             plus_constant (XEXP (op0, 0), offset));
 666     }
 667
 668   mode = GET_MODE (op0);
 669
 670   /* Now MODE is either some integral mode for a MEM as OP0,
 671      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 672      The bit field is contained entirely within OP0.
 673      BITPOS is the starting bit number within OP0.
 674      (OP0's mode may actually be narrower than MODE.)  */
 675
 676   if (BYTES_BIG_ENDIAN)
 677       /* BITPOS is the distance between our msb
 678          and that of the containing datum.
 679          Convert it to the distance from the lsb.  */
 680       bitpos = total_bits - bitsize - bitpos;
 681
 682   /* Now BITPOS is always the distance between our lsb
 683      and that of OP0.  */
 684
 685   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 686      we must first convert its mode to MODE.  */
 687
 688   if (GET_CODE (value) == CONST_INT)
 689     {
 690       register HOST_WIDE_INT v = INTVAL (value);
 691
 692       if (bitsize < HOST_BITS_PER_WIDE_INT)
 693         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 694
 695       if (v == 0)
 696         all_zero = 1;
 697       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 698                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 699                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 700         all_one = 1;
 701
 702       value = lshift_value (mode, value, bitpos, bitsize);
 703     }
 704   else
 705     {
 706       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 707                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 708
 709       if (GET_MODE (value) != mode)
 710         {
 711           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 712               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 713             value = gen_lowpart (mode, value);
 714           else
 715             value = convert_to_mode (mode, value, 1);
 716         }
 717
 718       if (must_and)
 719         value = expand_binop (mode, and_optab, value,
 720                               mask_rtx (mode, 0, bitsize, 0),
 721                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 722       if (bitpos > 0)
 723         value = expand_shift (LSHIFT_EXPR, mode, value,
 724                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 725     }
 726
 727   /* Now clear the chosen bits in OP0,
 728      except that if VALUE is -1 we need not bother.  */
 729
 730   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 731
 732   if (! all_one)
 733     {
 734       temp = expand_binop (mode, and_optab, op0,
 735                            mask_rtx (mode, bitpos, bitsize, 1),
 736                            subtarget, 1, OPTAB_LIB_WIDEN);
 737       subtarget = temp;
 738     }
 739   else
 740     temp = op0;
 741
 742   /* Now logical-or VALUE into OP0, unless it is zero.  */
 743
 744   if (! all_zero)
 745     temp = expand_binop (mode, ior_optab, temp, value,
 746                          subtarget, 1, OPTAB_LIB_WIDEN);
 747   if (op0 != temp)
 748     emit_move_insn (op0, temp);
 749 }
 750 \f
 751 /* Store a bit field that is split across multiple accessible memory objects.
 752
 753    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 754    BITSIZE is the field width; BITPOS the position of its first bit
 755    (within the word).
 756    VALUE is the value to store.
 757    ALIGN is the known alignment of OP0, measured in bytes.
 758    This is also the size of the memory objects to be used.
 759
 760    This does not yet handle fields wider than BITS_PER_WORD.  */
 761
 762 static void
 763 store_split_bit_field (op0, bitsize, bitpos, value, align)
 764      rtx op0;
 765      int bitsize, bitpos;
 766      rtx value;
 767      int align;
 768 {
 769   int unit;
 770   int bitsdone = 0;
 771
 772   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 773      much at a time.  */
 774   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 775     unit = BITS_PER_WORD;
 776   else
 777     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
 778
 779   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 780      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 781      that VALUE might be a floating-point constant.  */
 782   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 783     {
 784       rtx word = gen_lowpart_common (word_mode, value);
 785
 786       if (word && (value != word))
 787         value = word;
 788       else
 789         value = gen_lowpart_common (word_mode,
 790                                     force_reg (GET_MODE (value) != VOIDmode
 791                                                ? GET_MODE (value)
 792                                                : word_mode, value));
 793     }
 794   else if (GET_CODE (value) == ADDRESSOF)
 795     value = copy_to_reg (value);
 796
 797   while (bitsdone < bitsize)
 798     {
 799       int thissize;
 800       rtx part, word;
 801       int thispos;
 802       int offset;
 803
 804       offset = (bitpos + bitsdone) / unit;
 805       thispos = (bitpos + bitsdone) % unit;
 806
 807       /* THISSIZE must not overrun a word boundary.  Otherwise,
 808          store_fixed_bit_field will call us again, and we will mutually
 809          recurse forever.  */
 810       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 811       thissize = MIN (thissize, unit - thispos);
 812
 813       if (BYTES_BIG_ENDIAN)
 814         {
 815           int total_bits;
 816
 817           /* We must do an endian conversion exactly the same way as it is
 818              done in extract_bit_field, so that the two calls to
 819              extract_fixed_bit_field will have comparable arguments.  */
 820           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 821             total_bits = BITS_PER_WORD;
 822           else
 823             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 824
 825           /* Fetch successively less significant portions.  */
 826           if (GET_CODE (value) == CONST_INT)
 827             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 828                              >> (bitsize - bitsdone - thissize))
 829                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 830           else
 831             /* The args are chosen so that the last part includes the
 832                lsb.  Give extract_bit_field the value it needs (with
 833                endianness compensation) to fetch the piece we want.
 834
 835                ??? We have no idea what the alignment of VALUE is, so
 836                we have to use a guess.  */
 837             part
 838               = extract_fixed_bit_field
 839                 (word_mode, value, 0, thissize,
 840                  total_bits - bitsize + bitsdone, NULL_RTX, 1,
 841                  GET_MODE (value) == VOIDmode
 842                  ? UNITS_PER_WORD
 843                  : (GET_MODE (value) == BLKmode
 844                     ? 1
 845                     : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
 846         }
 847       else
 848         {
 849           /* Fetch successively more significant portions.  */
 850           if (GET_CODE (value) == CONST_INT)
 851             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 852                              >> bitsdone)
 853                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 854           else
 855             part
 856               = extract_fixed_bit_field
 857                 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
 858                  GET_MODE (value) == VOIDmode
 859                  ? UNITS_PER_WORD
 860                  : (GET_MODE (value) == BLKmode
 861                     ? 1
 862                     : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
 863         }
 864
 865       /* If OP0 is a register, then handle OFFSET here.
 866
 867          When handling multiword bitfields, extract_bit_field may pass
 868          down a word_mode SUBREG of a larger REG for a bitfield that actually
 869          crosses a word boundary.  Thus, for a SUBREG, we must find
 870          the current word starting from the base register.  */
 871       if (GET_CODE (op0) == SUBREG)
 872         {
 873           word = operand_subword_force (SUBREG_REG (op0),
 874                                         SUBREG_WORD (op0) + offset,
 875                                         GET_MODE (SUBREG_REG (op0)));
 876           offset = 0;
 877         }
 878       else if (GET_CODE (op0) == REG)
 879         {
 880           word = operand_subword_force (op0, offset, GET_MODE (op0));
 881           offset = 0;
 882         }
 883       else
 884         word = op0;
 885
 886       /* OFFSET is in UNITs, and UNIT is in bits.
 887          store_fixed_bit_field wants offset in bytes.  */
 888       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 889                              thissize, thispos, part, align);
 890       bitsdone += thissize;
 891     }
 892 }
 893 \f
 894 /* Generate code to extract a byte-field from STR_RTX
 895    containing BITSIZE bits, starting at BITNUM,
 896    and put it in TARGET if possible (if TARGET is nonzero).
 897    Regardless of TARGET, we return the rtx for where the value is placed.
 898    It may be a QUEUED.
 899
 900    STR_RTX is the structure containing the byte (a REG or MEM).
 901    UNSIGNEDP is nonzero if this is an unsigned bit field.
 902    MODE is the natural mode of the field value once extracted.
 903    TMODE is the mode the caller would like the value to have;
 904    but the value may be returned with type MODE instead.
 905
 906    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
 907    TOTAL_SIZE is the size in bytes of the containing structure,
 908    or -1 if varying.
 909
 910    If a TARGET is specified and we can store in it at no extra cost,
 911    we do so, and return TARGET.
 912    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 913    if they are equally easy.  */
 914
 915 rtx
 916 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 917                    target, mode, tmode, align, total_size)
 918      rtx str_rtx;
 919      register int bitsize;
 920      int bitnum;
 921      int unsignedp;
 922      rtx target;
 923      enum machine_mode mode, tmode;
 924      int align;
 925      int total_size;
 926 {
 927   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 928   register int offset = bitnum / unit;
 929   register int bitpos = bitnum % unit;
 930   register rtx op0 = str_rtx;
 931   rtx spec_target = target;
 932   rtx spec_target_subreg = 0;
 933 #ifdef HAVE_extv
 934   int extv_bitsize;
 935 #endif
 936 #ifdef HAVE_extzv
 937   int extzv_bitsize;
 938 #endif
 939
 940 #ifdef HAVE_extv
 941   if (insn_operand_mode[(int) CODE_FOR_extv][0] == VOIDmode)
 942     extv_bitsize = GET_MODE_BITSIZE (word_mode);
 943   else
 944     extv_bitsize = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]);
 945 #endif
 946
 947 #ifdef HAVE_extzv
 948   if (insn_operand_mode[(int) CODE_FOR_extzv][0] == VOIDmode)
 949     extzv_bitsize = GET_MODE_BITSIZE (word_mode);
 950   else
 951     extzv_bitsize
 952       = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]);
 953 #endif
 954
 955   /* Discount the part of the structure before the desired byte.
 956      We need to know how many bytes are safe to reference after it.  */
 957   if (total_size >= 0)
 958     total_size -= (bitpos / BIGGEST_ALIGNMENT
 959                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 960
 961   if (tmode == VOIDmode)
 962     tmode = mode;
 963   while (GET_CODE (op0) == SUBREG)
 964     {
 965       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
 966       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
 967
 968       offset += SUBREG_WORD (op0);
 969
 970       inner_size = MIN (inner_size, BITS_PER_WORD);
 971
 972       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
 973         {
 974           bitpos += inner_size - outer_size;
 975           if (bitpos > unit)
 976             {
 977               offset += (bitpos / unit);
 978               bitpos %= unit;
 979             }
 980         }
 981
 982       op0 = SUBREG_REG (op0);
 983     }
 984
 985   /* Make sure we are playing with integral modes.  Pun with subregs
 986      if we aren't.  */
 987   {
 988     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 989     if (imode != GET_MODE (op0))
 990       {
 991         if (GET_CODE (op0) == MEM)
 992           op0 = change_address (op0, imode, NULL_RTX);
 993         else if (imode != BLKmode)
 994           op0 = gen_lowpart (imode, op0);
 995         else
 996           abort ();
 997       }
 998   }
 999
1000   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1001      If that's wrong, the solution is to test for it and set TARGET to 0
1002      if needed.  */
1003
1004   /* If OP0 is a register, BITPOS must count within a word.
1005      But as we have it, it counts within whatever size OP0 now has.
1006      On a bigendian machine, these are not the same, so convert.  */
1007   if (BYTES_BIG_ENDIAN
1008       && GET_CODE (op0) != MEM
1009       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1010     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1011
1012   /* Extracting a full-word or multi-word value
1013      from a structure in a register or aligned memory.
1014      This can be done with just SUBREG.
1015      So too extracting a subword value in
1016      the least significant part of the register.  */
1017
1018   if (((GET_CODE (op0) != MEM
1019         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1020                                   GET_MODE_BITSIZE (GET_MODE (op0))))
1021        || (GET_CODE (op0) == MEM
1022            && (! SLOW_UNALIGNED_ACCESS
1023                || (offset * BITS_PER_UNIT % bitsize == 0
1024                    && align * BITS_PER_UNIT % bitsize == 0))))
1025       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1026            && bitpos % BITS_PER_WORD == 0)
1027           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1028               /* ??? The big endian test here is wrong.  This is correct
1029                  if the value is in a register, and if mode_for_size is not
1030                  the same mode as op0.  This causes us to get unnecessarily
1031                  inefficient code from the Thumb port when -mbig-endian.  */
1032               && (BYTES_BIG_ENDIAN
1033                   ? bitpos + bitsize == BITS_PER_WORD
1034                   : bitpos == 0))))
1035     {
1036       enum machine_mode mode1
1037         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
1038
1039       if (mode1 != GET_MODE (op0))
1040         {
1041           if (GET_CODE (op0) == SUBREG)
1042             {
1043               if (GET_MODE (SUBREG_REG (op0)) == mode1
1044                   || GET_MODE_CLASS (mode1) == MODE_INT
1045                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1046                 op0 = SUBREG_REG (op0);
1047               else
1048                 /* Else we've got some float mode source being extracted into
1049                    a different float mode destination -- this combination of
1050                    subregs results in Severe Tire Damage.  */
1051                 abort ();
1052             }
1053           if (GET_CODE (op0) == REG)
1054             op0 = gen_rtx_SUBREG (mode1, op0, offset);
1055           else
1056             op0 = change_address (op0, mode1,
1057                                   plus_constant (XEXP (op0, 0), offset));
1058         }
1059       if (mode1 != mode)
1060         return convert_to_mode (tmode, op0, unsignedp);
1061       return op0;
1062     }
1063
1064   /* Handle fields bigger than a word.  */
1065
1066   if (bitsize > BITS_PER_WORD)
1067     {
1068       /* Here we transfer the words of the field
1069          in the order least significant first.
1070          This is because the most significant word is the one which may
1071          be less than full.  */
1072
1073       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1074       int i;
1075
1076       if (target == 0 || GET_CODE (target) != REG)
1077         target = gen_reg_rtx (mode);
1078
1079       /* Indicate for flow that the entire target reg is being set.  */
1080       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1081
1082       for (i = 0; i < nwords; i++)
1083         {
1084           /* If I is 0, use the low-order word in both field and target;
1085              if I is 1, use the next to lowest word; and so on.  */
1086           /* Word number in TARGET to use.  */
1087           int wordnum = (WORDS_BIG_ENDIAN
1088                          ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1089                          : i);
1090           /* Offset from start of field in OP0.  */
1091           int bit_offset = (WORDS_BIG_ENDIAN
1092                             ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
1093                             : i * BITS_PER_WORD);
1094           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1095           rtx result_part
1096             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1097                                            bitsize - i * BITS_PER_WORD),
1098                                  bitnum + bit_offset,
1099                                  1, target_part, mode, word_mode,
1100                                  align, total_size);
1101
1102           if (target_part == 0)
1103             abort ();
1104
1105           if (result_part != target_part)
1106             emit_move_insn (target_part, result_part);
1107         }
1108
1109       if (unsignedp)
1110         {
1111           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1112              need to be zero'd out.  */
1113           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1114             {
1115               int i,total_words;
1116
1117               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1118               for (i = nwords; i < total_words; i++)
1119                 {
1120                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1121                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1122                   emit_move_insn (target_part, const0_rtx);
1123                 }
1124             }
1125           return target;
1126         }
1127
1128       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1129       target = expand_shift (LSHIFT_EXPR, mode, target,
1130                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1131                              NULL_RTX, 0);
1132       return expand_shift (RSHIFT_EXPR, mode, target,
1133                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1134                            NULL_RTX, 0);
1135     }
1136
1137   /* From here on we know the desired field is smaller than a word
1138      so we can assume it is an integer.  So we can safely extract it as one
1139      size of integer, if necessary, and then truncate or extend
1140      to the size that is wanted.  */
1141
1142   /* OFFSET is the number of words or bytes (UNIT says which)
1143      from STR_RTX to the first word or byte containing part of the field.  */
1144
1145   if (GET_CODE (op0) != MEM)
1146     {
1147       if (offset != 0
1148           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1149         op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1150                               op0, offset);
1151       offset = 0;
1152     }
1153   else
1154     {
1155       op0 = protect_from_queue (str_rtx, 1);
1156     }
1157
1158   /* Now OFFSET is nonzero only for memory operands.  */
1159
1160   if (unsignedp)
1161     {
1162 #ifdef HAVE_extzv
1163       if (HAVE_extzv
1164           && (extzv_bitsize >= bitsize)
1165           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1166                 && (bitsize + bitpos > extzv_bitsize)))
1167         {
1168           int xbitpos = bitpos, xoffset = offset;
1169           rtx bitsize_rtx, bitpos_rtx;
1170           rtx last = get_last_insn ();
1171           rtx xop0 = op0;
1172           rtx xtarget = target;
1173           rtx xspec_target = spec_target;
1174           rtx xspec_target_subreg = spec_target_subreg;
1175           rtx pat;
1176           enum machine_mode maxmode;
1177
1178           maxmode = insn_operand_mode[(int) CODE_FOR_extzv][0];
1179           if (maxmode == VOIDmode)
1180             maxmode = word_mode;
1181
1182           if (GET_CODE (xop0) == MEM)
1183             {
1184               int save_volatile_ok = volatile_ok;
1185               volatile_ok = 1;
1186
1187               /* Is the memory operand acceptable?  */
1188               if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
1189                      (xop0, GET_MODE (xop0))))
1190                 {
1191                   /* No, load into a reg and extract from there.  */
1192                   enum machine_mode bestmode;
1193
1194                   /* Get the mode to use for inserting into this field.  If
1195                      OP0 is BLKmode, get the smallest mode consistent with the
1196                      alignment. If OP0 is a non-BLKmode object that is no
1197                      wider than MAXMODE, use its mode. Otherwise, use the
1198                      smallest mode containing the field.  */
1199
1200                   if (GET_MODE (xop0) == BLKmode
1201                       || (GET_MODE_SIZE (GET_MODE (op0))
1202                           > GET_MODE_SIZE (maxmode)))
1203                     bestmode = get_best_mode (bitsize, bitnum,
1204                                               align * BITS_PER_UNIT, maxmode,
1205                                               MEM_VOLATILE_P (xop0));
1206                   else
1207                     bestmode = GET_MODE (xop0);
1208
1209                   if (bestmode == VOIDmode
1210                       || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1211                     goto extzv_loses;
1212
1213                   /* Compute offset as multiple of this unit,
1214                      counting in bytes.  */
1215                   unit = GET_MODE_BITSIZE (bestmode);
1216                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1217                   xbitpos = bitnum % unit;
1218                   xop0 = change_address (xop0, bestmode,
1219                                          plus_constant (XEXP (xop0, 0),
1220                                                         xoffset));
1221                   /* Fetch it to a register in that size.  */
1222                   xop0 = force_reg (bestmode, xop0);
1223
1224                   /* XBITPOS counts within UNIT, which is what is expected.  */
1225                 }
1226               else
1227                 /* Get ref to first byte containing part of the field.  */
1228                 xop0 = change_address (xop0, byte_mode,
1229                                        plus_constant (XEXP (xop0, 0), xoffset));
1230
1231               volatile_ok = save_volatile_ok;
1232             }
1233
1234           /* If op0 is a register, we need it in MAXMODE (which is usually
1235              SImode). to make it acceptable to the format of extzv.  */
1236           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1237             goto extzv_loses;
1238           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1239             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1240
1241           /* On big-endian machines, we count bits from the most significant.
1242              If the bit field insn does not, we must invert.  */
1243           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1244             xbitpos = unit - bitsize - xbitpos;
1245
1246           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1247           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1248             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1249
1250           unit = GET_MODE_BITSIZE (maxmode);
1251
1252           if (xtarget == 0
1253               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1254             xtarget = xspec_target = gen_reg_rtx (tmode);
1255
1256           if (GET_MODE (xtarget) != maxmode)
1257             {
1258               if (GET_CODE (xtarget) == REG)
1259                 {
1260                   int wider = (GET_MODE_SIZE (maxmode)
1261                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1262                   xtarget = gen_lowpart (maxmode, xtarget);
1263                   if (wider)
1264                     xspec_target_subreg = xtarget;
1265                 }
1266               else
1267                 xtarget = gen_reg_rtx (maxmode);
1268             }
1269
1270           /* If this machine's extzv insists on a register target,
1271              make sure we have one.  */
1272           if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
1273                  (xtarget, maxmode)))
1274             xtarget = gen_reg_rtx (maxmode);
1275
1276           bitsize_rtx = GEN_INT (bitsize);
1277           bitpos_rtx = GEN_INT (xbitpos);
1278
1279           pat = gen_extzv (protect_from_queue (xtarget, 1),
1280                            xop0, bitsize_rtx, bitpos_rtx);
1281           if (pat)
1282             {
1283               emit_insn (pat);
1284               target = xtarget;
1285               spec_target = xspec_target;
1286               spec_target_subreg = xspec_target_subreg;
1287             }
1288           else
1289             {
1290               delete_insns_since (last);
1291               target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1292                                                 bitpos, target, 1, align);
1293             }
1294         }
1295       else
1296         extzv_loses:
1297 #endif
1298         target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1299                                           target, 1, align);
1300     }
1301   else
1302     {
1303 #ifdef HAVE_extv
1304       if (HAVE_extv
1305           && (extv_bitsize >= bitsize)
1306           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1307                 && (bitsize + bitpos > extv_bitsize)))
1308         {
1309           int xbitpos = bitpos, xoffset = offset;
1310           rtx bitsize_rtx, bitpos_rtx;
1311           rtx last = get_last_insn ();
1312           rtx xop0 = op0, xtarget = target;
1313           rtx xspec_target = spec_target;
1314           rtx xspec_target_subreg = spec_target_subreg;
1315           rtx pat;
1316           enum machine_mode maxmode;
1317
1318           maxmode = insn_operand_mode[(int) CODE_FOR_extv][0];
1319           if (maxmode == VOIDmode)
1320             maxmode = word_mode;
1321
1322           if (GET_CODE (xop0) == MEM)
1323             {
1324               /* Is the memory operand acceptable?  */
1325               if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
1326                      (xop0, GET_MODE (xop0))))
1327                 {
1328                   /* No, load into a reg and extract from there.  */
1329                   enum machine_mode bestmode;
1330
1331                   /* Get the mode to use for inserting into this field.  If
1332                      OP0 is BLKmode, get the smallest mode consistent with the
1333                      alignment. If OP0 is a non-BLKmode object that is no
1334                      wider than MAXMODE, use its mode. Otherwise, use the
1335                      smallest mode containing the field.  */
1336
1337                   if (GET_MODE (xop0) == BLKmode
1338                       || (GET_MODE_SIZE (GET_MODE (op0))
1339                           > GET_MODE_SIZE (maxmode)))
1340                     bestmode = get_best_mode (bitsize, bitnum,
1341                                               align * BITS_PER_UNIT, maxmode,
1342                                               MEM_VOLATILE_P (xop0));
1343                   else
1344                     bestmode = GET_MODE (xop0);
1345
1346                   if (bestmode == VOIDmode
1347                       || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1348                     goto extv_loses;
1349
1350                   /* Compute offset as multiple of this unit,
1351                      counting in bytes.  */
1352                   unit = GET_MODE_BITSIZE (bestmode);
1353                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1354                   xbitpos = bitnum % unit;
1355                   xop0 = change_address (xop0, bestmode,
1356                                          plus_constant (XEXP (xop0, 0),
1357                                                         xoffset));
1358                   /* Fetch it to a register in that size.  */
1359                   xop0 = force_reg (bestmode, xop0);
1360
1361                   /* XBITPOS counts within UNIT, which is what is expected.  */
1362                 }
1363               else
1364                 /* Get ref to first byte containing part of the field.  */
1365                 xop0 = change_address (xop0, byte_mode,
1366                                        plus_constant (XEXP (xop0, 0), xoffset));
1367             }
1368
1369           /* If op0 is a register, we need it in MAXMODE (which is usually
1370              SImode) to make it acceptable to the format of extv.  */
1371           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1372             goto extv_loses;
1373           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1374             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1375
1376           /* On big-endian machines, we count bits from the most significant.
1377              If the bit field insn does not, we must invert.  */
1378           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1379             xbitpos = unit - bitsize - xbitpos;
1380
1381           /* XBITPOS counts within a size of UNIT.
1382              Adjust to count within a size of MAXMODE.  */
1383           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1384             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1385
1386           unit = GET_MODE_BITSIZE (maxmode);
1387
1388           if (xtarget == 0
1389               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1390             xtarget = xspec_target = gen_reg_rtx (tmode);
1391
1392           if (GET_MODE (xtarget) != maxmode)
1393             {
1394               if (GET_CODE (xtarget) == REG)
1395                 {
1396                   int wider = (GET_MODE_SIZE (maxmode)
1397                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1398                   xtarget = gen_lowpart (maxmode, xtarget);
1399                   if (wider)
1400                     xspec_target_subreg = xtarget;
1401                 }
1402               else
1403                 xtarget = gen_reg_rtx (maxmode);
1404             }
1405
1406           /* If this machine's extv insists on a register target,
1407              make sure we have one.  */
1408           if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
1409                  (xtarget, maxmode)))
1410             xtarget = gen_reg_rtx (maxmode);
1411
1412           bitsize_rtx = GEN_INT (bitsize);
1413           bitpos_rtx = GEN_INT (xbitpos);
1414
1415           pat = gen_extv (protect_from_queue (xtarget, 1),
1416                           xop0, bitsize_rtx, bitpos_rtx);
1417           if (pat)
1418             {
1419               emit_insn (pat);
1420               target = xtarget;
1421               spec_target = xspec_target;
1422               spec_target_subreg = xspec_target_subreg;
1423             }
1424           else
1425             {
1426               delete_insns_since (last);
1427               target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1428                                                 bitpos, target, 0, align);
1429             }
1430         }
1431       else
1432         extv_loses:
1433 #endif
1434         target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1435                                           target, 0, align);
1436     }
1437   if (target == spec_target)
1438     return target;
1439   if (target == spec_target_subreg)
1440     return spec_target;
1441   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1442     {
1443       /* If the target mode is floating-point, first convert to the
1444          integer mode of that size and then access it as a floating-point
1445          value via a SUBREG.  */
1446       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1447         {
1448           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1449                                                    MODE_INT, 0),
1450                                     target, unsignedp);
1451           if (GET_CODE (target) != REG)
1452             target = copy_to_reg (target);
1453           return gen_rtx_SUBREG (tmode, target, 0);
1454         }
1455       else
1456         return convert_to_mode (tmode, target, unsignedp);
1457     }
1458   return target;
1459 }
1460 \f
1461 /* Extract a bit field using shifts and boolean operations
1462    Returns an rtx to represent the value.
1463    OP0 addresses a register (word) or memory (byte).
1464    BITPOS says which bit within the word or byte the bit field starts in.
1465    OFFSET says how many bytes farther the bit field starts;
1466     it is 0 if OP0 is a register.
1467    BITSIZE says how many bits long the bit field is.
1468     (If OP0 is a register, it may be narrower than a full word,
1469      but BITPOS still counts within a full word,
1470      which is significant on bigendian machines.)
1471
1472    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1473    If TARGET is nonzero, attempts to store the value there
1474    and return TARGET, but this is not guaranteed.
1475    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1476
1477    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.  */
1478
1479 static rtx
1480 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1481                          target, unsignedp, align)
1482      enum machine_mode tmode;
1483      register rtx op0, target;
1484      register int offset, bitsize, bitpos;
1485      int unsignedp;
1486      int align;
1487 {
1488   int total_bits = BITS_PER_WORD;
1489   enum machine_mode mode;
1490
1491   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1492     {
1493       /* Special treatment for a bit field split across two registers.  */
1494       if (bitsize + bitpos > BITS_PER_WORD)
1495         return extract_split_bit_field (op0, bitsize, bitpos,
1496                                         unsignedp, align);
1497     }
1498   else
1499     {
1500       /* Get the proper mode to use for this field.  We want a mode that
1501          includes the entire field.  If such a mode would be larger than
1502          a word, we won't be doing the extraction the normal way.  */
1503
1504       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1505                             align * BITS_PER_UNIT, word_mode,
1506                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1507
1508       if (mode == VOIDmode)
1509         /* The only way this should occur is if the field spans word
1510            boundaries.  */
1511         return extract_split_bit_field (op0, bitsize,
1512                                         bitpos + offset * BITS_PER_UNIT,
1513                                         unsignedp, align);
1514
1515       total_bits = GET_MODE_BITSIZE (mode);
1516
1517       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1518          be in the range 0 to total_bits-1, and put any excess bytes in
1519          OFFSET.  */
1520       if (bitpos >= total_bits)
1521         {
1522           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1523           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1524                      * BITS_PER_UNIT);
1525         }
1526
1527       /* Get ref to an aligned byte, halfword, or word containing the field.
1528          Adjust BITPOS to be position within a word,
1529          and OFFSET to be the offset of that word.
1530          Then alter OP0 to refer to that word.  */
1531       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1532       offset -= (offset % (total_bits / BITS_PER_UNIT));
1533       op0 = change_address (op0, mode,
1534                             plus_constant (XEXP (op0, 0), offset));
1535     }
1536
1537   mode = GET_MODE (op0);
1538
1539   if (BYTES_BIG_ENDIAN)
1540     {
1541       /* BITPOS is the distance between our msb and that of OP0.
1542          Convert it to the distance from the lsb.  */
1543
1544       bitpos = total_bits - bitsize - bitpos;
1545     }
1546
1547   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1548      We have reduced the big-endian case to the little-endian case.  */
1549
1550   if (unsignedp)
1551     {
1552       if (bitpos)
1553         {
1554           /* If the field does not already start at the lsb,
1555              shift it so it does.  */
1556           tree amount = build_int_2 (bitpos, 0);
1557           /* Maybe propagate the target for the shift.  */
1558           /* But not if we will return it--could confuse integrate.c.  */
1559           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1560                            && !REG_FUNCTION_VALUE_P (target)
1561                            ? target : 0);
1562           if (tmode != mode) subtarget = 0;
1563           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1564         }
1565       /* Convert the value to the desired mode.  */
1566       if (mode != tmode)
1567         op0 = convert_to_mode (tmode, op0, 1);
1568
1569       /* Unless the msb of the field used to be the msb when we shifted,
1570          mask out the upper bits.  */
1571
1572       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1573 #if 0
1574 #ifdef SLOW_ZERO_EXTEND
1575           /* Always generate an `and' if
1576              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1577              will combine fruitfully with the zero-extend.  */
1578           || tmode != mode
1579 #endif
1580 #endif
1581           )
1582         return expand_binop (GET_MODE (op0), and_optab, op0,
1583                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1584                              target, 1, OPTAB_LIB_WIDEN);
1585       return op0;
1586     }
1587
1588   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1589      then arithmetic-shift its lsb to the lsb of the word.  */
1590   op0 = force_reg (mode, op0);
1591   if (mode != tmode)
1592     target = 0;
1593
1594   /* Find the narrowest integer mode that contains the field.  */
1595
1596   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1597        mode = GET_MODE_WIDER_MODE (mode))
1598     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1599       {
1600         op0 = convert_to_mode (mode, op0, 0);
1601         break;
1602       }
1603
1604   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1605     {
1606       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1607       /* Maybe propagate the target for the shift.  */
1608       /* But not if we will return the result--could confuse integrate.c.  */
1609       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1610                        && ! REG_FUNCTION_VALUE_P (target)
1611                        ? target : 0);
1612       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1613     }
1614
1615   return expand_shift (RSHIFT_EXPR, mode, op0,
1616                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1617                        target, 0);
1618 }
1619 \f
1620 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1621    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1622    complement of that if COMPLEMENT.  The mask is truncated if
1623    necessary to the width of mode MODE.  The mask is zero-extended if
1624    BITSIZE+BITPOS is too small for MODE.  */
1625
1626 static rtx
1627 mask_rtx (mode, bitpos, bitsize, complement)
1628      enum machine_mode mode;
1629      int bitpos, bitsize, complement;
1630 {
1631   HOST_WIDE_INT masklow, maskhigh;
1632
1633   if (bitpos < HOST_BITS_PER_WIDE_INT)
1634     masklow = (HOST_WIDE_INT) -1 << bitpos;
1635   else
1636     masklow = 0;
1637
1638   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1639     masklow &= ((unsigned HOST_WIDE_INT) -1
1640                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1641
1642   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1643     maskhigh = -1;
1644   else
1645     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1646
1647   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1648     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1649                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1650   else
1651     maskhigh = 0;
1652
1653   if (complement)
1654     {
1655       maskhigh = ~maskhigh;
1656       masklow = ~masklow;
1657     }
1658
1659   return immed_double_const (masklow, maskhigh, mode);
1660 }
1661
1662 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1663    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1664
1665 static rtx
1666 lshift_value (mode, value, bitpos, bitsize)
1667      enum machine_mode mode;
1668      rtx value;
1669      int bitpos, bitsize;
1670 {
1671   unsigned HOST_WIDE_INT v = INTVAL (value);
1672   HOST_WIDE_INT low, high;
1673
1674   if (bitsize < HOST_BITS_PER_WIDE_INT)
1675     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1676
1677   if (bitpos < HOST_BITS_PER_WIDE_INT)
1678     {
1679       low = v << bitpos;
1680       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1681     }
1682   else
1683     {
1684       low = 0;
1685       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1686     }
1687
1688   return immed_double_const (low, high, mode);
1689 }
1690 \f
1691 /* Extract a bit field that is split across two words
1692    and return an RTX for the result.
1693
1694    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1695    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1696    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1697
1698    ALIGN is the known alignment of OP0, measured in bytes.
1699    This is also the size of the memory objects to be used.  */
1700
1701 static rtx
1702 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1703      rtx op0;
1704      int bitsize, bitpos, unsignedp, align;
1705 {
1706   int unit;
1707   int bitsdone = 0;
1708   rtx result = NULL_RTX;
1709   int first = 1;
1710
1711   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1712      much at a time.  */
1713   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1714     unit = BITS_PER_WORD;
1715   else
1716     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
1717
1718   while (bitsdone < bitsize)
1719     {
1720       int thissize;
1721       rtx part, word;
1722       int thispos;
1723       int offset;
1724
1725       offset = (bitpos + bitsdone) / unit;
1726       thispos = (bitpos + bitsdone) % unit;
1727
1728       /* THISSIZE must not overrun a word boundary.  Otherwise,
1729          extract_fixed_bit_field will call us again, and we will mutually
1730          recurse forever.  */
1731       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1732       thissize = MIN (thissize, unit - thispos);
1733
1734       /* If OP0 is a register, then handle OFFSET here.
1735
1736          When handling multiword bitfields, extract_bit_field may pass
1737          down a word_mode SUBREG of a larger REG for a bitfield that actually
1738          crosses a word boundary.  Thus, for a SUBREG, we must find
1739          the current word starting from the base register.  */
1740       if (GET_CODE (op0) == SUBREG)
1741         {
1742           word = operand_subword_force (SUBREG_REG (op0),
1743                                         SUBREG_WORD (op0) + offset,
1744                                         GET_MODE (SUBREG_REG (op0)));
1745           offset = 0;
1746         }
1747       else if (GET_CODE (op0) == REG)
1748         {
1749           word = operand_subword_force (op0, offset, GET_MODE (op0));
1750           offset = 0;
1751         }
1752       else
1753         word = op0;
1754
1755       /* Extract the parts in bit-counting order,
1756          whose meaning is determined by BYTES_PER_UNIT.
1757          OFFSET is in UNITs, and UNIT is in bits.
1758          extract_fixed_bit_field wants offset in bytes.  */
1759       part = extract_fixed_bit_field (word_mode, word,
1760                                       offset * unit / BITS_PER_UNIT,
1761                                       thissize, thispos, 0, 1, align);
1762       bitsdone += thissize;
1763
1764       /* Shift this part into place for the result.  */
1765       if (BYTES_BIG_ENDIAN)
1766         {
1767           if (bitsize != bitsdone)
1768             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1769                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1770         }
1771       else
1772         {
1773           if (bitsdone != thissize)
1774             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1775                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1776         }
1777
1778       if (first)
1779         result = part;
1780       else
1781         /* Combine the parts with bitwise or.  This works
1782            because we extracted each part as an unsigned bit field.  */
1783         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1784                                OPTAB_LIB_WIDEN);
1785
1786       first = 0;
1787     }
1788
1789   /* Unsigned bit field: we are done.  */
1790   if (unsignedp)
1791     return result;
1792   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1793   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1794                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1795                          NULL_RTX, 0);
1796   return expand_shift (RSHIFT_EXPR, word_mode, result,
1797                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1798 }
1799 \f
1800 /* Add INC into TARGET.  */
1801
1802 void
1803 expand_inc (target, inc)
1804      rtx target, inc;
1805 {
1806   rtx value = expand_binop (GET_MODE (target), add_optab,
1807                             target, inc,
1808                             target, 0, OPTAB_LIB_WIDEN);
1809   if (value != target)
1810     emit_move_insn (target, value);
1811 }
1812
1813 /* Subtract DEC from TARGET.  */
1814
1815 void
1816 expand_dec (target, dec)
1817      rtx target, dec;
1818 {
1819   rtx value = expand_binop (GET_MODE (target), sub_optab,
1820                             target, dec,
1821                             target, 0, OPTAB_LIB_WIDEN);
1822   if (value != target)
1823     emit_move_insn (target, value);
1824 }
1825 \f
1826 /* Output a shift instruction for expression code CODE,
1827    with SHIFTED being the rtx for the value to shift,
1828    and AMOUNT the tree for the amount to shift by.
1829    Store the result in the rtx TARGET, if that is convenient.
1830    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1831    Return the rtx for where the value is.  */
1832
1833 rtx
1834 expand_shift (code, mode, shifted, amount, target, unsignedp)
1835      enum tree_code code;
1836      register enum machine_mode mode;
1837      rtx shifted;
1838      tree amount;
1839      register rtx target;
1840      int unsignedp;
1841 {
1842   register rtx op1, temp = 0;
1843   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1844   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1845   int try;
1846
1847   /* Previously detected shift-counts computed by NEGATE_EXPR
1848      and shifted in the other direction; but that does not work
1849      on all machines.  */
1850
1851   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1852
1853 #ifdef SHIFT_COUNT_TRUNCATED
1854   if (SHIFT_COUNT_TRUNCATED)
1855     {
1856       if (GET_CODE (op1) == CONST_INT
1857           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1858               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1859         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1860                        % GET_MODE_BITSIZE (mode));
1861       else if (GET_CODE (op1) == SUBREG
1862                && SUBREG_WORD (op1) == 0)
1863         op1 = SUBREG_REG (op1);
1864     }
1865 #endif
1866
1867   if (op1 == const0_rtx)
1868     return shifted;
1869
1870   for (try = 0; temp == 0 && try < 3; try++)
1871     {
1872       enum optab_methods methods;
1873
1874       if (try == 0)
1875         methods = OPTAB_DIRECT;
1876       else if (try == 1)
1877         methods = OPTAB_WIDEN;
1878       else
1879         methods = OPTAB_LIB_WIDEN;
1880
1881       if (rotate)
1882         {
1883           /* Widening does not work for rotation.  */
1884           if (methods == OPTAB_WIDEN)
1885             continue;
1886           else if (methods == OPTAB_LIB_WIDEN)
1887             {
1888               /* If we have been unable to open-code this by a rotation,
1889                  do it as the IOR of two shifts.  I.e., to rotate A
1890                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1891                  where C is the bitsize of A.
1892
1893                  It is theoretically possible that the target machine might
1894                  not be able to perform either shift and hence we would
1895                  be making two libcalls rather than just the one for the
1896                  shift (similarly if IOR could not be done).  We will allow
1897                  this extremely unlikely lossage to avoid complicating the
1898                  code below.  */
1899
1900               rtx subtarget = target == shifted ? 0 : target;
1901               rtx temp1;
1902               tree type = TREE_TYPE (amount);
1903               tree new_amount = make_tree (type, op1);
1904               tree other_amount
1905                 = fold (build (MINUS_EXPR, type,
1906                                convert (type,
1907                                         build_int_2 (GET_MODE_BITSIZE (mode),
1908                                                      0)),
1909                                amount));
1910
1911               shifted = force_reg (mode, shifted);
1912
1913               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1914                                    mode, shifted, new_amount, subtarget, 1);
1915               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1916                                     mode, shifted, other_amount, 0, 1);
1917               return expand_binop (mode, ior_optab, temp, temp1, target,
1918                                    unsignedp, methods);
1919             }
1920
1921           temp = expand_binop (mode,
1922                                left ? rotl_optab : rotr_optab,
1923                                shifted, op1, target, unsignedp, methods);
1924
1925           /* If we don't have the rotate, but we are rotating by a constant
1926              that is in range, try a rotate in the opposite direction.  */
1927
1928           if (temp == 0 && GET_CODE (op1) == CONST_INT
1929               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1930             temp = expand_binop (mode,
1931                                  left ? rotr_optab : rotl_optab,
1932                                  shifted,
1933                                  GEN_INT (GET_MODE_BITSIZE (mode)
1934                                           - INTVAL (op1)),
1935                                  target, unsignedp, methods);
1936         }
1937       else if (unsignedp)
1938         temp = expand_binop (mode,
1939                              left ? ashl_optab : lshr_optab,
1940                              shifted, op1, target, unsignedp, methods);
1941
1942       /* Do arithmetic shifts.
1943          Also, if we are going to widen the operand, we can just as well
1944          use an arithmetic right-shift instead of a logical one.  */
1945       if (temp == 0 && ! rotate
1946           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1947         {
1948           enum optab_methods methods1 = methods;
1949
1950           /* If trying to widen a log shift to an arithmetic shift,
1951              don't accept an arithmetic shift of the same size.  */
1952           if (unsignedp)
1953             methods1 = OPTAB_MUST_WIDEN;
1954
1955           /* Arithmetic shift */
1956
1957           temp = expand_binop (mode,
1958                                left ? ashl_optab : ashr_optab,
1959                                shifted, op1, target, unsignedp, methods1);
1960         }
1961
1962       /* We used to try extzv here for logical right shifts, but that was
1963          only useful for one machine, the VAX, and caused poor code
1964          generation there for lshrdi3, so the code was deleted and a
1965          define_expand for lshrsi3 was added to vax.md.  */
1966     }
1967
1968   if (temp == 0)
1969     abort ();
1970   return temp;
1971 }
1972 \f
1973 enum alg_code { alg_zero, alg_m, alg_shift,
1974                   alg_add_t_m2, alg_sub_t_m2,
1975                   alg_add_factor, alg_sub_factor,
1976                   alg_add_t2_m, alg_sub_t2_m,
1977                   alg_add, alg_subtract, alg_factor, alg_shiftop };
1978
1979 /* This structure records a sequence of operations.
1980    `ops' is the number of operations recorded.
1981    `cost' is their total cost.
1982    The operations are stored in `op' and the corresponding
1983    logarithms of the integer coefficients in `log'.
1984
1985    These are the operations:
1986    alg_zero             total := 0;
1987    alg_m                total := multiplicand;
1988    alg_shift            total := total * coeff
1989    alg_add_t_m2         total := total + multiplicand * coeff;
1990    alg_sub_t_m2         total := total - multiplicand * coeff;
1991    alg_add_factor       total := total * coeff + total;
1992    alg_sub_factor       total := total * coeff - total;
1993    alg_add_t2_m         total := total * coeff + multiplicand;
1994    alg_sub_t2_m         total := total * coeff - multiplicand;
1995
1996    The first operand must be either alg_zero or alg_m.  */
1997
1998 struct algorithm
1999 {
2000   short cost;
2001   short ops;
2002   /* The size of the OP and LOG fields are not directly related to the
2003      word size, but the worst-case algorithms will be if we have few
2004      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2005      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2006      in total wordsize operations.  */
2007   enum alg_code op[MAX_BITS_PER_WORD];
2008   char log[MAX_BITS_PER_WORD];
2009 };
2010
2011 static void synth_mult                  PROTO((struct algorithm *,
2012                                                unsigned HOST_WIDE_INT,
2013                                                int));
2014 static unsigned HOST_WIDE_INT choose_multiplier PROTO((unsigned HOST_WIDE_INT,
2015                                                        int, int,
2016                                                        unsigned HOST_WIDE_INT *,
2017                                                        int *, int *));
2018 static unsigned HOST_WIDE_INT invert_mod2n      PROTO((unsigned HOST_WIDE_INT,
2019                                                        int));
2020 /* Compute and return the best algorithm for multiplying by T.
2021    The algorithm must cost less than cost_limit
2022    If retval.cost >= COST_LIMIT, no algorithm was found and all
2023    other field of the returned struct are undefined.  */
2024
2025 static void
2026 synth_mult (alg_out, t, cost_limit)
2027      struct algorithm *alg_out;
2028      unsigned HOST_WIDE_INT t;
2029      int cost_limit;
2030 {
2031   int m;
2032   struct algorithm *alg_in, *best_alg;
2033   int cost;
2034   unsigned HOST_WIDE_INT q;
2035
2036   /* Indicate that no algorithm is yet found.  If no algorithm
2037      is found, this value will be returned and indicate failure.  */
2038   alg_out->cost = cost_limit;
2039
2040   if (cost_limit <= 0)
2041     return;
2042
2043   /* t == 1 can be done in zero cost.  */
2044   if (t == 1)
2045     {
2046       alg_out->ops = 1;
2047       alg_out->cost = 0;
2048       alg_out->op[0] = alg_m;
2049       return;
2050     }
2051
2052   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2053      fail now.  */
2054   if (t == 0)
2055     {
2056       if (zero_cost >= cost_limit)
2057         return;
2058       else
2059         {
2060           alg_out->ops = 1;
2061           alg_out->cost = zero_cost;
2062           alg_out->op[0] = alg_zero;
2063           return;
2064         }
2065     }
2066
2067   /* We'll be needing a couple extra algorithm structures now.  */
2068
2069   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2070   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2071
2072   /* If we have a group of zero bits at the low-order part of T, try
2073      multiplying by the remaining bits and then doing a shift.  */
2074
2075   if ((t & 1) == 0)
2076     {
2077       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2078       q = t >> m;
2079       cost = shift_cost[m];
2080       synth_mult (alg_in, q, cost_limit - cost);
2081
2082       cost += alg_in->cost;
2083       if (cost < cost_limit)
2084         {
2085           struct algorithm *x;
2086           x = alg_in, alg_in = best_alg, best_alg = x;
2087           best_alg->log[best_alg->ops] = m;
2088           best_alg->op[best_alg->ops] = alg_shift;
2089           cost_limit = cost;
2090         }
2091     }
2092
2093   /* If we have an odd number, add or subtract one.  */
2094   if ((t & 1) != 0)
2095     {
2096       unsigned HOST_WIDE_INT w;
2097
2098       for (w = 1; (w & t) != 0; w <<= 1)
2099         ;
2100       /* If T was -1, then W will be zero after the loop.  This is another
2101          case where T ends with ...111.  Handling this with (T + 1) and
2102          subtract 1 produces slightly better code and results in algorithm
2103          selection much faster than treating it like the ...0111 case
2104          below.  */
2105       if (w == 0
2106           || (w > 2
2107               /* Reject the case where t is 3.
2108                  Thus we prefer addition in that case.  */
2109               && t != 3))
2110         {
2111           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2112
2113           cost = add_cost;
2114           synth_mult (alg_in, t + 1, cost_limit - cost);
2115
2116           cost += alg_in->cost;
2117           if (cost < cost_limit)
2118             {
2119               struct algorithm *x;
2120               x = alg_in, alg_in = best_alg, best_alg = x;
2121               best_alg->log[best_alg->ops] = 0;
2122               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2123               cost_limit = cost;
2124             }
2125         }
2126       else
2127         {
2128           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2129
2130           cost = add_cost;
2131           synth_mult (alg_in, t - 1, cost_limit - cost);
2132
2133           cost += alg_in->cost;
2134           if (cost < cost_limit)
2135             {
2136               struct algorithm *x;
2137               x = alg_in, alg_in = best_alg, best_alg = x;
2138               best_alg->log[best_alg->ops] = 0;
2139               best_alg->op[best_alg->ops] = alg_add_t_m2;
2140               cost_limit = cost;
2141             }
2142         }
2143     }
2144
2145   /* Look for factors of t of the form
2146      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2147      If we find such a factor, we can multiply by t using an algorithm that
2148      multiplies by q, shift the result by m and add/subtract it to itself.
2149
2150      We search for large factors first and loop down, even if large factors
2151      are less probable than small; if we find a large factor we will find a
2152      good sequence quickly, and therefore be able to prune (by decreasing
2153      COST_LIMIT) the search.  */
2154
2155   for (m = floor_log2 (t - 1); m >= 2; m--)
2156     {
2157       unsigned HOST_WIDE_INT d;
2158
2159       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2160       if (t % d == 0 && t > d)
2161         {
2162           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2163           synth_mult (alg_in, t / d, cost_limit - cost);
2164
2165           cost += alg_in->cost;
2166           if (cost < cost_limit)
2167             {
2168               struct algorithm *x;
2169               x = alg_in, alg_in = best_alg, best_alg = x;
2170               best_alg->log[best_alg->ops] = m;
2171               best_alg->op[best_alg->ops] = alg_add_factor;
2172               cost_limit = cost;
2173             }
2174           /* Other factors will have been taken care of in the recursion.  */
2175           break;
2176         }
2177
2178       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2179       if (t % d == 0 && t > d)
2180         {
2181           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2182           synth_mult (alg_in, t / d, cost_limit - cost);
2183
2184           cost += alg_in->cost;
2185           if (cost < cost_limit)
2186             {
2187               struct algorithm *x;
2188               x = alg_in, alg_in = best_alg, best_alg = x;
2189               best_alg->log[best_alg->ops] = m;
2190               best_alg->op[best_alg->ops] = alg_sub_factor;
2191               cost_limit = cost;
2192             }
2193           break;
2194         }
2195     }
2196
2197   /* Try shift-and-add (load effective address) instructions,
2198      i.e. do a*3, a*5, a*9.  */
2199   if ((t & 1) != 0)
2200     {
2201       q = t - 1;
2202       q = q & -q;
2203       m = exact_log2 (q);
2204       if (m >= 0)
2205         {
2206           cost = shiftadd_cost[m];
2207           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2208
2209           cost += alg_in->cost;
2210           if (cost < cost_limit)
2211             {
2212               struct algorithm *x;
2213               x = alg_in, alg_in = best_alg, best_alg = x;
2214               best_alg->log[best_alg->ops] = m;
2215               best_alg->op[best_alg->ops] = alg_add_t2_m;
2216               cost_limit = cost;
2217             }
2218         }
2219
2220       q = t + 1;
2221       q = q & -q;
2222       m = exact_log2 (q);
2223       if (m >= 0)
2224         {
2225           cost = shiftsub_cost[m];
2226           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2227
2228           cost += alg_in->cost;
2229           if (cost < cost_limit)
2230             {
2231               struct algorithm *x;
2232               x = alg_in, alg_in = best_alg, best_alg = x;
2233               best_alg->log[best_alg->ops] = m;
2234               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2235               cost_limit = cost;
2236             }
2237         }
2238     }
2239
2240   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2241      we have not found any algorithm.  */
2242   if (cost_limit == alg_out->cost)
2243     return;
2244
2245   /* If we are getting a too long sequence for `struct algorithm'
2246      to record, make this search fail.  */
2247   if (best_alg->ops == MAX_BITS_PER_WORD)
2248     return;
2249
2250   /* Copy the algorithm from temporary space to the space at alg_out.
2251      We avoid using structure assignment because the majority of
2252      best_alg is normally undefined, and this is a critical function.  */
2253   alg_out->ops = best_alg->ops + 1;
2254   alg_out->cost = cost_limit;
2255   bcopy ((char *) best_alg->op, (char *) alg_out->op,
2256          alg_out->ops * sizeof *alg_out->op);
2257   bcopy ((char *) best_alg->log, (char *) alg_out->log,
2258          alg_out->ops * sizeof *alg_out->log);
2259 }
2260 \f
2261 /* Perform a multiplication and return an rtx for the result.
2262    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2263    TARGET is a suggestion for where to store the result (an rtx).
2264
2265    We check specially for a constant integer as OP1.
2266    If you want this check for OP0 as well, then before calling
2267    you should swap the two operands if OP0 would be constant.  */
2268
2269 rtx
2270 expand_mult (mode, op0, op1, target, unsignedp)
2271      enum machine_mode mode;
2272      register rtx op0, op1, target;
2273      int unsignedp;
2274 {
2275   rtx const_op1 = op1;
2276
2277   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2278      less than or equal in size to `unsigned int' this doesn't matter.
2279      If the mode is larger than `unsigned int', then synth_mult works only
2280      if the constant value exactly fits in an `unsigned int' without any
2281      truncation.  This means that multiplying by negative values does
2282      not work; results are off by 2^32 on a 32 bit machine.  */
2283
2284   /* If we are multiplying in DImode, it may still be a win
2285      to try to work with shifts and adds.  */
2286   if (GET_CODE (op1) == CONST_DOUBLE
2287       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2288       && HOST_BITS_PER_INT >= BITS_PER_WORD
2289       && CONST_DOUBLE_HIGH (op1) == 0)
2290     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2291   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2292            && GET_CODE (op1) == CONST_INT
2293            && INTVAL (op1) < 0)
2294     const_op1 = 0;
2295
2296   /* We used to test optimize here, on the grounds that it's better to
2297      produce a smaller program when -O is not used.
2298      But this causes such a terrible slowdown sometimes
2299      that it seems better to use synth_mult always.  */
2300
2301   if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2302     {
2303       struct algorithm alg;
2304       struct algorithm alg2;
2305       HOST_WIDE_INT val = INTVAL (op1);
2306       HOST_WIDE_INT val_so_far;
2307       rtx insn;
2308       int mult_cost;
2309       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2310
2311       /* Try to do the computation three ways: multiply by the negative of OP1
2312          and then negate, do the multiplication directly, or do multiplication
2313          by OP1 - 1.  */
2314
2315       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2316       mult_cost = MIN (12 * add_cost, mult_cost);
2317
2318       synth_mult (&alg, val, mult_cost);
2319
2320       /* This works only if the inverted value actually fits in an
2321          `unsigned int' */
2322       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2323         {
2324           synth_mult (&alg2, - val,
2325                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2326           if (alg2.cost + negate_cost < alg.cost)
2327             alg = alg2, variant = negate_variant;
2328         }
2329
2330       /* This proves very useful for division-by-constant.  */
2331       synth_mult (&alg2, val - 1,
2332                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2333       if (alg2.cost + add_cost < alg.cost)
2334         alg = alg2, variant = add_variant;
2335
2336       if (alg.cost < mult_cost)
2337         {
2338           /* We found something cheaper than a multiply insn.  */
2339           int opno;
2340           rtx accum, tem;
2341
2342           op0 = protect_from_queue (op0, 0);
2343
2344           /* Avoid referencing memory over and over.
2345              For speed, but also for correctness when mem is volatile.  */
2346           if (GET_CODE (op0) == MEM)
2347             op0 = force_reg (mode, op0);
2348
2349           /* ACCUM starts out either as OP0 or as a zero, depending on
2350              the first operation.  */
2351
2352           if (alg.op[0] == alg_zero)
2353             {
2354               accum = copy_to_mode_reg (mode, const0_rtx);
2355               val_so_far = 0;
2356             }
2357           else if (alg.op[0] == alg_m)
2358             {
2359               accum = copy_to_mode_reg (mode, op0);
2360               val_so_far = 1;
2361             }
2362           else
2363             abort ();
2364
2365           for (opno = 1; opno < alg.ops; opno++)
2366             {
2367               int log = alg.log[opno];
2368               int preserve = preserve_subexpressions_p ();
2369               rtx shift_subtarget = preserve ? 0 : accum;
2370               rtx add_target
2371                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2372                    && ! preserve)
2373                   ? target : 0;
2374               rtx accum_target = preserve ? 0 : accum;
2375
2376               switch (alg.op[opno])
2377                 {
2378                 case alg_shift:
2379                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2380                                         build_int_2 (log, 0), NULL_RTX, 0);
2381                   val_so_far <<= log;
2382                   break;
2383
2384                 case alg_add_t_m2:
2385                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2386                                       build_int_2 (log, 0), NULL_RTX, 0);
2387                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2388                                          add_target ? add_target : accum_target);
2389                   val_so_far += (HOST_WIDE_INT) 1 << log;
2390                   break;
2391
2392                 case alg_sub_t_m2:
2393                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2394                                       build_int_2 (log, 0), NULL_RTX, 0);
2395                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2396                                          add_target ? add_target : accum_target);
2397                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2398                   break;
2399
2400                 case alg_add_t2_m:
2401                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2402                                         build_int_2 (log, 0), shift_subtarget,
2403                                         0);
2404                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2405                                          add_target ? add_target : accum_target);
2406                   val_so_far = (val_so_far << log) + 1;
2407                   break;
2408
2409                 case alg_sub_t2_m:
2410                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2411                                         build_int_2 (log, 0), shift_subtarget,
2412                                         0);
2413                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2414                                          add_target ? add_target : accum_target);
2415                   val_so_far = (val_so_far << log) - 1;
2416                   break;
2417
2418                 case alg_add_factor:
2419                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2420                                       build_int_2 (log, 0), NULL_RTX, 0);
2421                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2422                                          add_target ? add_target : accum_target);
2423                   val_so_far += val_so_far << log;
2424                   break;
2425
2426                 case alg_sub_factor:
2427                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2428                                       build_int_2 (log, 0), NULL_RTX, 0);
2429                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2430                                          (add_target ? add_target
2431                                           : preserve ? 0 : tem));
2432                   val_so_far = (val_so_far << log) - val_so_far;
2433                   break;
2434
2435                 default:
2436                   abort ();;
2437                 }
2438
2439               /* Write a REG_EQUAL note on the last insn so that we can cse
2440                  multiplication sequences.  */
2441
2442               insn = get_last_insn ();
2443               REG_NOTES (insn)
2444                 = gen_rtx_EXPR_LIST (REG_EQUAL,
2445                                      gen_rtx_MULT (mode, op0, GEN_INT (val_so_far)),
2446                                      REG_NOTES (insn));
2447             }
2448
2449           if (variant == negate_variant)
2450             {
2451               val_so_far = - val_so_far;
2452               accum = expand_unop (mode, neg_optab, accum, target, 0);
2453             }
2454           else if (variant == add_variant)
2455             {
2456               val_so_far = val_so_far + 1;
2457               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2458             }
2459
2460           if (val != val_so_far)
2461             abort ();
2462
2463           return accum;
2464         }
2465     }
2466
2467   /* This used to use umul_optab if unsigned, but for non-widening multiply
2468      there is no difference between signed and unsigned.  */
2469   op0 = expand_binop (mode, smul_optab,
2470                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2471   if (op0 == 0)
2472     abort ();
2473   return op0;
2474 }
2475 \f
2476 /* Return the smallest n such that 2**n >= X.  */
2477
2478 int
2479 ceil_log2 (x)
2480      unsigned HOST_WIDE_INT x;
2481 {
2482   return floor_log2 (x - 1) + 1;
2483 }
2484
2485 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2486    replace division by D, and put the least significant N bits of the result
2487    in *MULTIPLIER_PTR and return the most significant bit.
2488
2489    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2490    needed precision is in PRECISION (should be <= N).
2491
2492    PRECISION should be as small as possible so this function can choose
2493    multiplier more freely.
2494
2495    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2496    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2497
2498    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2499    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2500
2501 static
2502 unsigned HOST_WIDE_INT
2503 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2504      unsigned HOST_WIDE_INT d;
2505      int n;
2506      int precision;
2507      unsigned HOST_WIDE_INT *multiplier_ptr;
2508      int *post_shift_ptr;
2509      int *lgup_ptr;
2510 {
2511   unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2512   unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2513   int lgup, post_shift;
2514   int pow, pow2;
2515   unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2516
2517   /* lgup = ceil(log2(divisor)); */
2518   lgup = ceil_log2 (d);
2519
2520   if (lgup > n)
2521     abort ();
2522
2523   pow = n + lgup;
2524   pow2 = n + lgup - precision;
2525
2526   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2527     {
2528       /* We could handle this with some effort, but this case is much better
2529          handled directly with a scc insn, so rely on caller using that.  */
2530       abort ();
2531     }
2532
2533   /* mlow = 2^(N + lgup)/d */
2534  if (pow >= HOST_BITS_PER_WIDE_INT)
2535     {
2536       nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2537       nl = 0;
2538     }
2539   else
2540     {
2541       nh = 0;
2542       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2543     }
2544   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2545                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2546
2547   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2548   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2549     nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2550   else
2551     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2552   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2553                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2554
2555   if (mhigh_hi && nh - d >= d)
2556     abort ();
2557   if (mhigh_hi > 1 || mlow_hi > 1)
2558     abort ();
2559   /* assert that mlow < mhigh.  */
2560   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2561     abort();
2562
2563   /* If precision == N, then mlow, mhigh exceed 2^N
2564      (but they do not exceed 2^(N+1)).  */
2565
2566   /* Reduce to lowest terms */
2567   for (post_shift = lgup; post_shift > 0; post_shift--)
2568     {
2569       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2570       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2571       if (ml_lo >= mh_lo)
2572         break;
2573
2574       mlow_hi = 0;
2575       mlow_lo = ml_lo;
2576       mhigh_hi = 0;
2577       mhigh_lo = mh_lo;
2578     }
2579
2580   *post_shift_ptr = post_shift;
2581   *lgup_ptr = lgup;
2582   if (n < HOST_BITS_PER_WIDE_INT)
2583     {
2584       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2585       *multiplier_ptr = mhigh_lo & mask;
2586       return mhigh_lo >= mask;
2587     }
2588   else
2589     {
2590       *multiplier_ptr = mhigh_lo;
2591       return mhigh_hi;
2592     }
2593 }
2594
2595 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2596    congruent to 1 (mod 2**N).  */
2597
2598 static unsigned HOST_WIDE_INT
2599 invert_mod2n (x, n)
2600      unsigned HOST_WIDE_INT x;
2601      int n;
2602 {
2603   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2604
2605   /* The algorithm notes that the choice y = x satisfies
2606      x*y == 1 mod 2^3, since x is assumed odd.
2607      Each iteration doubles the number of bits of significance in y.  */
2608
2609   unsigned HOST_WIDE_INT mask;
2610   unsigned HOST_WIDE_INT y = x;
2611   int nbit = 3;
2612
2613   mask = (n == HOST_BITS_PER_WIDE_INT
2614           ? ~(unsigned HOST_WIDE_INT) 0
2615           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2616
2617   while (nbit < n)
2618     {
2619       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2620       nbit *= 2;
2621     }
2622   return y;
2623 }
2624
2625 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2626    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2627    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2628    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2629    become signed.
2630
2631    The result is put in TARGET if that is convenient.
2632
2633    MODE is the mode of operation.  */
2634
2635 rtx
2636 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2637      enum machine_mode mode;
2638      register rtx adj_operand, op0, op1, target;
2639      int unsignedp;
2640 {
2641   rtx tem;
2642   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2643
2644   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2645                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2646                       NULL_RTX, 0);
2647   tem = expand_and (tem, op1, NULL_RTX);
2648   adj_operand
2649     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2650                      adj_operand);
2651
2652   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2653                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2654                       NULL_RTX, 0);
2655   tem = expand_and (tem, op0, NULL_RTX);
2656   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2657                           target);
2658
2659   return target;
2660 }
2661
2662 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2663    in TARGET if that is convenient, and return where the result is.  If the
2664    operation can not be performed, 0 is returned.
2665
2666    MODE is the mode of operation and result.
2667
2668    UNSIGNEDP nonzero means unsigned multiply.
2669
2670    MAX_COST is the total allowed cost for the expanded RTL.  */
2671
2672 rtx
2673 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2674      enum machine_mode mode;
2675      register rtx op0, target;
2676      unsigned HOST_WIDE_INT cnst1;
2677      int unsignedp;
2678      int max_cost;
2679 {
2680   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2681   optab mul_highpart_optab;
2682   optab moptab;
2683   rtx tem;
2684   int size = GET_MODE_BITSIZE (mode);
2685   rtx op1, wide_op1;
2686
2687   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2688   if (size > HOST_BITS_PER_WIDE_INT)
2689     abort ();
2690
2691   op1 = GEN_INT (cnst1);
2692
2693   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2694     wide_op1 = op1;
2695   else
2696     wide_op1
2697       = immed_double_const (cnst1,
2698                             (unsignedp
2699                              ? (HOST_WIDE_INT) 0
2700                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2701                             wider_mode);
2702
2703   /* expand_mult handles constant multiplication of word_mode
2704      or narrower.  It does a poor job for large modes.  */
2705   if (size < BITS_PER_WORD
2706       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2707     {
2708       /* We have to do this, since expand_binop doesn't do conversion for
2709          multiply.  Maybe change expand_binop to handle widening multiply?  */
2710       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2711
2712       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2713       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2714                           build_int_2 (size, 0), NULL_RTX, 1);
2715       return convert_modes (mode, wider_mode, tem, unsignedp);
2716     }
2717
2718   if (target == 0)
2719     target = gen_reg_rtx (mode);
2720
2721   /* Firstly, try using a multiplication insn that only generates the needed
2722      high part of the product, and in the sign flavor of unsignedp.  */
2723   if (mul_highpart_cost[(int) mode] < max_cost)
2724     {
2725       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2726       target = expand_binop (mode, mul_highpart_optab,
2727                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2728       if (target)
2729         return target;
2730     }
2731
2732   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2733      Need to adjust the result after the multiplication.  */
2734   if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2735     {
2736       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2737       target = expand_binop (mode, mul_highpart_optab,
2738                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2739       if (target)
2740         /* We used the wrong signedness.  Adjust the result.  */
2741         return expand_mult_highpart_adjust (mode, target, op0,
2742                                             op1, target, unsignedp);
2743     }
2744
2745   /* Try widening multiplication.  */
2746   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2747   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2748       && mul_widen_cost[(int) wider_mode] < max_cost)
2749     {
2750       op1 = force_reg (mode, op1);
2751       goto try;
2752     }
2753
2754   /* Try widening the mode and perform a non-widening multiplication.  */
2755   moptab = smul_optab;
2756   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2757       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2758     {
2759       op1 = wide_op1;
2760       goto try;
2761     }
2762
2763   /* Try widening multiplication of opposite signedness, and adjust.  */
2764   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2765   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2766       && (mul_widen_cost[(int) wider_mode]
2767           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2768     {
2769       rtx regop1 = force_reg (mode, op1);
2770       tem = expand_binop (wider_mode, moptab, op0, regop1,
2771                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2772       if (tem != 0)
2773         {
2774           /* Extract the high half of the just generated product.  */
2775           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2776                               build_int_2 (size, 0), NULL_RTX, 1);
2777           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2778           /* We used the wrong signedness.  Adjust the result.  */
2779           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2780                                               target, unsignedp);
2781         }
2782     }
2783
2784   return 0;
2785
2786  try:
2787   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2788   tem = expand_binop (wider_mode, moptab, op0, op1,
2789                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2790   if (tem == 0)
2791     return 0;
2792
2793   /* Extract the high half of the just generated product.  */
2794   if (mode == word_mode)
2795     {
2796       return gen_highpart (mode, tem);
2797     }
2798   else
2799     {
2800       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2801                           build_int_2 (size, 0), NULL_RTX, 1);
2802       return convert_modes (mode, wider_mode, tem, unsignedp);
2803     }
2804 }
2805 \f
2806 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2807    if that is convenient, and returning where the result is.
2808    You may request either the quotient or the remainder as the result;
2809    specify REM_FLAG nonzero to get the remainder.
2810
2811    CODE is the expression code for which kind of division this is;
2812    it controls how rounding is done.  MODE is the machine mode to use.
2813    UNSIGNEDP nonzero means do unsigned division.  */
2814
2815 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2816    and then correct it by or'ing in missing high bits
2817    if result of ANDI is nonzero.
2818    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2819    This could optimize to a bfexts instruction.
2820    But C doesn't use these operations, so their optimizations are
2821    left for later.  */
2822
2823 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2824
2825 rtx
2826 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2827      int rem_flag;
2828      enum tree_code code;
2829      enum machine_mode mode;
2830      register rtx op0, op1, target;
2831      int unsignedp;
2832 {
2833   enum machine_mode compute_mode;
2834   register rtx tquotient;
2835   rtx quotient = 0, remainder = 0;
2836   rtx last;
2837   int size;
2838   rtx insn, set;
2839   optab optab1, optab2;
2840   int op1_is_constant, op1_is_pow2;
2841   int max_cost, extra_cost;
2842   static HOST_WIDE_INT last_div_const = 0;
2843
2844   op1_is_constant = GET_CODE (op1) == CONST_INT;
2845   op1_is_pow2 = (op1_is_constant
2846                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2847                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2848
2849   /*
2850      This is the structure of expand_divmod:
2851
2852      First comes code to fix up the operands so we can perform the operations
2853      correctly and efficiently.
2854
2855      Second comes a switch statement with code specific for each rounding mode.
2856      For some special operands this code emits all RTL for the desired
2857      operation, for other cases, it generates only a quotient and stores it in
2858      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2859      to indicate that it has not done anything.
2860
2861      Last comes code that finishes the operation.  If QUOTIENT is set and
2862      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2863      QUOTIENT is not set, it is computed using trunc rounding.
2864
2865      We try to generate special code for division and remainder when OP1 is a
2866      constant.  If |OP1| = 2**n we can use shifts and some other fast
2867      operations.  For other values of OP1, we compute a carefully selected
2868      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2869      by m.
2870
2871      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2872      half of the product.  Different strategies for generating the product are
2873      implemented in expand_mult_highpart.
2874
2875      If what we actually want is the remainder, we generate that by another
2876      by-constant multiplication and a subtraction.  */
2877
2878   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2879      code below will malfunction if we are, so check here and handle
2880      the special case if so.  */
2881   if (op1 == const1_rtx)
2882     return rem_flag ? const0_rtx : op0;
2883
2884   if (target
2885       /* Don't use the function value register as a target
2886          since we have to read it as well as write it,
2887          and function-inlining gets confused by this.  */
2888       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2889           /* Don't clobber an operand while doing a multi-step calculation.  */
2890           || ((rem_flag || op1_is_constant)
2891               && (reg_mentioned_p (target, op0)
2892                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2893           || reg_mentioned_p (target, op1)
2894           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2895     target = 0;
2896
2897   /* Get the mode in which to perform this computation.  Normally it will
2898      be MODE, but sometimes we can't do the desired operation in MODE.
2899      If so, pick a wider mode in which we can do the operation.  Convert
2900      to that mode at the start to avoid repeated conversions.
2901
2902      First see what operations we need.  These depend on the expression
2903      we are evaluating.  (We assume that divxx3 insns exist under the
2904      same conditions that modxx3 insns and that these insns don't normally
2905      fail.  If these assumptions are not correct, we may generate less
2906      efficient code in some cases.)
2907
2908      Then see if we find a mode in which we can open-code that operation
2909      (either a division, modulus, or shift).  Finally, check for the smallest
2910      mode for which we can do the operation with a library call.  */
2911
2912   /* We might want to refine this now that we have division-by-constant
2913      optimization.  Since expand_mult_highpart tries so many variants, it is
2914      not straightforward to generalize this.  Maybe we should make an array
2915      of possible modes in init_expmed?  Save this for GCC 2.7.  */
2916
2917   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2918             : (unsignedp ? udiv_optab : sdiv_optab));
2919   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2920
2921   for (compute_mode = mode; compute_mode != VOIDmode;
2922        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2923     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2924         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2925       break;
2926
2927   if (compute_mode == VOIDmode)
2928     for (compute_mode = mode; compute_mode != VOIDmode;
2929          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2930       if (optab1->handlers[(int) compute_mode].libfunc
2931           || optab2->handlers[(int) compute_mode].libfunc)
2932         break;
2933
2934   /* If we still couldn't find a mode, use MODE, but we'll probably abort
2935      in expand_binop.  */
2936   if (compute_mode == VOIDmode)
2937     compute_mode = mode;
2938
2939   if (target && GET_MODE (target) == compute_mode)
2940     tquotient = target;
2941   else
2942     tquotient = gen_reg_rtx (compute_mode);
2943
2944   size = GET_MODE_BITSIZE (compute_mode);
2945 #if 0
2946   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
2947      (mode), and thereby get better code when OP1 is a constant.  Do that
2948      later.  It will require going over all usages of SIZE below.  */
2949   size = GET_MODE_BITSIZE (mode);
2950 #endif
2951
2952   /* Only deduct something for a REM if the last divide done was
2953      for a different constant.   Then set the constant of the last
2954      divide.  */
2955   max_cost = div_cost[(int) compute_mode]
2956     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
2957                       && INTVAL (op1) == last_div_const)
2958        ? mul_cost[(int) compute_mode] + add_cost : 0);
2959
2960   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
2961
2962   /* Now convert to the best mode to use.  */
2963   if (compute_mode != mode)
2964     {
2965       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
2966       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
2967
2968       /* convert_modes may have placed op1 into a register, so we
2969          must recompute the following.  */
2970       op1_is_constant = GET_CODE (op1) == CONST_INT;
2971       op1_is_pow2 = (op1_is_constant
2972                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2973                           || (! unsignedp
2974                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
2975     }
2976
2977   /* If one of the operands is a volatile MEM, copy it into a register.  */
2978
2979   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
2980     op0 = force_reg (compute_mode, op0);
2981   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
2982     op1 = force_reg (compute_mode, op1);
2983
2984   /* If we need the remainder or if OP1 is constant, we need to
2985      put OP0 in a register in case it has any queued subexpressions.  */
2986   if (rem_flag || op1_is_constant)
2987     op0 = force_reg (compute_mode, op0);
2988
2989   last = get_last_insn ();
2990
2991   /* Promote floor rounding to trunc rounding for unsigned operations.  */
2992   if (unsignedp)
2993     {
2994       if (code == FLOOR_DIV_EXPR)
2995         code = TRUNC_DIV_EXPR;
2996       if (code == FLOOR_MOD_EXPR)
2997         code = TRUNC_MOD_EXPR;
2998       if (code == EXACT_DIV_EXPR && op1_is_pow2)
2999         code = TRUNC_DIV_EXPR;
3000     }
3001
3002   if (op1 != const0_rtx)
3003     switch (code)
3004       {
3005       case TRUNC_MOD_EXPR:
3006       case TRUNC_DIV_EXPR:
3007         if (op1_is_constant)
3008           {
3009             if (unsignedp)
3010               {
3011                 unsigned HOST_WIDE_INT mh, ml;
3012                 int pre_shift, post_shift;
3013                 int dummy;
3014                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3015
3016                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3017                   {
3018                     pre_shift = floor_log2 (d);
3019                     if (rem_flag)
3020                       {
3021                         remainder
3022                           = expand_binop (compute_mode, and_optab, op0,
3023                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3024                                           remainder, 1,
3025                                           OPTAB_LIB_WIDEN);
3026                         if (remainder)
3027                           return gen_lowpart (mode, remainder);
3028                       }
3029                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3030                                              build_int_2 (pre_shift, 0),
3031                                              tquotient, 1);
3032                   }
3033                 else if (size <= HOST_BITS_PER_WIDE_INT)
3034                   {
3035                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3036                       {
3037                         /* Most significant bit of divisor is set; emit an scc
3038                            insn.  */
3039                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3040                                                     compute_mode, 1, 1);
3041                         if (quotient == 0)
3042                           goto fail1;
3043                       }
3044                     else
3045                       {
3046                         /* Find a suitable multiplier and right shift count
3047                            instead of multiplying with D.  */
3048
3049                         mh = choose_multiplier (d, size, size,
3050                                                 &ml, &post_shift, &dummy);
3051
3052                         /* If the suggested multiplier is more than SIZE bits,
3053                            we can do better for even divisors, using an
3054                            initial right shift.  */
3055                         if (mh != 0 && (d & 1) == 0)
3056                           {
3057                             pre_shift = floor_log2 (d & -d);
3058                             mh = choose_multiplier (d >> pre_shift, size,
3059                                                     size - pre_shift,
3060                                                     &ml, &post_shift, &dummy);
3061                             if (mh)
3062                               abort ();
3063                           }
3064                         else
3065                           pre_shift = 0;
3066
3067                         if (mh != 0)
3068                           {
3069                             rtx t1, t2, t3, t4;
3070
3071                             extra_cost = (shift_cost[post_shift - 1]
3072                                           + shift_cost[1] + 2 * add_cost);
3073                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3074                                                        NULL_RTX, 1,
3075                                                        max_cost - extra_cost);
3076                             if (t1 == 0)
3077                               goto fail1;
3078                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3079                                                                op0, t1),
3080                                                 NULL_RTX);
3081                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3082                                                build_int_2 (1, 0), NULL_RTX,1);
3083                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3084                                                               t1, t3),
3085                                                 NULL_RTX);
3086                             quotient
3087                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3088                                               build_int_2 (post_shift - 1, 0),
3089                                               tquotient, 1);
3090                           }
3091                         else
3092                           {
3093                             rtx t1, t2;
3094
3095                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3096                                                build_int_2 (pre_shift, 0),
3097                                                NULL_RTX, 1);
3098                             extra_cost = (shift_cost[pre_shift]
3099                                           + shift_cost[post_shift]);
3100                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3101                                                        NULL_RTX, 1,
3102                                                        max_cost - extra_cost);
3103                             if (t2 == 0)
3104                               goto fail1;
3105                             quotient
3106                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3107                                               build_int_2 (post_shift, 0),
3108                                               tquotient, 1);
3109                           }
3110                       }
3111                   }
3112                 else            /* Too wide mode to use tricky code */
3113                   break;
3114
3115                 insn = get_last_insn ();
3116                 if (insn != last
3117                     && (set = single_set (insn)) != 0
3118                     && SET_DEST (set) == quotient)
3119                   REG_NOTES (insn)
3120                     = gen_rtx_EXPR_LIST (REG_EQUAL,
3121                                          gen_rtx_UDIV (compute_mode, op0, op1),
3122                                          REG_NOTES (insn));
3123               }
3124             else                /* TRUNC_DIV, signed */
3125               {
3126                 unsigned HOST_WIDE_INT ml;
3127                 int lgup, post_shift;
3128                 HOST_WIDE_INT d = INTVAL (op1);
3129                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3130
3131                 /* n rem d = n rem -d */
3132                 if (rem_flag && d < 0)
3133                   {
3134                     d = abs_d;
3135                     op1 = GEN_INT (abs_d);
3136                   }
3137
3138                 if (d == 1)
3139                   quotient = op0;
3140                 else if (d == -1)
3141                   quotient = expand_unop (compute_mode, neg_optab, op0,
3142                                           tquotient, 0);
3143                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3144                   {
3145                     /* This case is not handled correctly below.  */
3146                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3147                                                 compute_mode, 1, 1);
3148                     if (quotient == 0)
3149                       goto fail1;
3150                   }
3151                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3152                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3153                   ;
3154                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3155                   {
3156                     lgup = floor_log2 (abs_d);
3157                     if (abs_d != 2 && BRANCH_COST < 3)
3158                       {
3159                         rtx label = gen_label_rtx ();
3160                         rtx t1;
3161
3162                         t1 = copy_to_mode_reg (compute_mode, op0);
3163                         do_cmp_and_jump (t1, const0_rtx, GE,
3164                                          compute_mode, label);
3165                         expand_inc (t1, GEN_INT (abs_d - 1));
3166                         emit_label (label);
3167                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3168                                                  build_int_2 (lgup, 0),
3169                                                  tquotient, 0);
3170                       }
3171                     else
3172                       {
3173                         rtx t1, t2, t3;
3174                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3175                                            build_int_2 (size - 1, 0),
3176                                            NULL_RTX, 0);
3177                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3178                                            build_int_2 (size - lgup, 0),
3179                                            NULL_RTX, 1);
3180                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3181                                                           op0, t2),
3182                                             NULL_RTX);
3183                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3184                                                  build_int_2 (lgup, 0),
3185                                                  tquotient, 0);
3186                       }
3187
3188                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3189                        the quotient.  */
3190                     if (d < 0)
3191                       {
3192                         insn = get_last_insn ();
3193                         if (insn != last
3194                             && (set = single_set (insn)) != 0
3195                             && SET_DEST (set) == quotient)
3196                           REG_NOTES (insn)
3197                             = gen_rtx_EXPR_LIST (REG_EQUAL,
3198                                                  gen_rtx_DIV (compute_mode,
3199                                                               op0,
3200                                                               GEN_INT (abs_d)),
3201                                        REG_NOTES (insn));
3202
3203                         quotient = expand_unop (compute_mode, neg_optab,
3204                                                 quotient, quotient, 0);
3205                       }
3206                   }
3207                 else if (size <= HOST_BITS_PER_WIDE_INT)
3208                   {
3209                     choose_multiplier (abs_d, size, size - 1,
3210                                        &ml, &post_shift, &lgup);
3211                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3212                       {
3213                         rtx t1, t2, t3;
3214
3215                         extra_cost = (shift_cost[post_shift]
3216                                       + shift_cost[size - 1] + add_cost);
3217                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3218                                                    NULL_RTX, 0,
3219                                                    max_cost - extra_cost);
3220                         if (t1 == 0)
3221                           goto fail1;
3222                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3223                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3224                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3225                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3226                         if (d < 0)
3227                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t3, t2),
3228                                                     tquotient);
3229                         else
3230                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t2, t3),
3231                                                     tquotient);
3232                       }
3233                     else
3234                       {
3235                         rtx t1, t2, t3, t4;
3236
3237                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3238                         extra_cost = (shift_cost[post_shift]
3239                                       + shift_cost[size - 1] + 2 * add_cost);
3240                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3241                                                    NULL_RTX, 0,
3242                                                    max_cost - extra_cost);
3243                         if (t1 == 0)
3244                           goto fail1;
3245                         t2 = force_operand (gen_rtx_PLUS (compute_mode, t1, op0),
3246                                             NULL_RTX);
3247                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3248                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3249                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3250                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3251                         if (d < 0)
3252                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t4, t3),
3253                                                     tquotient);
3254                         else
3255                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t3, t4),
3256                                                     tquotient);
3257                       }
3258                   }
3259                 else            /* Too wide mode to use tricky code */
3260                   break;
3261
3262                 insn = get_last_insn ();
3263                 if (insn != last
3264                     && (set = single_set (insn)) != 0
3265                     && SET_DEST (set) == quotient)
3266                   REG_NOTES (insn)
3267                     = gen_rtx_EXPR_LIST (REG_EQUAL,
3268                                          gen_rtx_DIV (compute_mode, op0, op1),
3269                                          REG_NOTES (insn));
3270               }
3271             break;
3272           }
3273       fail1:
3274         delete_insns_since (last);
3275         break;
3276
3277       case FLOOR_DIV_EXPR:
3278       case FLOOR_MOD_EXPR:
3279       /* We will come here only for signed operations.  */
3280         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3281           {
3282             unsigned HOST_WIDE_INT mh, ml;
3283             int pre_shift, lgup, post_shift;
3284             HOST_WIDE_INT d = INTVAL (op1);
3285
3286             if (d > 0)
3287               {
3288                 /* We could just as easily deal with negative constants here,
3289                    but it does not seem worth the trouble for GCC 2.6.  */
3290                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3291                   {
3292                     pre_shift = floor_log2 (d);
3293                     if (rem_flag)
3294                       {
3295                         remainder = expand_binop (compute_mode, and_optab, op0,
3296                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3297                                                   remainder, 0, OPTAB_LIB_WIDEN);
3298                         if (remainder)
3299                           return gen_lowpart (mode, remainder);
3300                       }
3301                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3302                                              build_int_2 (pre_shift, 0),
3303                                              tquotient, 0);
3304                   }
3305                 else
3306                   {
3307                     rtx t1, t2, t3, t4;
3308
3309                     mh = choose_multiplier (d, size, size - 1,
3310                                             &ml, &post_shift, &lgup);
3311                     if (mh)
3312                       abort ();
3313
3314                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3315                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
3316                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3317                                        NULL_RTX, 0, OPTAB_WIDEN);
3318                     extra_cost = (shift_cost[post_shift]
3319                                   + shift_cost[size - 1] + 2 * add_cost);
3320                     t3 = expand_mult_highpart (compute_mode, t2, ml,
3321                                                NULL_RTX, 1,
3322                                                max_cost - extra_cost);
3323                     if (t3 != 0)
3324                       {
3325                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3326                                            build_int_2 (post_shift, 0),
3327                                            NULL_RTX, 1);
3328                         quotient = expand_binop (compute_mode, xor_optab,
3329                                                  t4, t1, tquotient, 0,
3330                                                  OPTAB_WIDEN);
3331                       }
3332                   }
3333               }
3334             else
3335               {
3336                 rtx nsign, t1, t2, t3, t4;
3337                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3338                                                   op0, constm1_rtx), NULL_RTX);
3339                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3340                                    0, OPTAB_WIDEN);
3341                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3342                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3343                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3344                                     NULL_RTX);
3345                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3346                                     NULL_RTX, 0);
3347                 if (t4)
3348                   {
3349                     rtx t5;
3350                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3351                                       NULL_RTX, 0);
3352                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3353                                                             t4, t5),
3354                                               tquotient);
3355                   }
3356               }
3357           }
3358
3359         if (quotient != 0)
3360           break;
3361         delete_insns_since (last);
3362
3363         /* Try using an instruction that produces both the quotient and
3364            remainder, using truncation.  We can easily compensate the quotient
3365            or remainder to get floor rounding, once we have the remainder.
3366            Notice that we compute also the final remainder value here,
3367            and return the result right away.  */
3368         if (target == 0 || GET_MODE (target) != compute_mode)
3369           target = gen_reg_rtx (compute_mode);
3370
3371         if (rem_flag)
3372           {
3373             remainder
3374               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3375             quotient = gen_reg_rtx (compute_mode);
3376           }
3377         else
3378           {
3379             quotient
3380               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3381             remainder = gen_reg_rtx (compute_mode);
3382           }
3383
3384         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3385                                  quotient, remainder, 0))
3386           {
3387             /* This could be computed with a branch-less sequence.
3388                Save that for later.  */
3389             rtx tem;
3390             rtx label = gen_label_rtx ();
3391             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3392             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3393                                 NULL_RTX, 0, OPTAB_WIDEN);
3394             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3395             expand_dec (quotient, const1_rtx);
3396             expand_inc (remainder, op1);
3397             emit_label (label);
3398             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3399           }
3400
3401         /* No luck with division elimination or divmod.  Have to do it
3402            by conditionally adjusting op0 *and* the result.  */
3403         {
3404           rtx label1, label2, label3, label4, label5;
3405           rtx adjusted_op0;
3406           rtx tem;
3407
3408           quotient = gen_reg_rtx (compute_mode);
3409           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3410           label1 = gen_label_rtx ();
3411           label2 = gen_label_rtx ();
3412           label3 = gen_label_rtx ();
3413           label4 = gen_label_rtx ();
3414           label5 = gen_label_rtx ();
3415           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3416           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3417           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3418                               quotient, 0, OPTAB_LIB_WIDEN);
3419           if (tem != quotient)
3420             emit_move_insn (quotient, tem);
3421           emit_jump_insn (gen_jump (label5));
3422           emit_barrier ();
3423           emit_label (label1);
3424           expand_inc (adjusted_op0, const1_rtx);
3425           emit_jump_insn (gen_jump (label4));
3426           emit_barrier ();
3427           emit_label (label2);
3428           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3429           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3430                               quotient, 0, OPTAB_LIB_WIDEN);
3431           if (tem != quotient)
3432             emit_move_insn (quotient, tem);
3433           emit_jump_insn (gen_jump (label5));
3434           emit_barrier ();
3435           emit_label (label3);
3436           expand_dec (adjusted_op0, const1_rtx);
3437           emit_label (label4);
3438           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3439                               quotient, 0, OPTAB_LIB_WIDEN);
3440           if (tem != quotient)
3441             emit_move_insn (quotient, tem);
3442           expand_dec (quotient, const1_rtx);
3443           emit_label (label5);
3444         }
3445         break;
3446
3447       case CEIL_DIV_EXPR:
3448       case CEIL_MOD_EXPR:
3449         if (unsignedp)
3450           {
3451             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3452               {
3453                 rtx t1, t2, t3;
3454                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3455                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3456                                    build_int_2 (floor_log2 (d), 0),
3457                                    tquotient, 1);
3458                 t2 = expand_binop (compute_mode, and_optab, op0,
3459                                    GEN_INT (d - 1),
3460                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3461                 t3 = gen_reg_rtx (compute_mode);
3462                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3463                                       compute_mode, 1, 1);
3464                 if (t3 == 0)
3465                   {
3466                     rtx lab;
3467                     lab = gen_label_rtx ();
3468                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3469                     expand_inc (t1, const1_rtx);
3470                     emit_label (lab);
3471                     quotient = t1;
3472                   }
3473                 else
3474                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3475                                                           t1, t3),
3476                                             tquotient);
3477                 break;
3478               }
3479
3480             /* Try using an instruction that produces both the quotient and
3481                remainder, using truncation.  We can easily compensate the
3482                quotient or remainder to get ceiling rounding, once we have the
3483                remainder.  Notice that we compute also the final remainder
3484                value here, and return the result right away.  */
3485             if (target == 0 || GET_MODE (target) != compute_mode)
3486               target = gen_reg_rtx (compute_mode);
3487
3488             if (rem_flag)
3489               {
3490                 remainder = (GET_CODE (target) == REG
3491                              ? target : gen_reg_rtx (compute_mode));
3492                 quotient = gen_reg_rtx (compute_mode);
3493               }
3494             else
3495               {
3496                 quotient = (GET_CODE (target) == REG
3497                             ? target : gen_reg_rtx (compute_mode));
3498                 remainder = gen_reg_rtx (compute_mode);
3499               }
3500
3501             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3502                                      remainder, 1))
3503               {
3504                 /* This could be computed with a branch-less sequence.
3505                    Save that for later.  */
3506                 rtx label = gen_label_rtx ();
3507                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3508                                  compute_mode, label);
3509                 expand_inc (quotient, const1_rtx);
3510                 expand_dec (remainder, op1);
3511                 emit_label (label);
3512                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3513               }
3514
3515             /* No luck with division elimination or divmod.  Have to do it
3516                by conditionally adjusting op0 *and* the result.  */
3517             {
3518               rtx label1, label2;
3519               rtx adjusted_op0, tem;
3520
3521               quotient = gen_reg_rtx (compute_mode);
3522               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3523               label1 = gen_label_rtx ();
3524               label2 = gen_label_rtx ();
3525               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3526                                compute_mode, label1);
3527               emit_move_insn  (quotient, const0_rtx);
3528               emit_jump_insn (gen_jump (label2));
3529               emit_barrier ();
3530               emit_label (label1);
3531               expand_dec (adjusted_op0, const1_rtx);
3532               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3533                                   quotient, 1, OPTAB_LIB_WIDEN);
3534               if (tem != quotient)
3535                 emit_move_insn (quotient, tem);
3536               expand_inc (quotient, const1_rtx);
3537               emit_label (label2);
3538             }
3539           }
3540         else /* signed */
3541           {
3542             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3543                 && INTVAL (op1) >= 0)
3544               {
3545                 /* This is extremely similar to the code for the unsigned case
3546                    above.  For 2.7 we should merge these variants, but for
3547                    2.6.1 I don't want to touch the code for unsigned since that
3548                    get used in C.  The signed case will only be used by other
3549                    languages (Ada).  */
3550
3551                 rtx t1, t2, t3;
3552                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3553                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3554                                    build_int_2 (floor_log2 (d), 0),
3555                                    tquotient, 0);
3556                 t2 = expand_binop (compute_mode, and_optab, op0,
3557                                    GEN_INT (d - 1),
3558                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3559                 t3 = gen_reg_rtx (compute_mode);
3560                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3561                                       compute_mode, 1, 1);
3562                 if (t3 == 0)
3563                   {
3564                     rtx lab;
3565                     lab = gen_label_rtx ();
3566                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3567                     expand_inc (t1, const1_rtx);
3568                     emit_label (lab);
3569                     quotient = t1;
3570                   }
3571                 else
3572                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3573                                                           t1, t3),
3574                                             tquotient);
3575                 break;
3576               }
3577
3578             /* Try using an instruction that produces both the quotient and
3579                remainder, using truncation.  We can easily compensate the
3580                quotient or remainder to get ceiling rounding, once we have the
3581                remainder.  Notice that we compute also the final remainder
3582                value here, and return the result right away.  */
3583             if (target == 0 || GET_MODE (target) != compute_mode)
3584               target = gen_reg_rtx (compute_mode);
3585             if (rem_flag)
3586               {
3587                 remainder= (GET_CODE (target) == REG
3588                             ? target : gen_reg_rtx (compute_mode));
3589                 quotient = gen_reg_rtx (compute_mode);
3590               }
3591             else
3592               {
3593                 quotient = (GET_CODE (target) == REG
3594                             ? target : gen_reg_rtx (compute_mode));
3595                 remainder = gen_reg_rtx (compute_mode);
3596               }
3597
3598             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3599                                      remainder, 0))
3600               {
3601                 /* This could be computed with a branch-less sequence.
3602                    Save that for later.  */
3603                 rtx tem;
3604                 rtx label = gen_label_rtx ();
3605                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3606                                  compute_mode, label);
3607                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3608                                     NULL_RTX, 0, OPTAB_WIDEN);
3609                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3610                 expand_inc (quotient, const1_rtx);
3611                 expand_dec (remainder, op1);
3612                 emit_label (label);
3613                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3614               }
3615
3616             /* No luck with division elimination or divmod.  Have to do it
3617                by conditionally adjusting op0 *and* the result.  */
3618             {
3619               rtx label1, label2, label3, label4, label5;
3620               rtx adjusted_op0;
3621               rtx tem;
3622
3623               quotient = gen_reg_rtx (compute_mode);
3624               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3625               label1 = gen_label_rtx ();
3626               label2 = gen_label_rtx ();
3627               label3 = gen_label_rtx ();
3628               label4 = gen_label_rtx ();
3629               label5 = gen_label_rtx ();
3630               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3631               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3632                                compute_mode, label1);
3633               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3634                                   quotient, 0, OPTAB_LIB_WIDEN);
3635               if (tem != quotient)
3636                 emit_move_insn (quotient, tem);
3637               emit_jump_insn (gen_jump (label5));
3638               emit_barrier ();
3639               emit_label (label1);
3640               expand_dec (adjusted_op0, const1_rtx);
3641               emit_jump_insn (gen_jump (label4));
3642               emit_barrier ();
3643               emit_label (label2);
3644               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3645                                compute_mode, label3);
3646               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3647                                   quotient, 0, OPTAB_LIB_WIDEN);
3648               if (tem != quotient)
3649                 emit_move_insn (quotient, tem);
3650               emit_jump_insn (gen_jump (label5));
3651               emit_barrier ();
3652               emit_label (label3);
3653               expand_inc (adjusted_op0, const1_rtx);
3654               emit_label (label4);
3655               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3656                                   quotient, 0, OPTAB_LIB_WIDEN);
3657               if (tem != quotient)
3658                 emit_move_insn (quotient, tem);
3659               expand_inc (quotient, const1_rtx);
3660               emit_label (label5);
3661             }
3662           }
3663         break;
3664
3665       case EXACT_DIV_EXPR:
3666         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3667           {
3668             HOST_WIDE_INT d = INTVAL (op1);
3669             unsigned HOST_WIDE_INT ml;
3670             int post_shift;
3671             rtx t1;
3672
3673             post_shift = floor_log2 (d & -d);
3674             ml = invert_mod2n (d >> post_shift, size);
3675             t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3676                               unsignedp);
3677             quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3678                                      build_int_2 (post_shift, 0),
3679                                      NULL_RTX, unsignedp);
3680
3681             insn = get_last_insn ();
3682             REG_NOTES (insn)
3683               = gen_rtx_EXPR_LIST (REG_EQUAL,
3684                                    gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3685                                                    compute_mode,
3686                                                    op0, op1),
3687                                    REG_NOTES (insn));
3688           }
3689         break;
3690
3691       case ROUND_DIV_EXPR:
3692       case ROUND_MOD_EXPR:
3693         if (unsignedp)
3694           {
3695             rtx tem;
3696             rtx label;
3697             label = gen_label_rtx ();
3698             quotient = gen_reg_rtx (compute_mode);
3699             remainder = gen_reg_rtx (compute_mode);
3700             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3701               {
3702                 rtx tem;
3703                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3704                                          quotient, 1, OPTAB_LIB_WIDEN);
3705                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3706                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3707                                           remainder, 1, OPTAB_LIB_WIDEN);
3708               }
3709             tem = plus_constant (op1, -1);
3710             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3711                                 build_int_2 (1, 0), NULL_RTX, 1);
3712             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3713             expand_inc (quotient, const1_rtx);
3714             expand_dec (remainder, op1);
3715             emit_label (label);
3716           }
3717         else
3718           {
3719             rtx abs_rem, abs_op1, tem, mask;
3720             rtx label;
3721             label = gen_label_rtx ();
3722             quotient = gen_reg_rtx (compute_mode);
3723             remainder = gen_reg_rtx (compute_mode);
3724             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3725               {
3726                 rtx tem;
3727                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3728                                          quotient, 0, OPTAB_LIB_WIDEN);
3729                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3730                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3731                                           remainder, 0, OPTAB_LIB_WIDEN);
3732               }
3733             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
3734             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
3735             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3736                                 build_int_2 (1, 0), NULL_RTX, 1);
3737             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3738             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3739                                 NULL_RTX, 0, OPTAB_WIDEN);
3740             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3741                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3742             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3743                                 NULL_RTX, 0, OPTAB_WIDEN);
3744             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3745                                 NULL_RTX, 0, OPTAB_WIDEN);
3746             expand_inc (quotient, tem);
3747             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3748                                 NULL_RTX, 0, OPTAB_WIDEN);
3749             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3750                                 NULL_RTX, 0, OPTAB_WIDEN);
3751             expand_dec (remainder, tem);
3752             emit_label (label);
3753           }
3754         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3755
3756       default:
3757         abort ();
3758       }
3759
3760   if (quotient == 0)
3761     {
3762       if (target && GET_MODE (target) != compute_mode)
3763         target = 0;
3764
3765       if (rem_flag)
3766         {
3767           /* Try to produce the remainder without producing the quotient.
3768              If we seem to have a divmod patten that does not require widening,
3769              don't try windening here.  We should really have an WIDEN argument
3770              to expand_twoval_binop, since what we'd really like to do here is
3771              1) try a mod insn in compute_mode
3772              2) try a divmod insn in compute_mode
3773              3) try a div insn in compute_mode and multiply-subtract to get
3774                 remainder
3775              4) try the same things with widening allowed.  */
3776           remainder
3777             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3778                                  op0, op1, target,
3779                                  unsignedp,
3780                                  ((optab2->handlers[(int) compute_mode].insn_code
3781                                    != CODE_FOR_nothing)
3782                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3783           if (remainder == 0)
3784             {
3785               /* No luck there.  Can we do remainder and divide at once
3786                  without a library call?  */
3787               remainder = gen_reg_rtx (compute_mode);
3788               if (! expand_twoval_binop ((unsignedp
3789                                           ? udivmod_optab
3790                                           : sdivmod_optab),
3791                                          op0, op1,
3792                                          NULL_RTX, remainder, unsignedp))
3793                 remainder = 0;
3794             }
3795
3796           if (remainder)
3797             return gen_lowpart (mode, remainder);
3798         }
3799
3800       /* Produce the quotient.  Try a quotient insn, but not a library call.
3801          If we have a divmod in this mode, use it in preference to widening
3802          the div (for this test we assume it will not fail). Note that optab2
3803          is set to the one of the two optabs that the call below will use.  */
3804       quotient
3805         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3806                              op0, op1, rem_flag ? NULL_RTX : target,
3807                              unsignedp,
3808                              ((optab2->handlers[(int) compute_mode].insn_code
3809                                != CODE_FOR_nothing)
3810                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3811
3812       if (quotient == 0)
3813         {
3814           /* No luck there.  Try a quotient-and-remainder insn,
3815              keeping the quotient alone.  */
3816           quotient = gen_reg_rtx (compute_mode);
3817           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3818                                      op0, op1,
3819                                      quotient, NULL_RTX, unsignedp))
3820             {
3821               quotient = 0;
3822               if (! rem_flag)
3823                 /* Still no luck.  If we are not computing the remainder,
3824                    use a library call for the quotient.  */
3825                 quotient = sign_expand_binop (compute_mode,
3826                                               udiv_optab, sdiv_optab,
3827                                               op0, op1, target,
3828                                               unsignedp, OPTAB_LIB_WIDEN);
3829             }
3830         }
3831     }
3832
3833   if (rem_flag)
3834     {
3835       if (target && GET_MODE (target) != compute_mode)
3836         target = 0;
3837
3838       if (quotient == 0)
3839         /* No divide instruction either.  Use library for remainder.  */
3840         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3841                                        op0, op1, target,
3842                                        unsignedp, OPTAB_LIB_WIDEN);
3843       else
3844         {
3845           /* We divided.  Now finish doing X - Y * (X / Y).  */
3846           remainder = expand_mult (compute_mode, quotient, op1,
3847                                    NULL_RTX, unsignedp);
3848           remainder = expand_binop (compute_mode, sub_optab, op0,
3849                                     remainder, target, unsignedp,
3850                                     OPTAB_LIB_WIDEN);
3851         }
3852     }
3853
3854   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3855 }
3856 \f
3857 /* Return a tree node with data type TYPE, describing the value of X.
3858    Usually this is an RTL_EXPR, if there is no obvious better choice.
3859    X may be an expression, however we only support those expressions
3860    generated by loop.c.   */
3861
3862 tree
3863 make_tree (type, x)
3864      tree type;
3865      rtx x;
3866 {
3867   tree t;
3868
3869   switch (GET_CODE (x))
3870     {
3871     case CONST_INT:
3872       t = build_int_2 (INTVAL (x),
3873                        (TREE_UNSIGNED (type)
3874                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3875                        || INTVAL (x) >= 0 ? 0 : -1);
3876       TREE_TYPE (t) = type;
3877       return t;
3878
3879     case CONST_DOUBLE:
3880       if (GET_MODE (x) == VOIDmode)
3881         {
3882           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3883           TREE_TYPE (t) = type;
3884         }
3885       else
3886         {
3887           REAL_VALUE_TYPE d;
3888
3889           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3890           t = build_real (type, d);
3891         }
3892
3893       return t;
3894
3895     case PLUS:
3896       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3897                           make_tree (type, XEXP (x, 1))));
3898
3899     case MINUS:
3900       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3901                           make_tree (type, XEXP (x, 1))));
3902
3903     case NEG:
3904       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3905
3906     case MULT:
3907       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3908                           make_tree (type, XEXP (x, 1))));
3909
3910     case ASHIFT:
3911       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3912                           make_tree (type, XEXP (x, 1))));
3913
3914     case LSHIFTRT:
3915       return fold (convert (type,
3916                             build (RSHIFT_EXPR, unsigned_type (type),
3917                                    make_tree (unsigned_type (type),
3918                                               XEXP (x, 0)),
3919                                    make_tree (type, XEXP (x, 1)))));
3920
3921     case ASHIFTRT:
3922       return fold (convert (type,
3923                             build (RSHIFT_EXPR, signed_type (type),
3924                                    make_tree (signed_type (type), XEXP (x, 0)),
3925                                    make_tree (type, XEXP (x, 1)))));
3926
3927     case DIV:
3928       if (TREE_CODE (type) != REAL_TYPE)
3929         t = signed_type (type);
3930       else
3931         t = type;
3932
3933       return fold (convert (type,
3934                             build (TRUNC_DIV_EXPR, t,
3935                                    make_tree (t, XEXP (x, 0)),
3936                                    make_tree (t, XEXP (x, 1)))));
3937     case UDIV:
3938       t = unsigned_type (type);
3939       return fold (convert (type,
3940                             build (TRUNC_DIV_EXPR, t,
3941                                    make_tree (t, XEXP (x, 0)),
3942                                    make_tree (t, XEXP (x, 1)))));
3943    default:
3944       t = make_node (RTL_EXPR);
3945       TREE_TYPE (t) = type;
3946       RTL_EXPR_RTL (t) = x;
3947       /* There are no insns to be output
3948          when this rtl_expr is used.  */
3949       RTL_EXPR_SEQUENCE (t) = 0;
3950       return t;
3951     }
3952 }
3953
3954 /* Return an rtx representing the value of X * MULT + ADD.
3955    TARGET is a suggestion for where to store the result (an rtx).
3956    MODE is the machine mode for the computation.
3957    X and MULT must have mode MODE.  ADD may have a different mode.
3958    So can X (defaults to same as MODE).
3959    UNSIGNEDP is non-zero to do unsigned multiplication.
3960    This may emit insns.  */
3961
3962 rtx
3963 expand_mult_add (x, target, mult, add, mode, unsignedp)
3964      rtx x, target, mult, add;
3965      enum machine_mode mode;
3966      int unsignedp;
3967 {
3968   tree type = type_for_mode (mode, unsignedp);
3969   tree add_type = (GET_MODE (add) == VOIDmode
3970                    ? type : type_for_mode (GET_MODE (add), unsignedp));
3971   tree result =  fold (build (PLUS_EXPR, type,
3972                               fold (build (MULT_EXPR, type,
3973                                            make_tree (type, x),
3974                                            make_tree (type, mult))),
3975                               make_tree (add_type, add)));
3976
3977   return expand_expr (result, target, VOIDmode, 0);
3978 }
3979 \f
3980 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
3981    and returning TARGET.
3982
3983    If TARGET is 0, a pseudo-register or constant is returned.  */
3984
3985 rtx
3986 expand_and (op0, op1, target)
3987      rtx op0, op1, target;
3988 {
3989   enum machine_mode mode = VOIDmode;
3990   rtx tem;
3991
3992   if (GET_MODE (op0) != VOIDmode)
3993     mode = GET_MODE (op0);
3994   else if (GET_MODE (op1) != VOIDmode)
3995     mode = GET_MODE (op1);
3996
3997   if (mode != VOIDmode)
3998     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
3999   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4000     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4001   else
4002     abort ();
4003
4004   if (target == 0)
4005     target = tem;
4006   else if (tem != target)
4007     emit_move_insn (target, tem);
4008   return target;
4009 }
4010 \f
4011 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4012    and storing in TARGET.  Normally return TARGET.
4013    Return 0 if that cannot be done.
4014
4015    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4016    it is VOIDmode, they cannot both be CONST_INT.
4017
4018    UNSIGNEDP is for the case where we have to widen the operands
4019    to perform the operation.  It says to use zero-extension.
4020
4021    NORMALIZEP is 1 if we should convert the result to be either zero
4022    or one.  Normalize is -1 if we should convert the result to be
4023    either zero or -1.  If NORMALIZEP is zero, the result will be left
4024    "raw" out of the scc insn.  */
4025
4026 rtx
4027 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4028      rtx target;
4029      enum rtx_code code;
4030      rtx op0, op1;
4031      enum machine_mode mode;
4032      int unsignedp;
4033      int normalizep;
4034 {
4035   rtx subtarget;
4036   enum insn_code icode;
4037   enum machine_mode compare_mode;
4038   enum machine_mode target_mode = GET_MODE (target);
4039   rtx tem;
4040   rtx last = get_last_insn ();
4041   rtx pattern, comparison;
4042
4043   /* If one operand is constant, make it the second one.  Only do this
4044      if the other operand is not constant as well.  */
4045
4046   if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4047       || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4048     {
4049       tem = op0;
4050       op0 = op1;
4051       op1 = tem;
4052       code = swap_condition (code);
4053     }
4054
4055   if (mode == VOIDmode)
4056     mode = GET_MODE (op0);
4057
4058   /* For some comparisons with 1 and -1, we can convert this to
4059      comparisons with zero.  This will often produce more opportunities for
4060      store-flag insns.  */
4061
4062   switch (code)
4063     {
4064     case LT:
4065       if (op1 == const1_rtx)
4066         op1 = const0_rtx, code = LE;
4067       break;
4068     case LE:
4069       if (op1 == constm1_rtx)
4070         op1 = const0_rtx, code = LT;
4071       break;
4072     case GE:
4073       if (op1 == const1_rtx)
4074         op1 = const0_rtx, code = GT;
4075       break;
4076     case GT:
4077       if (op1 == constm1_rtx)
4078         op1 = const0_rtx, code = GE;
4079       break;
4080     case GEU:
4081       if (op1 == const1_rtx)
4082         op1 = const0_rtx, code = NE;
4083       break;
4084     case LTU:
4085       if (op1 == const1_rtx)
4086         op1 = const0_rtx, code = EQ;
4087       break;
4088     default:
4089       break;
4090     }
4091
4092   /* From now on, we won't change CODE, so set ICODE now.  */
4093   icode = setcc_gen_code[(int) code];
4094
4095   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4096      complement of A (for GE) and shifting the sign bit to the low bit.  */
4097   if (op1 == const0_rtx && (code == LT || code == GE)
4098       && GET_MODE_CLASS (mode) == MODE_INT
4099       && (normalizep || STORE_FLAG_VALUE == 1
4100           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4101               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4102                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4103     {
4104       subtarget = target;
4105
4106       /* If the result is to be wider than OP0, it is best to convert it
4107          first.  If it is to be narrower, it is *incorrect* to convert it
4108          first.  */
4109       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4110         {
4111           op0 = protect_from_queue (op0, 0);
4112           op0 = convert_modes (target_mode, mode, op0, 0);
4113           mode = target_mode;
4114         }
4115
4116       if (target_mode != mode)
4117         subtarget = 0;
4118
4119       if (code == GE)
4120         op0 = expand_unop (mode, one_cmpl_optab, op0,
4121                            ((STORE_FLAG_VALUE == 1 || normalizep)
4122                             ? 0 : subtarget), 0);
4123
4124       if (STORE_FLAG_VALUE == 1 || normalizep)
4125         /* If we are supposed to produce a 0/1 value, we want to do
4126            a logical shift from the sign bit to the low-order bit; for
4127            a -1/0 value, we do an arithmetic shift.  */
4128         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4129                             size_int (GET_MODE_BITSIZE (mode) - 1),
4130                             subtarget, normalizep != -1);
4131
4132       if (mode != target_mode)
4133         op0 = convert_modes (target_mode, mode, op0, 0);
4134
4135       return op0;
4136     }
4137
4138   if (icode != CODE_FOR_nothing)
4139     {
4140       /* We think we may be able to do this with a scc insn.  Emit the
4141          comparison and then the scc insn.
4142
4143          compare_from_rtx may call emit_queue, which would be deleted below
4144          if the scc insn fails.  So call it ourselves before setting LAST.  */
4145
4146       emit_queue ();
4147       last = get_last_insn ();
4148
4149       comparison
4150         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4151       if (GET_CODE (comparison) == CONST_INT)
4152         return (comparison == const0_rtx ? const0_rtx
4153                 : normalizep == 1 ? const1_rtx
4154                 : normalizep == -1 ? constm1_rtx
4155                 : const_true_rtx);
4156
4157       /* If the code of COMPARISON doesn't match CODE, something is
4158          wrong; we can no longer be sure that we have the operation.
4159          We could handle this case, but it should not happen.  */
4160
4161       if (GET_CODE (comparison) != code)
4162         abort ();
4163
4164       /* Get a reference to the target in the proper mode for this insn.  */
4165       compare_mode = insn_operand_mode[(int) icode][0];
4166       subtarget = target;
4167       if (preserve_subexpressions_p ()
4168           || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
4169         subtarget = gen_reg_rtx (compare_mode);
4170
4171       pattern = GEN_FCN (icode) (subtarget);
4172       if (pattern)
4173         {
4174           emit_insn (pattern);
4175
4176           /* If we are converting to a wider mode, first convert to
4177              TARGET_MODE, then normalize.  This produces better combining
4178              opportunities on machines that have a SIGN_EXTRACT when we are
4179              testing a single bit.  This mostly benefits the 68k.
4180
4181              If STORE_FLAG_VALUE does not have the sign bit set when
4182              interpreted in COMPARE_MODE, we can do this conversion as
4183              unsigned, which is usually more efficient.  */
4184           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4185             {
4186               convert_move (target, subtarget,
4187                             (GET_MODE_BITSIZE (compare_mode)
4188                              <= HOST_BITS_PER_WIDE_INT)
4189                             && 0 == (STORE_FLAG_VALUE
4190                                      & ((HOST_WIDE_INT) 1
4191                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4192               op0 = target;
4193               compare_mode = target_mode;
4194             }
4195           else
4196             op0 = subtarget;
4197
4198           /* If we want to keep subexpressions around, don't reuse our
4199              last target.  */
4200
4201           if (preserve_subexpressions_p ())
4202             subtarget = 0;
4203
4204           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4205              we don't have to do anything.  */
4206           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4207             ;
4208           else if (normalizep == - STORE_FLAG_VALUE)
4209             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4210
4211           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4212              makes it hard to use a value of just the sign bit due to
4213              ANSI integer constant typing rules.  */
4214           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4215                    && (STORE_FLAG_VALUE
4216                        & ((HOST_WIDE_INT) 1
4217                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4218             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4219                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4220                                 subtarget, normalizep == 1);
4221           else if (STORE_FLAG_VALUE & 1)
4222             {
4223               op0 = expand_and (op0, const1_rtx, subtarget);
4224               if (normalizep == -1)
4225                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4226             }
4227           else
4228             abort ();
4229
4230           /* If we were converting to a smaller mode, do the
4231              conversion now.  */
4232           if (target_mode != compare_mode)
4233             {
4234               convert_move (target, op0, 0);
4235               return target;
4236             }
4237           else
4238             return op0;
4239         }
4240     }
4241
4242   delete_insns_since (last);
4243
4244   /* If expensive optimizations, use different pseudo registers for each
4245      insn, instead of reusing the same pseudo.  This leads to better CSE,
4246      but slows down the compiler, since there are more pseudos */
4247   subtarget = (!flag_expensive_optimizations
4248                && (target_mode == mode)) ? target : NULL_RTX;
4249
4250   /* If we reached here, we can't do this with a scc insn.  However, there
4251      are some comparisons that can be done directly.  For example, if
4252      this is an equality comparison of integers, we can try to exclusive-or
4253      (or subtract) the two operands and use a recursive call to try the
4254      comparison with zero.  Don't do any of these cases if branches are
4255      very cheap.  */
4256
4257   if (BRANCH_COST > 0
4258       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4259       && op1 != const0_rtx)
4260     {
4261       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4262                           OPTAB_WIDEN);
4263
4264       if (tem == 0)
4265         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4266                             OPTAB_WIDEN);
4267       if (tem != 0)
4268         tem = emit_store_flag (target, code, tem, const0_rtx,
4269                                mode, unsignedp, normalizep);
4270       if (tem == 0)
4271         delete_insns_since (last);
4272       return tem;
4273     }
4274
4275   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4276      the constant zero.  Reject all other comparisons at this point.  Only
4277      do LE and GT if branches are expensive since they are expensive on
4278      2-operand machines.  */
4279
4280   if (BRANCH_COST == 0
4281       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4282       || (code != EQ && code != NE
4283           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4284     return 0;
4285
4286   /* See what we need to return.  We can only return a 1, -1, or the
4287      sign bit.  */
4288
4289   if (normalizep == 0)
4290     {
4291       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4292         normalizep = STORE_FLAG_VALUE;
4293
4294       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4295                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4296                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4297         ;
4298       else
4299         return 0;
4300     }
4301
4302   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4303      do the necessary operation below.  */
4304
4305   tem = 0;
4306
4307   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4308      the sign bit set.  */
4309
4310   if (code == LE)
4311     {
4312       /* This is destructive, so SUBTARGET can't be OP0.  */
4313       if (rtx_equal_p (subtarget, op0))
4314         subtarget = 0;
4315
4316       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4317                           OPTAB_WIDEN);
4318       if (tem)
4319         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4320                             OPTAB_WIDEN);
4321     }
4322
4323   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4324      number of bits in the mode of OP0, minus one.  */
4325
4326   if (code == GT)
4327     {
4328       if (rtx_equal_p (subtarget, op0))
4329         subtarget = 0;
4330
4331       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4332                           size_int (GET_MODE_BITSIZE (mode) - 1),
4333                           subtarget, 0);
4334       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4335                           OPTAB_WIDEN);
4336     }
4337
4338   if (code == EQ || code == NE)
4339     {
4340       /* For EQ or NE, one way to do the comparison is to apply an operation
4341          that converts the operand into a positive number if it is non-zero
4342          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4343          for NE we negate.  This puts the result in the sign bit.  Then we
4344          normalize with a shift, if needed.
4345
4346          Two operations that can do the above actions are ABS and FFS, so try
4347          them.  If that doesn't work, and MODE is smaller than a full word,
4348          we can use zero-extension to the wider mode (an unsigned conversion)
4349          as the operation.  */
4350
4351       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4352         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4353       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4354         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4355       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4356         {
4357           op0 = protect_from_queue (op0, 0);
4358           tem = convert_modes (word_mode, mode, op0, 1);
4359           mode = word_mode;
4360         }
4361
4362       if (tem != 0)
4363         {
4364           if (code == EQ)
4365             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4366                                 0, OPTAB_WIDEN);
4367           else
4368             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4369         }
4370
4371       /* If we couldn't do it that way, for NE we can "or" the two's complement
4372          of the value with itself.  For EQ, we take the one's complement of
4373          that "or", which is an extra insn, so we only handle EQ if branches
4374          are expensive.  */
4375
4376       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4377         {
4378           if (rtx_equal_p (subtarget, op0))
4379             subtarget = 0;
4380
4381           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4382           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4383                               OPTAB_WIDEN);
4384
4385           if (tem && code == EQ)
4386             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4387         }
4388     }
4389
4390   if (tem && normalizep)
4391     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4392                         size_int (GET_MODE_BITSIZE (mode) - 1),
4393                         subtarget, normalizep == 1);
4394
4395   if (tem)
4396     {
4397       if (GET_MODE (tem) != target_mode)
4398         {
4399           convert_move (target, tem, 0);
4400           tem = target;
4401         }
4402       else if (!subtarget)
4403         {
4404           emit_move_insn (target, tem);
4405           tem = target;
4406         }
4407     }
4408   else
4409     delete_insns_since (last);
4410
4411   return tem;
4412 }
4413
4414 /* Like emit_store_flag, but always succeeds.  */
4415
4416 rtx
4417 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4418      rtx target;
4419      enum rtx_code code;
4420      rtx op0, op1;
4421      enum machine_mode mode;
4422      int unsignedp;
4423      int normalizep;
4424 {
4425   rtx tem, label;
4426
4427   /* First see if emit_store_flag can do the job.  */
4428   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4429   if (tem != 0)
4430     return tem;
4431
4432   if (normalizep == 0)
4433     normalizep = 1;
4434
4435   /* If this failed, we have to do this with set/compare/jump/set code.  */
4436
4437   if (GET_CODE (target) != REG
4438       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4439     target = gen_reg_rtx (GET_MODE (target));
4440
4441   emit_move_insn (target, const1_rtx);
4442   tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4443   if (GET_CODE (tem) == CONST_INT)
4444     return tem;
4445
4446   label = gen_label_rtx ();
4447   if (bcc_gen_fctn[(int) code] == 0)
4448     abort ();
4449
4450   emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
4451   emit_move_insn (target, const0_rtx);
4452   emit_label (label);
4453
4454   return target;
4455 }
4456 \f
4457 /* Perform possibly multi-word comparison and conditional jump to LABEL
4458    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4459
4460    The algorithm is based on the code in expr.c:do_jump.
4461
4462    Note that this does not perform a general comparison.  Only variants
4463    generated within expmed.c are correctly handled, others abort (but could
4464    be handled if needed).  */
4465
4466 static void
4467 do_cmp_and_jump (arg1, arg2, op, mode, label)
4468      rtx arg1, arg2, label;
4469     enum rtx_code op;
4470     enum machine_mode mode;
4471 {
4472   /* If this mode is an integer too wide to compare properly,
4473      compare word by word.  Rely on cse to optimize constant cases.  */
4474
4475   if (GET_MODE_CLASS (mode) == MODE_INT && !can_compare_p (mode))
4476     {
4477       rtx label2 = gen_label_rtx ();
4478
4479       switch (op)
4480         {
4481         case LTU:
4482           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4483           break;
4484
4485         case LEU:
4486           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4487           break;
4488
4489         case LT:
4490           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4491           break;
4492
4493         case GT:
4494           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4495           break;
4496
4497         case GE:
4498           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4499           break;
4500
4501           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4502              that's the only equality operations we do */
4503         case EQ:
4504           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4505             abort();
4506           do_jump_by_parts_equality_rtx (arg1, label2, label);
4507           break;
4508
4509         case NE:
4510           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4511             abort();
4512           do_jump_by_parts_equality_rtx (arg1, label, label2);
4513           break;
4514
4515         default:
4516           abort();
4517         }
4518
4519       emit_label (label2);
4520     }
4521   else
4522     {
4523       emit_cmp_insn(arg1, arg2, op, NULL_RTX, mode, 0, 0);
4524       if (bcc_gen_fctn[(int) op] == 0)
4525         abort ();
4526       emit_jump_insn ((*bcc_gen_fctn[(int) op]) (label));
4527     }
4528 }