gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU CC.
   7
   8 GNU CC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU CC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU CC; see the file COPYING.  If not, write to
  20 the Free Software Foundation, 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-config.h"
  32 #include "expr.h"
  33 #include "real.h"
  34 #include "recog.h"
  35
  36 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  37                                                  unsigned HOST_WIDE_INT,
  38                                                  unsigned HOST_WIDE_INT, rtx,
  39                                                  unsigned int));
  40 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  41                                                  unsigned HOST_WIDE_INT, rtx,
  42                                                  unsigned int));
  43 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  44                                                  unsigned HOST_WIDE_INT,
  45                                                  unsigned HOST_WIDE_INT,
  46                                                  unsigned HOST_WIDE_INT,
  47                                                  rtx, int, unsigned int));
  48 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  49                                                  int, int));
  50 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  51                                                  int, int));
  52 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  53                                                  unsigned HOST_WIDE_INT, int,
  54                                                  unsigned int));
  55 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  56                                                  enum machine_mode, rtx));
  57
  58 /* Non-zero means divides or modulus operations are relatively cheap for
  59    powers of two, so don't use branches; emit the operation instead.
  60    Usually, this will mean that the MD file will emit non-branch
  61    sequences.  */
  62
  63 static int sdiv_pow2_cheap, smod_pow2_cheap;
  64
  65 #ifndef SLOW_UNALIGNED_ACCESS
  66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  67 #endif
  68
  69 /* For compilers that support multiple targets with different word sizes,
  70    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  71    is the H8/300(H) compiler.  */
  72
  73 #ifndef MAX_BITS_PER_WORD
  74 #define MAX_BITS_PER_WORD BITS_PER_WORD
  75 #endif
  76
  77 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  78    shift count and some by mode.  */
  79 static int add_cost, negate_cost, zero_cost;
  80 static int shift_cost[MAX_BITS_PER_WORD];
  81 static int shiftadd_cost[MAX_BITS_PER_WORD];
  82 static int shiftsub_cost[MAX_BITS_PER_WORD];
  83 static int mul_cost[NUM_MACHINE_MODES];
  84 static int div_cost[NUM_MACHINE_MODES];
  85 static int mul_widen_cost[NUM_MACHINE_MODES];
  86 static int mul_highpart_cost[NUM_MACHINE_MODES];
  87
  88 void
  89 init_expmed ()
  90 {
  91   /* This is "some random pseudo register" for purposes of calling recog
  92      to see what insns exist.  */
  93   rtx reg = gen_rtx_REG (word_mode, 10000);
  94   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  95   int dummy;
  96   int m;
  97   enum machine_mode mode, wider_mode;
  98
  99   start_sequence ();
 100
 101   reg = gen_rtx_REG (word_mode, 10000);
 102
 103   zero_cost = rtx_cost (const0_rtx, 0);
 104   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 105
 106   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 107                                        gen_rtx_ASHIFT (word_mode, reg,
 108                                                        const0_rtx)));
 109
 110   shiftadd_insn
 111     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 112                               gen_rtx_PLUS (word_mode,
 113                                             gen_rtx_MULT (word_mode,
 114                                                           reg, const0_rtx),
 115                                             reg)));
 116
 117   shiftsub_insn
 118     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 119                               gen_rtx_MINUS (word_mode,
 120                                              gen_rtx_MULT (word_mode,
 121                                                            reg, const0_rtx),
 122                                              reg)));
 123
 124   init_recog ();
 125
 126   shift_cost[0] = 0;
 127   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 128
 129   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 130     {
 131       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 132
 133       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 134       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 135         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 136
 137       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 138         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 139       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 140         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 141
 142       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 143         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 144       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 145         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 146     }
 147
 148   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 149
 150   sdiv_pow2_cheap
 151     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 152        <= 2 * add_cost);
 153   smod_pow2_cheap
 154     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 155        <= 2 * add_cost);
 156
 157   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 158        mode != VOIDmode;
 159        mode = GET_MODE_WIDER_MODE (mode))
 160     {
 161       reg = gen_rtx_REG (mode, 10000);
 162       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 163       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 164       wider_mode = GET_MODE_WIDER_MODE (mode);
 165       if (wider_mode != VOIDmode)
 166         {
 167           mul_widen_cost[(int) wider_mode]
 168             = rtx_cost (gen_rtx_MULT (wider_mode,
 169                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 170                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 171                         SET);
 172           mul_highpart_cost[(int) mode]
 173             = rtx_cost (gen_rtx_TRUNCATE
 174                         (mode,
 175                          gen_rtx_LSHIFTRT (wider_mode,
 176                                            gen_rtx_MULT (wider_mode,
 177                                                          gen_rtx_ZERO_EXTEND
 178                                                          (wider_mode, reg),
 179                                                          gen_rtx_ZERO_EXTEND
 180                                                          (wider_mode, reg)),
 181                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 182                         SET);
 183         }
 184     }
 185
 186   end_sequence ();
 187 }
 188
 189 /* Return an rtx representing minus the value of X.
 190    MODE is the intended mode of the result,
 191    useful if X is a CONST_INT.  */
 192
 193 rtx
 194 negate_rtx (mode, x)
 195      enum machine_mode mode;
 196      rtx x;
 197 {
 198   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 199
 200   if (result == 0)
 201     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 202
 203   return result;
 204 }
 205 \f
 206 /* Generate code to store value from rtx VALUE
 207    into a bit-field within structure STR_RTX
 208    containing BITSIZE bits starting at bit BITNUM.
 209    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 210    ALIGN is the alignment that STR_RTX is known to have.
 211    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 212
 213 /* ??? Note that there are two different ideas here for how
 214    to determine the size to count bits within, for a register.
 215    One is BITS_PER_WORD, and the other is the size of operand 3
 216    of the insv pattern.
 217
 218    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 219    else, we use the mode of operand 3.  */
 220
 221 rtx
 222 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 223      rtx str_rtx;
 224      unsigned HOST_WIDE_INT bitsize;
 225      unsigned HOST_WIDE_INT bitnum;
 226      enum machine_mode fieldmode;
 227      rtx value;
 228      unsigned int align;
 229      HOST_WIDE_INT total_size;
 230 {
 231   unsigned int unit
 232     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 233   unsigned HOST_WIDE_INT offset = bitnum / unit;
 234   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 235   register rtx op0 = str_rtx;
 236 #ifdef HAVE_insv
 237   unsigned HOST_WIDE_INT insv_bitsize;
 238   enum machine_mode op_mode;
 239
 240   op_mode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 241   if (op_mode == VOIDmode)
 242     op_mode = word_mode;
 243   insv_bitsize = GET_MODE_BITSIZE (op_mode);
 244 #endif
 245
 246   /* It is wrong to have align==0, since every object is aligned at
 247      least at a bit boundary.  This usually means a bug elsewhere.  */
 248   if (align == 0)
 249     abort ();
 250
 251   /* Discount the part of the structure before the desired byte.
 252      We need to know how many bytes are safe to reference after it.  */
 253   if (total_size >= 0)
 254     total_size -= (bitpos / BIGGEST_ALIGNMENT
 255                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 256
 257   while (GET_CODE (op0) == SUBREG)
 258     {
 259       /* The following line once was done only if WORDS_BIG_ENDIAN,
 260          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 261          meaningful at a much higher level; when structures are copied
 262          between memory and regs, the higher-numbered regs
 263          always get higher addresses.  */
 264       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 265       /* We used to adjust BITPOS here, but now we do the whole adjustment
 266          right after the loop.  */
 267       op0 = SUBREG_REG (op0);
 268     }
 269
 270   /* If OP0 is a register, BITPOS must count within a word.
 271      But as we have it, it counts within whatever size OP0 now has.
 272      On a bigendian machine, these are not the same, so convert.  */
 273   if (BYTES_BIG_ENDIAN
 274       && GET_CODE (op0) != MEM
 275       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 276     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 277
 278   value = protect_from_queue (value, 0);
 279
 280   if (flag_force_mem)
 281     value = force_not_mem (value);
 282
 283   /* If the target is a register, overwriting the entire object, or storing
 284      a full-word or multi-word field can be done with just a SUBREG.
 285
 286      If the target is memory, storing any naturally aligned field can be
 287      done with a simple store.  For targets that support fast unaligned
 288      memory, any naturally sized, unit aligned field can be done directly.  */
 289
 290   if (bitsize == GET_MODE_BITSIZE (fieldmode)
 291       && (GET_CODE (op0) != MEM
 292           ? (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 293              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 294           : (! SLOW_UNALIGNED_ACCESS (fieldmode, align)
 295              || (offset * BITS_PER_UNIT % bitsize == 0
 296                  && align % GET_MODE_BITSIZE (fieldmode) == 0)))
 297       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0))
 298     {
 299       if (GET_MODE (op0) != fieldmode)
 300         {
 301           if (GET_CODE (op0) == SUBREG)
 302             {
 303               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 304                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 305                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 306                 op0 = SUBREG_REG (op0);
 307               else
 308                 /* Else we've got some float mode source being extracted into
 309                    a different float mode destination -- this combination of
 310                    subregs results in Severe Tire Damage.  */
 311                 abort ();
 312             }
 313           if (GET_CODE (op0) == REG)
 314             op0 = gen_rtx_SUBREG (fieldmode, op0,
 315                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 316                                   + (offset * UNITS_PER_WORD));
 317           else
 318             op0 = adjust_address (op0, fieldmode, offset);
 319         }
 320       emit_move_insn (op0, value);
 321       return value;
 322     }
 323
 324   /* Make sure we are playing with integral modes.  Pun with subregs
 325      if we aren't.  This must come after the entire register case above,
 326      since that case is valid for any mode.  The following cases are only
 327      valid for integral modes.  */
 328   {
 329     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 330     if (imode != GET_MODE (op0))
 331       {
 332         if (GET_CODE (op0) == MEM)
 333           op0 = adjust_address (op0, imode, 0);
 334         else if (imode != BLKmode)
 335           op0 = gen_lowpart (imode, op0);
 336         else
 337           abort ();
 338       }
 339   }
 340
 341   /* Storing an lsb-aligned field in a register
 342      can be done with a movestrict instruction.  */
 343
 344   if (GET_CODE (op0) != MEM
 345       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 346       && bitsize == GET_MODE_BITSIZE (fieldmode)
 347       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 348           != CODE_FOR_nothing))
 349     {
 350       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 351
 352       /* Get appropriate low part of the value being stored.  */
 353       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 354         value = gen_lowpart (fieldmode, value);
 355       else if (!(GET_CODE (value) == SYMBOL_REF
 356                  || GET_CODE (value) == LABEL_REF
 357                  || GET_CODE (value) == CONST))
 358         value = convert_to_mode (fieldmode, value, 0);
 359
 360       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 361         value = copy_to_mode_reg (fieldmode, value);
 362
 363       if (GET_CODE (op0) == SUBREG)
 364         {
 365           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 366               || GET_MODE_CLASS (fieldmode) == MODE_INT
 367               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 368             op0 = SUBREG_REG (op0);
 369           else
 370             /* Else we've got some float mode source being extracted into
 371                a different float mode destination -- this combination of
 372                subregs results in Severe Tire Damage.  */
 373             abort ();
 374         }
 375
 376       emit_insn (GEN_FCN (icode)
 377                  (gen_rtx_SUBREG (fieldmode, op0,
 378                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 379                                   + (offset * UNITS_PER_WORD)),
 380                                   value));
 381
 382       return value;
 383     }
 384
 385   /* Handle fields bigger than a word.  */
 386
 387   if (bitsize > BITS_PER_WORD)
 388     {
 389       /* Here we transfer the words of the field
 390          in the order least significant first.
 391          This is because the most significant word is the one which may
 392          be less than full.
 393          However, only do that if the value is not BLKmode.  */
 394
 395       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 396       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 397       unsigned int i;
 398
 399       /* This is the mode we must force value to, so that there will be enough
 400          subwords to extract.  Note that fieldmode will often (always?) be
 401          VOIDmode, because that is what store_field uses to indicate that this
 402          is a bit field, but passing VOIDmode to operand_subword_force will
 403          result in an abort.  */
 404       fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 405
 406       for (i = 0; i < nwords; i++)
 407         {
 408           /* If I is 0, use the low-order word in both field and target;
 409              if I is 1, use the next to lowest word; and so on.  */
 410           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 411           unsigned int bit_offset = (backwards
 412                             ? MAX ((int) bitsize - ((int) i + 1)
 413                                    * BITS_PER_WORD,
 414                                    0)
 415                             : (int) i * BITS_PER_WORD);
 416
 417           store_bit_field (op0, MIN (BITS_PER_WORD,
 418                                      bitsize - i * BITS_PER_WORD),
 419                            bitnum + bit_offset, word_mode,
 420                            operand_subword_force (value, wordnum,
 421                                                   (GET_MODE (value) == VOIDmode
 422                                                    ? fieldmode
 423                                                    : GET_MODE (value))),
 424                            align, total_size);
 425         }
 426       return value;
 427     }
 428
 429   /* From here on we can assume that the field to be stored in is
 430      a full-word (whatever type that is), since it is shorter than a word.  */
 431
 432   /* OFFSET is the number of words or bytes (UNIT says which)
 433      from STR_RTX to the first word or byte containing part of the field.  */
 434
 435   if (GET_CODE (op0) != MEM)
 436     {
 437       if (offset != 0
 438           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 439         {
 440           if (GET_CODE (op0) != REG)
 441             {
 442               /* Since this is a destination (lvalue), we can't copy it to a
 443                  pseudo.  We can trivially remove a SUBREG that does not
 444                  change the size of the operand.  Such a SUBREG may have been
 445                  added above.  Otherwise, abort.  */
 446               if (GET_CODE (op0) == SUBREG
 447                   && (GET_MODE_SIZE (GET_MODE (op0))
 448                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 449                 op0 = SUBREG_REG (op0);
 450               else
 451                 abort ();
 452             }
 453           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 454                                 op0, (offset * UNITS_PER_WORD));
 455         }
 456       offset = 0;
 457     }
 458   else
 459     {
 460       op0 = protect_from_queue (op0, 1);
 461     }
 462
 463   /* If VALUE is a floating-point mode, access it as an integer of the
 464      corresponding size.  This can occur on a machine with 64 bit registers
 465      that uses SFmode for float.  This can also occur for unaligned float
 466      structure fields.  */
 467   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 468     {
 469       if (GET_CODE (value) != REG)
 470         value = copy_to_reg (value);
 471       value = gen_rtx_SUBREG (word_mode, value, 0);
 472     }
 473
 474   /* Now OFFSET is nonzero only if OP0 is memory
 475      and is therefore always measured in bytes.  */
 476
 477 #ifdef HAVE_insv
 478   if (HAVE_insv
 479       && GET_MODE (value) != BLKmode
 480       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 481       /* Ensure insv's size is wide enough for this field.  */
 482       && (insv_bitsize >= bitsize)
 483       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 484             && (bitsize + bitpos > insv_bitsize)))
 485     {
 486       int xbitpos = bitpos;
 487       rtx value1;
 488       rtx xop0 = op0;
 489       rtx last = get_last_insn ();
 490       rtx pat;
 491       enum machine_mode maxmode;
 492       int save_volatile_ok = volatile_ok;
 493
 494       maxmode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 495       if (maxmode == VOIDmode)
 496         maxmode = word_mode;
 497
 498       volatile_ok = 1;
 499
 500       /* If this machine's insv can only insert into a register, copy OP0
 501          into a register and save it back later.  */
 502       /* This used to check flag_force_mem, but that was a serious
 503          de-optimization now that flag_force_mem is enabled by -O2.  */
 504       if (GET_CODE (op0) == MEM
 505           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 506                 (op0, VOIDmode)))
 507         {
 508           rtx tempreg;
 509           enum machine_mode bestmode;
 510
 511           /* Get the mode to use for inserting into this field.  If OP0 is
 512              BLKmode, get the smallest mode consistent with the alignment. If
 513              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 514              mode. Otherwise, use the smallest mode containing the field.  */
 515
 516           if (GET_MODE (op0) == BLKmode
 517               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 518             bestmode
 519               = get_best_mode (bitsize, bitnum, align, maxmode,
 520                                MEM_VOLATILE_P (op0));
 521           else
 522             bestmode = GET_MODE (op0);
 523
 524           if (bestmode == VOIDmode
 525               || (SLOW_UNALIGNED_ACCESS (bestmode, align)
 526                   && GET_MODE_BITSIZE (bestmode) > align))
 527             goto insv_loses;
 528
 529           /* Adjust address to point to the containing unit of that mode.  */
 530           unit = GET_MODE_BITSIZE (bestmode);
 531           /* Compute offset as multiple of this unit, counting in bytes.  */
 532           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 533           bitpos = bitnum % unit;
 534           op0 = adjust_address (op0, bestmode,  offset);
 535
 536           /* Fetch that unit, store the bitfield in it, then store
 537              the unit.  */
 538           tempreg = copy_to_reg (op0);
 539           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 540                            align, total_size);
 541           emit_move_insn (op0, tempreg);
 542           return value;
 543         }
 544       volatile_ok = save_volatile_ok;
 545
 546       /* Add OFFSET into OP0's address.  */
 547       if (GET_CODE (xop0) == MEM)
 548         xop0 = adjust_address (xop0, byte_mode, offset);
 549
 550       /* If xop0 is a register, we need it in MAXMODE
 551          to make it acceptable to the format of insv.  */
 552       if (GET_CODE (xop0) == SUBREG)
 553         /* We can't just change the mode, because this might clobber op0,
 554            and we will need the original value of op0 if insv fails.  */
 555         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 556       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 557         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 558
 559       /* On big-endian machines, we count bits from the most significant.
 560          If the bit field insn does not, we must invert.  */
 561
 562       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 563         xbitpos = unit - bitsize - xbitpos;
 564
 565       /* We have been counting XBITPOS within UNIT.
 566          Count instead within the size of the register.  */
 567       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 568         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 569
 570       unit = GET_MODE_BITSIZE (maxmode);
 571
 572       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 573       value1 = value;
 574       if (GET_MODE (value) != maxmode)
 575         {
 576           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 577             {
 578               /* Optimization: Don't bother really extending VALUE
 579                  if it has all the bits we will actually use.  However,
 580                  if we must narrow it, be sure we do it correctly.  */
 581
 582               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 583                 {
 584                   /* Avoid making subreg of a subreg, or of a mem.  */
 585                   if (GET_CODE (value1) != REG)
 586                     value1 = copy_to_reg (value1);
 587                   value1 = gen_rtx_SUBREG (maxmode, value1, 0);
 588                 }
 589               else
 590                 value1 = gen_lowpart (maxmode, value1);
 591             }
 592           else if (GET_CODE (value) == CONST_INT)
 593             value1 = GEN_INT (trunc_int_for_mode (INTVAL (value), maxmode));
 594           else if (!CONSTANT_P (value))
 595             /* Parse phase is supposed to make VALUE's data type
 596                match that of the component reference, which is a type
 597                at least as wide as the field; so VALUE should have
 598                a mode that corresponds to that type.  */
 599             abort ();
 600         }
 601
 602       /* If this machine's insv insists on a register,
 603          get VALUE1 into a register.  */
 604       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 605              (value1, maxmode)))
 606         value1 = force_reg (maxmode, value1);
 607
 608       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 609       if (pat)
 610         emit_insn (pat);
 611       else
 612         {
 613           delete_insns_since (last);
 614           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 615         }
 616     }
 617   else
 618     insv_loses:
 619 #endif
 620     /* Insv is not available; store using shifts and boolean ops.  */
 621     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 622   return value;
 623 }
 624 \f
 625 /* Use shifts and boolean operations to store VALUE
 626    into a bit field of width BITSIZE
 627    in a memory location specified by OP0 except offset by OFFSET bytes.
 628      (OFFSET must be 0 if OP0 is a register.)
 629    The field starts at position BITPOS within the byte.
 630     (If OP0 is a register, it may be a full word or a narrower mode,
 631      but BITPOS still counts within a full word,
 632      which is significant on bigendian machines.)
 633    STRUCT_ALIGN is the alignment the structure is known to have.
 634
 635    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 636
 637 static void
 638 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 639      register rtx op0;
 640      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 641      register rtx value;
 642      unsigned int struct_align;
 643 {
 644   register enum machine_mode mode;
 645   unsigned int total_bits = BITS_PER_WORD;
 646   rtx subtarget, temp;
 647   int all_zero = 0;
 648   int all_one = 0;
 649
 650   if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
 651     struct_align = BIGGEST_ALIGNMENT;
 652
 653   /* There is a case not handled here:
 654      a structure with a known alignment of just a halfword
 655      and a field split across two aligned halfwords within the structure.
 656      Or likewise a structure with a known alignment of just a byte
 657      and a field split across two bytes.
 658      Such cases are not supposed to be able to occur.  */
 659
 660   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 661     {
 662       if (offset != 0)
 663         abort ();
 664       /* Special treatment for a bit field split across two registers.  */
 665       if (bitsize + bitpos > BITS_PER_WORD)
 666         {
 667           store_split_bit_field (op0, bitsize, bitpos,
 668                                  value, BITS_PER_WORD);
 669           return;
 670         }
 671     }
 672   else
 673     {
 674       /* Get the proper mode to use for this field.  We want a mode that
 675          includes the entire field.  If such a mode would be larger than
 676          a word, we won't be doing the extraction the normal way.
 677          We don't want a mode bigger than the destination.  */
 678
 679       mode = GET_MODE (op0);
 680       if (GET_MODE_BITSIZE (mode) == 0
 681           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 682         mode = word_mode;
 683       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 684                             struct_align, mode,
 685                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 686
 687       if (mode == VOIDmode)
 688         {
 689           /* The only way this should occur is if the field spans word
 690              boundaries.  */
 691           store_split_bit_field (op0,
 692                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 693                                  value, struct_align);
 694           return;
 695         }
 696
 697       total_bits = GET_MODE_BITSIZE (mode);
 698
 699       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 700          be in the range 0 to total_bits-1, and put any excess bytes in
 701          OFFSET.  */
 702       if (bitpos >= total_bits)
 703         {
 704           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 705           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 706                      * BITS_PER_UNIT);
 707         }
 708
 709       /* Get ref to an aligned byte, halfword, or word containing the field.
 710          Adjust BITPOS to be position within a word,
 711          and OFFSET to be the offset of that word.
 712          Then alter OP0 to refer to that word.  */
 713       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 714       offset -= (offset % (total_bits / BITS_PER_UNIT));
 715       op0 = adjust_address (op0, mode, offset);
 716     }
 717
 718   mode = GET_MODE (op0);
 719
 720   /* Now MODE is either some integral mode for a MEM as OP0,
 721      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 722      The bit field is contained entirely within OP0.
 723      BITPOS is the starting bit number within OP0.
 724      (OP0's mode may actually be narrower than MODE.)  */
 725
 726   if (BYTES_BIG_ENDIAN)
 727       /* BITPOS is the distance between our msb
 728          and that of the containing datum.
 729          Convert it to the distance from the lsb.  */
 730       bitpos = total_bits - bitsize - bitpos;
 731
 732   /* Now BITPOS is always the distance between our lsb
 733      and that of OP0.  */
 734
 735   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 736      we must first convert its mode to MODE.  */
 737
 738   if (GET_CODE (value) == CONST_INT)
 739     {
 740       register HOST_WIDE_INT v = INTVAL (value);
 741
 742       if (bitsize < HOST_BITS_PER_WIDE_INT)
 743         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 744
 745       if (v == 0)
 746         all_zero = 1;
 747       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 748                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 749                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 750         all_one = 1;
 751
 752       value = lshift_value (mode, value, bitpos, bitsize);
 753     }
 754   else
 755     {
 756       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 757                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 758
 759       if (GET_MODE (value) != mode)
 760         {
 761           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 762               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 763             value = gen_lowpart (mode, value);
 764           else
 765             value = convert_to_mode (mode, value, 1);
 766         }
 767
 768       if (must_and)
 769         value = expand_binop (mode, and_optab, value,
 770                               mask_rtx (mode, 0, bitsize, 0),
 771                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 772       if (bitpos > 0)
 773         value = expand_shift (LSHIFT_EXPR, mode, value,
 774                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 775     }
 776
 777   /* Now clear the chosen bits in OP0,
 778      except that if VALUE is -1 we need not bother.  */
 779
 780   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 781
 782   if (! all_one)
 783     {
 784       temp = expand_binop (mode, and_optab, op0,
 785                            mask_rtx (mode, bitpos, bitsize, 1),
 786                            subtarget, 1, OPTAB_LIB_WIDEN);
 787       subtarget = temp;
 788     }
 789   else
 790     temp = op0;
 791
 792   /* Now logical-or VALUE into OP0, unless it is zero.  */
 793
 794   if (! all_zero)
 795     temp = expand_binop (mode, ior_optab, temp, value,
 796                          subtarget, 1, OPTAB_LIB_WIDEN);
 797   if (op0 != temp)
 798     emit_move_insn (op0, temp);
 799 }
 800 \f
 801 /* Store a bit field that is split across multiple accessible memory objects.
 802
 803    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 804    BITSIZE is the field width; BITPOS the position of its first bit
 805    (within the word).
 806    VALUE is the value to store.
 807    ALIGN is the known alignment of OP0.
 808    This is also the size of the memory objects to be used.
 809
 810    This does not yet handle fields wider than BITS_PER_WORD.  */
 811
 812 static void
 813 store_split_bit_field (op0, bitsize, bitpos, value, align)
 814      rtx op0;
 815      unsigned HOST_WIDE_INT bitsize, bitpos;
 816      rtx value;
 817      unsigned int align;
 818 {
 819   unsigned int unit;
 820   unsigned int bitsdone = 0;
 821
 822   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 823      much at a time.  */
 824   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 825     unit = BITS_PER_WORD;
 826   else
 827     unit = MIN (align, BITS_PER_WORD);
 828
 829   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 830      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 831      that VALUE might be a floating-point constant.  */
 832   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 833     {
 834       rtx word = gen_lowpart_common (word_mode, value);
 835
 836       if (word && (value != word))
 837         value = word;
 838       else
 839         value = gen_lowpart_common (word_mode,
 840                                     force_reg (GET_MODE (value) != VOIDmode
 841                                                ? GET_MODE (value)
 842                                                : word_mode, value));
 843     }
 844   else if (GET_CODE (value) == ADDRESSOF)
 845     value = copy_to_reg (value);
 846
 847   while (bitsdone < bitsize)
 848     {
 849       unsigned HOST_WIDE_INT thissize;
 850       rtx part, word;
 851       unsigned HOST_WIDE_INT thispos;
 852       unsigned HOST_WIDE_INT offset;
 853
 854       offset = (bitpos + bitsdone) / unit;
 855       thispos = (bitpos + bitsdone) % unit;
 856
 857       /* THISSIZE must not overrun a word boundary.  Otherwise,
 858          store_fixed_bit_field will call us again, and we will mutually
 859          recurse forever.  */
 860       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 861       thissize = MIN (thissize, unit - thispos);
 862
 863       if (BYTES_BIG_ENDIAN)
 864         {
 865           int total_bits;
 866
 867           /* We must do an endian conversion exactly the same way as it is
 868              done in extract_bit_field, so that the two calls to
 869              extract_fixed_bit_field will have comparable arguments.  */
 870           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 871             total_bits = BITS_PER_WORD;
 872           else
 873             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 874
 875           /* Fetch successively less significant portions.  */
 876           if (GET_CODE (value) == CONST_INT)
 877             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 878                              >> (bitsize - bitsdone - thissize))
 879                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 880           else
 881             /* The args are chosen so that the last part includes the
 882                lsb.  Give extract_bit_field the value it needs (with
 883                endianness compensation) to fetch the piece we want.
 884
 885                ??? We have no idea what the alignment of VALUE is, so
 886                we have to use a guess.  */
 887             part
 888               = extract_fixed_bit_field
 889                 (word_mode, value, 0, thissize,
 890                  total_bits - bitsize + bitsdone, NULL_RTX, 1,
 891                  GET_MODE (value) == VOIDmode
 892                  ? UNITS_PER_WORD
 893                  : (GET_MODE (value) == BLKmode
 894                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 895         }
 896       else
 897         {
 898           /* Fetch successively more significant portions.  */
 899           if (GET_CODE (value) == CONST_INT)
 900             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 901                              >> bitsdone)
 902                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 903           else
 904             part
 905               = extract_fixed_bit_field
 906                 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
 907                  GET_MODE (value) == VOIDmode
 908                  ? UNITS_PER_WORD
 909                  : (GET_MODE (value) == BLKmode
 910                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 911         }
 912
 913       /* If OP0 is a register, then handle OFFSET here.
 914
 915          When handling multiword bitfields, extract_bit_field may pass
 916          down a word_mode SUBREG of a larger REG for a bitfield that actually
 917          crosses a word boundary.  Thus, for a SUBREG, we must find
 918          the current word starting from the base register.  */
 919       if (GET_CODE (op0) == SUBREG)
 920         {
 921           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 922           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 923                                         GET_MODE (SUBREG_REG (op0)));
 924           offset = 0;
 925         }
 926       else if (GET_CODE (op0) == REG)
 927         {
 928           word = operand_subword_force (op0, offset, GET_MODE (op0));
 929           offset = 0;
 930         }
 931       else
 932         word = op0;
 933
 934       /* OFFSET is in UNITs, and UNIT is in bits.
 935          store_fixed_bit_field wants offset in bytes.  */
 936       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 937                              thissize, thispos, part, align);
 938       bitsdone += thissize;
 939     }
 940 }
 941 \f
 942 /* Generate code to extract a byte-field from STR_RTX
 943    containing BITSIZE bits, starting at BITNUM,
 944    and put it in TARGET if possible (if TARGET is nonzero).
 945    Regardless of TARGET, we return the rtx for where the value is placed.
 946    It may be a QUEUED.
 947
 948    STR_RTX is the structure containing the byte (a REG or MEM).
 949    UNSIGNEDP is nonzero if this is an unsigned bit field.
 950    MODE is the natural mode of the field value once extracted.
 951    TMODE is the mode the caller would like the value to have;
 952    but the value may be returned with type MODE instead.
 953
 954    ALIGN is the alignment that STR_RTX is known to have.
 955    TOTAL_SIZE is the size in bytes of the containing structure,
 956    or -1 if varying.
 957
 958    If a TARGET is specified and we can store in it at no extra cost,
 959    we do so, and return TARGET.
 960    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 961    if they are equally easy.  */
 962
 963 rtx
 964 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 965                    target, mode, tmode, align, total_size)
 966      rtx str_rtx;
 967      unsigned HOST_WIDE_INT bitsize;
 968      unsigned HOST_WIDE_INT bitnum;
 969      int unsignedp;
 970      rtx target;
 971      enum machine_mode mode, tmode;
 972      unsigned int align;
 973      HOST_WIDE_INT total_size;
 974 {
 975   unsigned int unit
 976     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 977   unsigned HOST_WIDE_INT offset = bitnum / unit;
 978   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 979   register rtx op0 = str_rtx;
 980   rtx spec_target = target;
 981   rtx spec_target_subreg = 0;
 982   enum machine_mode int_mode;
 983 #ifdef HAVE_extv
 984   unsigned HOST_WIDE_INT extv_bitsize;
 985   enum machine_mode extv_mode;
 986 #endif
 987 #ifdef HAVE_extzv
 988   unsigned HOST_WIDE_INT extzv_bitsize;
 989   enum machine_mode extzv_mode;
 990 #endif
 991
 992 #ifdef HAVE_extv
 993   extv_mode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
 994   if (extv_mode == VOIDmode)
 995     extv_mode = word_mode;
 996   extv_bitsize = GET_MODE_BITSIZE (extv_mode);
 997 #endif
 998
 999 #ifdef HAVE_extzv
1000   extzv_mode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
1001   if (extzv_mode == VOIDmode)
1002     extzv_mode = word_mode;
1003   extzv_bitsize = GET_MODE_BITSIZE (extzv_mode);
1004 #endif
1005
1006   /* Discount the part of the structure before the desired byte.
1007      We need to know how many bytes are safe to reference after it.  */
1008   if (total_size >= 0)
1009     total_size -= (bitpos / BIGGEST_ALIGNMENT
1010                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1011
1012   if (tmode == VOIDmode)
1013     tmode = mode;
1014   while (GET_CODE (op0) == SUBREG)
1015     {
1016       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
1017       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
1018
1019       offset += SUBREG_BYTE (op0) / UNITS_PER_WORD;
1020
1021       inner_size = MIN (inner_size, BITS_PER_WORD);
1022
1023       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
1024         {
1025           bitpos += inner_size - outer_size;
1026           if (bitpos > unit)
1027             {
1028               offset += (bitpos / unit);
1029               bitpos %= unit;
1030             }
1031         }
1032
1033       op0 = SUBREG_REG (op0);
1034     }
1035
1036   if (GET_CODE (op0) == REG
1037       && mode == GET_MODE (op0)
1038       && bitnum == 0
1039       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1040     {
1041       /* We're trying to extract a full register from itself.  */
1042       return op0;
1043     }
1044
1045   /* Make sure we are playing with integral modes.  Pun with subregs
1046      if we aren't.  */
1047   {
1048     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1049     if (imode != GET_MODE (op0))
1050       {
1051         if (GET_CODE (op0) == MEM)
1052           op0 = adjust_address (op0, imode, 0);
1053         else if (imode != BLKmode)
1054           op0 = gen_lowpart (imode, op0);
1055         else
1056           abort ();
1057       }
1058   }
1059
1060   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1061      If that's wrong, the solution is to test for it and set TARGET to 0
1062      if needed.  */
1063
1064   /* If OP0 is a register, BITPOS must count within a word.
1065      But as we have it, it counts within whatever size OP0 now has.
1066      On a bigendian machine, these are not the same, so convert.  */
1067   if (BYTES_BIG_ENDIAN
1068       && GET_CODE (op0) != MEM
1069       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1070     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1071
1072   /* Extracting a full-word or multi-word value
1073      from a structure in a register or aligned memory.
1074      This can be done with just SUBREG.
1075      So too extracting a subword value in
1076      the least significant part of the register.  */
1077
1078   if (((GET_CODE (op0) != MEM
1079         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1080                                   GET_MODE_BITSIZE (GET_MODE (op0))))
1081        || (GET_CODE (op0) == MEM
1082            && (! SLOW_UNALIGNED_ACCESS (mode, align)
1083                || (offset * BITS_PER_UNIT % bitsize == 0
1084                    && align % bitsize == 0))))
1085       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1086            && bitpos % BITS_PER_WORD == 0)
1087           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1088               /* ??? The big endian test here is wrong.  This is correct
1089                  if the value is in a register, and if mode_for_size is not
1090                  the same mode as op0.  This causes us to get unnecessarily
1091                  inefficient code from the Thumb port when -mbig-endian.  */
1092               && (BYTES_BIG_ENDIAN
1093                   ? bitpos + bitsize == BITS_PER_WORD
1094                   : bitpos == 0))))
1095     {
1096       enum machine_mode mode1
1097         = (VECTOR_MODE_P (tmode) ? mode
1098            : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1099
1100       if (mode1 != GET_MODE (op0))
1101         {
1102           if (GET_CODE (op0) == SUBREG)
1103             {
1104               if (GET_MODE (SUBREG_REG (op0)) == mode1
1105                   || GET_MODE_CLASS (mode1) == MODE_INT
1106                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1107                 op0 = SUBREG_REG (op0);
1108               else
1109                 /* Else we've got some float mode source being extracted into
1110                    a different float mode destination -- this combination of
1111                    subregs results in Severe Tire Damage.  */
1112                 abort ();
1113             }
1114           if (GET_CODE (op0) == REG)
1115             op0 = gen_rtx_SUBREG (mode1, op0,
1116                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
1117                                   + (offset * UNITS_PER_WORD));
1118           else
1119             op0 = adjust_address (op0, mode1, offset);
1120         }
1121       if (mode1 != mode)
1122         return convert_to_mode (tmode, op0, unsignedp);
1123       return op0;
1124     }
1125
1126   /* Handle fields bigger than a word.  */
1127
1128   if (bitsize > BITS_PER_WORD)
1129     {
1130       /* Here we transfer the words of the field
1131          in the order least significant first.
1132          This is because the most significant word is the one which may
1133          be less than full.  */
1134
1135       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1136       unsigned int i;
1137
1138       if (target == 0 || GET_CODE (target) != REG)
1139         target = gen_reg_rtx (mode);
1140
1141       /* Indicate for flow that the entire target reg is being set.  */
1142       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1143
1144       for (i = 0; i < nwords; i++)
1145         {
1146           /* If I is 0, use the low-order word in both field and target;
1147              if I is 1, use the next to lowest word; and so on.  */
1148           /* Word number in TARGET to use.  */
1149           unsigned int wordnum
1150             = (WORDS_BIG_ENDIAN
1151                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1152                : i);
1153           /* Offset from start of field in OP0.  */
1154           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1155                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1156                                                 * (int) BITS_PER_WORD))
1157                                      : (int) i * BITS_PER_WORD);
1158           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1159           rtx result_part
1160             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1161                                            bitsize - i * BITS_PER_WORD),
1162                                  bitnum + bit_offset, 1, target_part, mode,
1163                                  word_mode, align, total_size);
1164
1165           if (target_part == 0)
1166             abort ();
1167
1168           if (result_part != target_part)
1169             emit_move_insn (target_part, result_part);
1170         }
1171
1172       if (unsignedp)
1173         {
1174           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1175              need to be zero'd out.  */
1176           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1177             {
1178               unsigned int i, total_words;
1179
1180               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1181               for (i = nwords; i < total_words; i++)
1182                 {
1183                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1184                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1185                   emit_move_insn (target_part, const0_rtx);
1186                 }
1187             }
1188           return target;
1189         }
1190
1191       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1192       target = expand_shift (LSHIFT_EXPR, mode, target,
1193                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1194                              NULL_RTX, 0);
1195       return expand_shift (RSHIFT_EXPR, mode, target,
1196                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1197                            NULL_RTX, 0);
1198     }
1199
1200   /* From here on we know the desired field is smaller than a word.  */
1201
1202   /* Check if there is a correspondingly-sized integer field, so we can
1203      safely extract it as one size of integer, if necessary; then
1204      truncate or extend to the size that is wanted; then use SUBREGs or
1205      convert_to_mode to get one of the modes we really wanted.  */
1206
1207   int_mode = int_mode_for_mode (tmode);
1208   if (int_mode == BLKmode)
1209     int_mode = int_mode_for_mode (mode);
1210   if (int_mode == BLKmode)
1211     abort();    /* Should probably push op0 out to memory and then
1212                    do a load.  */
1213
1214   /* OFFSET is the number of words or bytes (UNIT says which)
1215      from STR_RTX to the first word or byte containing part of the field.  */
1216
1217   if (GET_CODE (op0) != MEM)
1218     {
1219       if (offset != 0
1220           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1221         {
1222           if (GET_CODE (op0) != REG)
1223             op0 = copy_to_reg (op0);
1224           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1225                                 op0, (offset * UNITS_PER_WORD));
1226         }
1227       offset = 0;
1228     }
1229   else
1230     {
1231       op0 = protect_from_queue (str_rtx, 1);
1232     }
1233
1234   /* Now OFFSET is nonzero only for memory operands.  */
1235
1236   if (unsignedp)
1237     {
1238 #ifdef HAVE_extzv
1239       if (HAVE_extzv
1240           && (extzv_bitsize >= bitsize)
1241           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1242                 && (bitsize + bitpos > extzv_bitsize)))
1243         {
1244           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1245           rtx bitsize_rtx, bitpos_rtx;
1246           rtx last = get_last_insn ();
1247           rtx xop0 = op0;
1248           rtx xtarget = target;
1249           rtx xspec_target = spec_target;
1250           rtx xspec_target_subreg = spec_target_subreg;
1251           rtx pat;
1252           enum machine_mode maxmode;
1253
1254           maxmode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
1255           if (maxmode == VOIDmode)
1256             maxmode = word_mode;
1257
1258           if (GET_CODE (xop0) == MEM)
1259             {
1260               int save_volatile_ok = volatile_ok;
1261               volatile_ok = 1;
1262
1263               /* Is the memory operand acceptable?  */
1264               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1265                      (xop0, GET_MODE (xop0))))
1266                 {
1267                   /* No, load into a reg and extract from there.  */
1268                   enum machine_mode bestmode;
1269
1270                   /* Get the mode to use for inserting into this field.  If
1271                      OP0 is BLKmode, get the smallest mode consistent with the
1272                      alignment. If OP0 is a non-BLKmode object that is no
1273                      wider than MAXMODE, use its mode. Otherwise, use the
1274                      smallest mode containing the field.  */
1275
1276                   if (GET_MODE (xop0) == BLKmode
1277                       || (GET_MODE_SIZE (GET_MODE (op0))
1278                           > GET_MODE_SIZE (maxmode)))
1279                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1280                                               MEM_VOLATILE_P (xop0));
1281                   else
1282                     bestmode = GET_MODE (xop0);
1283
1284                   if (bestmode == VOIDmode
1285                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1286                           && GET_MODE_BITSIZE (bestmode) > align))
1287                     goto extzv_loses;
1288
1289                   /* Compute offset as multiple of this unit,
1290                      counting in bytes.  */
1291                   unit = GET_MODE_BITSIZE (bestmode);
1292                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1293                   xbitpos = bitnum % unit;
1294                   xop0 = adjust_address (xop0, bestmode, xoffset);
1295
1296                   /* Fetch it to a register in that size.  */
1297                   xop0 = force_reg (bestmode, xop0);
1298
1299                   /* XBITPOS counts within UNIT, which is what is expected.  */
1300                 }
1301               else
1302                 /* Get ref to first byte containing part of the field.  */
1303                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1304
1305               volatile_ok = save_volatile_ok;
1306             }
1307
1308           /* If op0 is a register, we need it in MAXMODE (which is usually
1309              SImode). to make it acceptable to the format of extzv.  */
1310           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1311             goto extzv_loses;
1312           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1313             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1314
1315           /* On big-endian machines, we count bits from the most significant.
1316              If the bit field insn does not, we must invert.  */
1317           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1318             xbitpos = unit - bitsize - xbitpos;
1319
1320           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1321           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1322             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1323
1324           unit = GET_MODE_BITSIZE (maxmode);
1325
1326           if (xtarget == 0
1327               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1328             xtarget = xspec_target = gen_reg_rtx (tmode);
1329
1330           if (GET_MODE (xtarget) != maxmode)
1331             {
1332               if (GET_CODE (xtarget) == REG)
1333                 {
1334                   int wider = (GET_MODE_SIZE (maxmode)
1335                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1336                   xtarget = gen_lowpart (maxmode, xtarget);
1337                   if (wider)
1338                     xspec_target_subreg = xtarget;
1339                 }
1340               else
1341                 xtarget = gen_reg_rtx (maxmode);
1342             }
1343
1344           /* If this machine's extzv insists on a register target,
1345              make sure we have one.  */
1346           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1347                  (xtarget, maxmode)))
1348             xtarget = gen_reg_rtx (maxmode);
1349
1350           bitsize_rtx = GEN_INT (bitsize);
1351           bitpos_rtx = GEN_INT (xbitpos);
1352
1353           pat = gen_extzv (protect_from_queue (xtarget, 1),
1354                            xop0, bitsize_rtx, bitpos_rtx);
1355           if (pat)
1356             {
1357               emit_insn (pat);
1358               target = xtarget;
1359               spec_target = xspec_target;
1360               spec_target_subreg = xspec_target_subreg;
1361             }
1362           else
1363             {
1364               delete_insns_since (last);
1365               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1366                                                 bitpos, target, 1, align);
1367             }
1368         }
1369       else
1370         extzv_loses:
1371 #endif
1372       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1373                                         bitpos, target, 1, align);
1374     }
1375   else
1376     {
1377 #ifdef HAVE_extv
1378       if (HAVE_extv
1379           && (extv_bitsize >= bitsize)
1380           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1381                 && (bitsize + bitpos > extv_bitsize)))
1382         {
1383           int xbitpos = bitpos, xoffset = offset;
1384           rtx bitsize_rtx, bitpos_rtx;
1385           rtx last = get_last_insn ();
1386           rtx xop0 = op0, xtarget = target;
1387           rtx xspec_target = spec_target;
1388           rtx xspec_target_subreg = spec_target_subreg;
1389           rtx pat;
1390           enum machine_mode maxmode;
1391
1392           maxmode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
1393           if (maxmode == VOIDmode)
1394             maxmode = word_mode;
1395
1396           if (GET_CODE (xop0) == MEM)
1397             {
1398               /* Is the memory operand acceptable?  */
1399               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1400                      (xop0, GET_MODE (xop0))))
1401                 {
1402                   /* No, load into a reg and extract from there.  */
1403                   enum machine_mode bestmode;
1404
1405                   /* Get the mode to use for inserting into this field.  If
1406                      OP0 is BLKmode, get the smallest mode consistent with the
1407                      alignment. If OP0 is a non-BLKmode object that is no
1408                      wider than MAXMODE, use its mode. Otherwise, use the
1409                      smallest mode containing the field.  */
1410
1411                   if (GET_MODE (xop0) == BLKmode
1412                       || (GET_MODE_SIZE (GET_MODE (op0))
1413                           > GET_MODE_SIZE (maxmode)))
1414                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1415                                               MEM_VOLATILE_P (xop0));
1416                   else
1417                     bestmode = GET_MODE (xop0);
1418
1419                   if (bestmode == VOIDmode
1420                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1421                           && GET_MODE_BITSIZE (bestmode) > align))
1422                     goto extv_loses;
1423
1424                   /* Compute offset as multiple of this unit,
1425                      counting in bytes.  */
1426                   unit = GET_MODE_BITSIZE (bestmode);
1427                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1428                   xbitpos = bitnum % unit;
1429                   xop0 = adjust_address (xop0, bestmode, xoffset);
1430
1431                   /* Fetch it to a register in that size.  */
1432                   xop0 = force_reg (bestmode, xop0);
1433
1434                   /* XBITPOS counts within UNIT, which is what is expected.  */
1435                 }
1436               else
1437                 /* Get ref to first byte containing part of the field.  */
1438                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1439             }
1440
1441           /* If op0 is a register, we need it in MAXMODE (which is usually
1442              SImode) to make it acceptable to the format of extv.  */
1443           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1444             goto extv_loses;
1445           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1446             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1447
1448           /* On big-endian machines, we count bits from the most significant.
1449              If the bit field insn does not, we must invert.  */
1450           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1451             xbitpos = unit - bitsize - xbitpos;
1452
1453           /* XBITPOS counts within a size of UNIT.
1454              Adjust to count within a size of MAXMODE.  */
1455           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1456             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1457
1458           unit = GET_MODE_BITSIZE (maxmode);
1459
1460           if (xtarget == 0
1461               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1462             xtarget = xspec_target = gen_reg_rtx (tmode);
1463
1464           if (GET_MODE (xtarget) != maxmode)
1465             {
1466               if (GET_CODE (xtarget) == REG)
1467                 {
1468                   int wider = (GET_MODE_SIZE (maxmode)
1469                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1470                   xtarget = gen_lowpart (maxmode, xtarget);
1471                   if (wider)
1472                     xspec_target_subreg = xtarget;
1473                 }
1474               else
1475                 xtarget = gen_reg_rtx (maxmode);
1476             }
1477
1478           /* If this machine's extv insists on a register target,
1479              make sure we have one.  */
1480           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1481                  (xtarget, maxmode)))
1482             xtarget = gen_reg_rtx (maxmode);
1483
1484           bitsize_rtx = GEN_INT (bitsize);
1485           bitpos_rtx = GEN_INT (xbitpos);
1486
1487           pat = gen_extv (protect_from_queue (xtarget, 1),
1488                           xop0, bitsize_rtx, bitpos_rtx);
1489           if (pat)
1490             {
1491               emit_insn (pat);
1492               target = xtarget;
1493               spec_target = xspec_target;
1494               spec_target_subreg = xspec_target_subreg;
1495             }
1496           else
1497             {
1498               delete_insns_since (last);
1499               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1500                                                 bitpos, target, 0, align);
1501             }
1502         }
1503       else
1504         extv_loses:
1505 #endif
1506       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1507                                         bitpos, target, 0, align);
1508     }
1509   if (target == spec_target)
1510     return target;
1511   if (target == spec_target_subreg)
1512     return spec_target;
1513   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1514     {
1515       /* If the target mode is floating-point, first convert to the
1516          integer mode of that size and then access it as a floating-point
1517          value via a SUBREG.  */
1518       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1519         {
1520           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1521                                                    MODE_INT, 0),
1522                                     target, unsignedp);
1523           if (GET_CODE (target) != REG)
1524             target = copy_to_reg (target);
1525           return gen_rtx_SUBREG (tmode, target, 0);
1526         }
1527       else
1528         return convert_to_mode (tmode, target, unsignedp);
1529     }
1530   return target;
1531 }
1532 \f
1533 /* Extract a bit field using shifts and boolean operations
1534    Returns an rtx to represent the value.
1535    OP0 addresses a register (word) or memory (byte).
1536    BITPOS says which bit within the word or byte the bit field starts in.
1537    OFFSET says how many bytes farther the bit field starts;
1538     it is 0 if OP0 is a register.
1539    BITSIZE says how many bits long the bit field is.
1540     (If OP0 is a register, it may be narrower than a full word,
1541      but BITPOS still counts within a full word,
1542      which is significant on bigendian machines.)
1543
1544    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1545    If TARGET is nonzero, attempts to store the value there
1546    and return TARGET, but this is not guaranteed.
1547    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1548
1549    ALIGN is the alignment that STR_RTX is known to have.  */
1550
1551 static rtx
1552 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1553                          target, unsignedp, align)
1554      enum machine_mode tmode;
1555      register rtx op0, target;
1556      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1557      int unsignedp;
1558      unsigned int align;
1559 {
1560   unsigned int total_bits = BITS_PER_WORD;
1561   enum machine_mode mode;
1562
1563   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1564     {
1565       /* Special treatment for a bit field split across two registers.  */
1566       if (bitsize + bitpos > BITS_PER_WORD)
1567         return extract_split_bit_field (op0, bitsize, bitpos,
1568                                         unsignedp, align);
1569     }
1570   else
1571     {
1572       /* Get the proper mode to use for this field.  We want a mode that
1573          includes the entire field.  If such a mode would be larger than
1574          a word, we won't be doing the extraction the normal way.  */
1575
1576       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
1577                             word_mode,
1578                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1579
1580       if (mode == VOIDmode)
1581         /* The only way this should occur is if the field spans word
1582            boundaries.  */
1583         return extract_split_bit_field (op0, bitsize,
1584                                         bitpos + offset * BITS_PER_UNIT,
1585                                         unsignedp, align);
1586
1587       total_bits = GET_MODE_BITSIZE (mode);
1588
1589       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1590          be in the range 0 to total_bits-1, and put any excess bytes in
1591          OFFSET.  */
1592       if (bitpos >= total_bits)
1593         {
1594           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1595           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1596                      * BITS_PER_UNIT);
1597         }
1598
1599       /* Get ref to an aligned byte, halfword, or word containing the field.
1600          Adjust BITPOS to be position within a word,
1601          and OFFSET to be the offset of that word.
1602          Then alter OP0 to refer to that word.  */
1603       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1604       offset -= (offset % (total_bits / BITS_PER_UNIT));
1605       op0 = adjust_address (op0, mode, offset);
1606     }
1607
1608   mode = GET_MODE (op0);
1609
1610   if (BYTES_BIG_ENDIAN)
1611     {
1612       /* BITPOS is the distance between our msb and that of OP0.
1613          Convert it to the distance from the lsb.  */
1614
1615       bitpos = total_bits - bitsize - bitpos;
1616     }
1617
1618   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1619      We have reduced the big-endian case to the little-endian case.  */
1620
1621   if (unsignedp)
1622     {
1623       if (bitpos)
1624         {
1625           /* If the field does not already start at the lsb,
1626              shift it so it does.  */
1627           tree amount = build_int_2 (bitpos, 0);
1628           /* Maybe propagate the target for the shift.  */
1629           /* But not if we will return it--could confuse integrate.c.  */
1630           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1631                            && !REG_FUNCTION_VALUE_P (target)
1632                            ? target : 0);
1633           if (tmode != mode) subtarget = 0;
1634           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1635         }
1636       /* Convert the value to the desired mode.  */
1637       if (mode != tmode)
1638         op0 = convert_to_mode (tmode, op0, 1);
1639
1640       /* Unless the msb of the field used to be the msb when we shifted,
1641          mask out the upper bits.  */
1642
1643       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1644 #if 0
1645 #ifdef SLOW_ZERO_EXTEND
1646           /* Always generate an `and' if
1647              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1648              will combine fruitfully with the zero-extend.  */
1649           || tmode != mode
1650 #endif
1651 #endif
1652           )
1653         return expand_binop (GET_MODE (op0), and_optab, op0,
1654                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1655                              target, 1, OPTAB_LIB_WIDEN);
1656       return op0;
1657     }
1658
1659   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1660      then arithmetic-shift its lsb to the lsb of the word.  */
1661   op0 = force_reg (mode, op0);
1662   if (mode != tmode)
1663     target = 0;
1664
1665   /* Find the narrowest integer mode that contains the field.  */
1666
1667   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1668        mode = GET_MODE_WIDER_MODE (mode))
1669     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1670       {
1671         op0 = convert_to_mode (mode, op0, 0);
1672         break;
1673       }
1674
1675   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1676     {
1677       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1678       /* Maybe propagate the target for the shift.  */
1679       /* But not if we will return the result--could confuse integrate.c.  */
1680       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1681                        && ! REG_FUNCTION_VALUE_P (target)
1682                        ? target : 0);
1683       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1684     }
1685
1686   return expand_shift (RSHIFT_EXPR, mode, op0,
1687                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1688                        target, 0);
1689 }
1690 \f
1691 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1692    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1693    complement of that if COMPLEMENT.  The mask is truncated if
1694    necessary to the width of mode MODE.  The mask is zero-extended if
1695    BITSIZE+BITPOS is too small for MODE.  */
1696
1697 static rtx
1698 mask_rtx (mode, bitpos, bitsize, complement)
1699      enum machine_mode mode;
1700      int bitpos, bitsize, complement;
1701 {
1702   HOST_WIDE_INT masklow, maskhigh;
1703
1704   if (bitpos < HOST_BITS_PER_WIDE_INT)
1705     masklow = (HOST_WIDE_INT) -1 << bitpos;
1706   else
1707     masklow = 0;
1708
1709   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1710     masklow &= ((unsigned HOST_WIDE_INT) -1
1711                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1712
1713   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1714     maskhigh = -1;
1715   else
1716     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1717
1718   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1719     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1720                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1721   else
1722     maskhigh = 0;
1723
1724   if (complement)
1725     {
1726       maskhigh = ~maskhigh;
1727       masklow = ~masklow;
1728     }
1729
1730   return immed_double_const (masklow, maskhigh, mode);
1731 }
1732
1733 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1734    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1735
1736 static rtx
1737 lshift_value (mode, value, bitpos, bitsize)
1738      enum machine_mode mode;
1739      rtx value;
1740      int bitpos, bitsize;
1741 {
1742   unsigned HOST_WIDE_INT v = INTVAL (value);
1743   HOST_WIDE_INT low, high;
1744
1745   if (bitsize < HOST_BITS_PER_WIDE_INT)
1746     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1747
1748   if (bitpos < HOST_BITS_PER_WIDE_INT)
1749     {
1750       low = v << bitpos;
1751       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1752     }
1753   else
1754     {
1755       low = 0;
1756       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1757     }
1758
1759   return immed_double_const (low, high, mode);
1760 }
1761 \f
1762 /* Extract a bit field that is split across two words
1763    and return an RTX for the result.
1764
1765    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1766    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1767    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1768
1769    ALIGN is the known alignment of OP0.  This is also the size of the
1770    memory objects to be used.  */
1771
1772 static rtx
1773 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1774      rtx op0;
1775      unsigned HOST_WIDE_INT bitsize, bitpos;
1776      int unsignedp;
1777      unsigned int align;
1778 {
1779   unsigned int unit;
1780   unsigned int bitsdone = 0;
1781   rtx result = NULL_RTX;
1782   int first = 1;
1783
1784   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1785      much at a time.  */
1786   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1787     unit = BITS_PER_WORD;
1788   else
1789     unit = MIN (align, BITS_PER_WORD);
1790
1791   while (bitsdone < bitsize)
1792     {
1793       unsigned HOST_WIDE_INT thissize;
1794       rtx part, word;
1795       unsigned HOST_WIDE_INT thispos;
1796       unsigned HOST_WIDE_INT offset;
1797
1798       offset = (bitpos + bitsdone) / unit;
1799       thispos = (bitpos + bitsdone) % unit;
1800
1801       /* THISSIZE must not overrun a word boundary.  Otherwise,
1802          extract_fixed_bit_field will call us again, and we will mutually
1803          recurse forever.  */
1804       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1805       thissize = MIN (thissize, unit - thispos);
1806
1807       /* If OP0 is a register, then handle OFFSET here.
1808
1809          When handling multiword bitfields, extract_bit_field may pass
1810          down a word_mode SUBREG of a larger REG for a bitfield that actually
1811          crosses a word boundary.  Thus, for a SUBREG, we must find
1812          the current word starting from the base register.  */
1813       if (GET_CODE (op0) == SUBREG)
1814         {
1815           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1816           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1817                                         GET_MODE (SUBREG_REG (op0)));
1818           offset = 0;
1819         }
1820       else if (GET_CODE (op0) == REG)
1821         {
1822           word = operand_subword_force (op0, offset, GET_MODE (op0));
1823           offset = 0;
1824         }
1825       else
1826         word = op0;
1827
1828       /* Extract the parts in bit-counting order,
1829          whose meaning is determined by BYTES_PER_UNIT.
1830          OFFSET is in UNITs, and UNIT is in bits.
1831          extract_fixed_bit_field wants offset in bytes.  */
1832       part = extract_fixed_bit_field (word_mode, word,
1833                                       offset * unit / BITS_PER_UNIT,
1834                                       thissize, thispos, 0, 1, align);
1835       bitsdone += thissize;
1836
1837       /* Shift this part into place for the result.  */
1838       if (BYTES_BIG_ENDIAN)
1839         {
1840           if (bitsize != bitsdone)
1841             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1842                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1843         }
1844       else
1845         {
1846           if (bitsdone != thissize)
1847             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1848                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1849         }
1850
1851       if (first)
1852         result = part;
1853       else
1854         /* Combine the parts with bitwise or.  This works
1855            because we extracted each part as an unsigned bit field.  */
1856         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1857                                OPTAB_LIB_WIDEN);
1858
1859       first = 0;
1860     }
1861
1862   /* Unsigned bit field: we are done.  */
1863   if (unsignedp)
1864     return result;
1865   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1866   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1867                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1868                          NULL_RTX, 0);
1869   return expand_shift (RSHIFT_EXPR, word_mode, result,
1870                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1871 }
1872 \f
1873 /* Add INC into TARGET.  */
1874
1875 void
1876 expand_inc (target, inc)
1877      rtx target, inc;
1878 {
1879   rtx value = expand_binop (GET_MODE (target), add_optab,
1880                             target, inc,
1881                             target, 0, OPTAB_LIB_WIDEN);
1882   if (value != target)
1883     emit_move_insn (target, value);
1884 }
1885
1886 /* Subtract DEC from TARGET.  */
1887
1888 void
1889 expand_dec (target, dec)
1890      rtx target, dec;
1891 {
1892   rtx value = expand_binop (GET_MODE (target), sub_optab,
1893                             target, dec,
1894                             target, 0, OPTAB_LIB_WIDEN);
1895   if (value != target)
1896     emit_move_insn (target, value);
1897 }
1898 \f
1899 /* Output a shift instruction for expression code CODE,
1900    with SHIFTED being the rtx for the value to shift,
1901    and AMOUNT the tree for the amount to shift by.
1902    Store the result in the rtx TARGET, if that is convenient.
1903    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1904    Return the rtx for where the value is.  */
1905
1906 rtx
1907 expand_shift (code, mode, shifted, amount, target, unsignedp)
1908      enum tree_code code;
1909      register enum machine_mode mode;
1910      rtx shifted;
1911      tree amount;
1912      register rtx target;
1913      int unsignedp;
1914 {
1915   register rtx op1, temp = 0;
1916   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1917   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1918   int try;
1919
1920   /* Previously detected shift-counts computed by NEGATE_EXPR
1921      and shifted in the other direction; but that does not work
1922      on all machines.  */
1923
1924   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1925
1926 #ifdef SHIFT_COUNT_TRUNCATED
1927   if (SHIFT_COUNT_TRUNCATED)
1928     {
1929       if (GET_CODE (op1) == CONST_INT
1930           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1931               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1932         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1933                        % GET_MODE_BITSIZE (mode));
1934       else if (GET_CODE (op1) == SUBREG
1935                && SUBREG_BYTE (op1) == 0)
1936         op1 = SUBREG_REG (op1);
1937     }
1938 #endif
1939
1940   if (op1 == const0_rtx)
1941     return shifted;
1942
1943   for (try = 0; temp == 0 && try < 3; try++)
1944     {
1945       enum optab_methods methods;
1946
1947       if (try == 0)
1948         methods = OPTAB_DIRECT;
1949       else if (try == 1)
1950         methods = OPTAB_WIDEN;
1951       else
1952         methods = OPTAB_LIB_WIDEN;
1953
1954       if (rotate)
1955         {
1956           /* Widening does not work for rotation.  */
1957           if (methods == OPTAB_WIDEN)
1958             continue;
1959           else if (methods == OPTAB_LIB_WIDEN)
1960             {
1961               /* If we have been unable to open-code this by a rotation,
1962                  do it as the IOR of two shifts.  I.e., to rotate A
1963                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1964                  where C is the bitsize of A.
1965
1966                  It is theoretically possible that the target machine might
1967                  not be able to perform either shift and hence we would
1968                  be making two libcalls rather than just the one for the
1969                  shift (similarly if IOR could not be done).  We will allow
1970                  this extremely unlikely lossage to avoid complicating the
1971                  code below.  */
1972
1973               rtx subtarget = target == shifted ? 0 : target;
1974               rtx temp1;
1975               tree type = TREE_TYPE (amount);
1976               tree new_amount = make_tree (type, op1);
1977               tree other_amount
1978                 = fold (build (MINUS_EXPR, type,
1979                                convert (type,
1980                                         build_int_2 (GET_MODE_BITSIZE (mode),
1981                                                      0)),
1982                                amount));
1983
1984               shifted = force_reg (mode, shifted);
1985
1986               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1987                                    mode, shifted, new_amount, subtarget, 1);
1988               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1989                                     mode, shifted, other_amount, 0, 1);
1990               return expand_binop (mode, ior_optab, temp, temp1, target,
1991                                    unsignedp, methods);
1992             }
1993
1994           temp = expand_binop (mode,
1995                                left ? rotl_optab : rotr_optab,
1996                                shifted, op1, target, unsignedp, methods);
1997
1998           /* If we don't have the rotate, but we are rotating by a constant
1999              that is in range, try a rotate in the opposite direction.  */
2000
2001           if (temp == 0 && GET_CODE (op1) == CONST_INT
2002               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
2003             temp = expand_binop (mode,
2004                                  left ? rotr_optab : rotl_optab,
2005                                  shifted,
2006                                  GEN_INT (GET_MODE_BITSIZE (mode)
2007                                           - INTVAL (op1)),
2008                                  target, unsignedp, methods);
2009         }
2010       else if (unsignedp)
2011         temp = expand_binop (mode,
2012                              left ? ashl_optab : lshr_optab,
2013                              shifted, op1, target, unsignedp, methods);
2014
2015       /* Do arithmetic shifts.
2016          Also, if we are going to widen the operand, we can just as well
2017          use an arithmetic right-shift instead of a logical one.  */
2018       if (temp == 0 && ! rotate
2019           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2020         {
2021           enum optab_methods methods1 = methods;
2022
2023           /* If trying to widen a log shift to an arithmetic shift,
2024              don't accept an arithmetic shift of the same size.  */
2025           if (unsignedp)
2026             methods1 = OPTAB_MUST_WIDEN;
2027
2028           /* Arithmetic shift */
2029
2030           temp = expand_binop (mode,
2031                                left ? ashl_optab : ashr_optab,
2032                                shifted, op1, target, unsignedp, methods1);
2033         }
2034
2035       /* We used to try extzv here for logical right shifts, but that was
2036          only useful for one machine, the VAX, and caused poor code
2037          generation there for lshrdi3, so the code was deleted and a
2038          define_expand for lshrsi3 was added to vax.md.  */
2039     }
2040
2041   if (temp == 0)
2042     abort ();
2043   return temp;
2044 }
2045 \f
2046 enum alg_code { alg_zero, alg_m, alg_shift,
2047                   alg_add_t_m2, alg_sub_t_m2,
2048                   alg_add_factor, alg_sub_factor,
2049                   alg_add_t2_m, alg_sub_t2_m,
2050                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2051
2052 /* This structure records a sequence of operations.
2053    `ops' is the number of operations recorded.
2054    `cost' is their total cost.
2055    The operations are stored in `op' and the corresponding
2056    logarithms of the integer coefficients in `log'.
2057
2058    These are the operations:
2059    alg_zero             total := 0;
2060    alg_m                total := multiplicand;
2061    alg_shift            total := total * coeff
2062    alg_add_t_m2         total := total + multiplicand * coeff;
2063    alg_sub_t_m2         total := total - multiplicand * coeff;
2064    alg_add_factor       total := total * coeff + total;
2065    alg_sub_factor       total := total * coeff - total;
2066    alg_add_t2_m         total := total * coeff + multiplicand;
2067    alg_sub_t2_m         total := total * coeff - multiplicand;
2068
2069    The first operand must be either alg_zero or alg_m.  */
2070
2071 struct algorithm
2072 {
2073   short cost;
2074   short ops;
2075   /* The size of the OP and LOG fields are not directly related to the
2076      word size, but the worst-case algorithms will be if we have few
2077      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2078      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2079      in total wordsize operations.  */
2080   enum alg_code op[MAX_BITS_PER_WORD];
2081   char log[MAX_BITS_PER_WORD];
2082 };
2083
2084 static void synth_mult                  PARAMS ((struct algorithm *,
2085                                                  unsigned HOST_WIDE_INT,
2086                                                  int));
2087 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2088                                                          int, int,
2089                                                          unsigned HOST_WIDE_INT *,
2090                                                          int *, int *));
2091 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2092                                                          int));
2093 /* Compute and return the best algorithm for multiplying by T.
2094    The algorithm must cost less than cost_limit
2095    If retval.cost >= COST_LIMIT, no algorithm was found and all
2096    other field of the returned struct are undefined.  */
2097
2098 static void
2099 synth_mult (alg_out, t, cost_limit)
2100      struct algorithm *alg_out;
2101      unsigned HOST_WIDE_INT t;
2102      int cost_limit;
2103 {
2104   int m;
2105   struct algorithm *alg_in, *best_alg;
2106   int cost;
2107   unsigned HOST_WIDE_INT q;
2108
2109   /* Indicate that no algorithm is yet found.  If no algorithm
2110      is found, this value will be returned and indicate failure.  */
2111   alg_out->cost = cost_limit;
2112
2113   if (cost_limit <= 0)
2114     return;
2115
2116   /* t == 1 can be done in zero cost.  */
2117   if (t == 1)
2118     {
2119       alg_out->ops = 1;
2120       alg_out->cost = 0;
2121       alg_out->op[0] = alg_m;
2122       return;
2123     }
2124
2125   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2126      fail now.  */
2127   if (t == 0)
2128     {
2129       if (zero_cost >= cost_limit)
2130         return;
2131       else
2132         {
2133           alg_out->ops = 1;
2134           alg_out->cost = zero_cost;
2135           alg_out->op[0] = alg_zero;
2136           return;
2137         }
2138     }
2139
2140   /* We'll be needing a couple extra algorithm structures now.  */
2141
2142   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2143   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2144
2145   /* If we have a group of zero bits at the low-order part of T, try
2146      multiplying by the remaining bits and then doing a shift.  */
2147
2148   if ((t & 1) == 0)
2149     {
2150       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2151       if (m < BITS_PER_WORD)
2152         {
2153           q = t >> m;
2154           cost = shift_cost[m];
2155           synth_mult (alg_in, q, cost_limit - cost);
2156
2157           cost += alg_in->cost;
2158           if (cost < cost_limit)
2159             {
2160               struct algorithm *x;
2161               x = alg_in, alg_in = best_alg, best_alg = x;
2162               best_alg->log[best_alg->ops] = m;
2163               best_alg->op[best_alg->ops] = alg_shift;
2164               cost_limit = cost;
2165             }
2166         }
2167     }
2168
2169   /* If we have an odd number, add or subtract one.  */
2170   if ((t & 1) != 0)
2171     {
2172       unsigned HOST_WIDE_INT w;
2173
2174       for (w = 1; (w & t) != 0; w <<= 1)
2175         ;
2176       /* If T was -1, then W will be zero after the loop.  This is another
2177          case where T ends with ...111.  Handling this with (T + 1) and
2178          subtract 1 produces slightly better code and results in algorithm
2179          selection much faster than treating it like the ...0111 case
2180          below.  */
2181       if (w == 0
2182           || (w > 2
2183               /* Reject the case where t is 3.
2184                  Thus we prefer addition in that case.  */
2185               && t != 3))
2186         {
2187           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2188
2189           cost = add_cost;
2190           synth_mult (alg_in, t + 1, cost_limit - cost);
2191
2192           cost += alg_in->cost;
2193           if (cost < cost_limit)
2194             {
2195               struct algorithm *x;
2196               x = alg_in, alg_in = best_alg, best_alg = x;
2197               best_alg->log[best_alg->ops] = 0;
2198               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2199               cost_limit = cost;
2200             }
2201         }
2202       else
2203         {
2204           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2205
2206           cost = add_cost;
2207           synth_mult (alg_in, t - 1, cost_limit - cost);
2208
2209           cost += alg_in->cost;
2210           if (cost < cost_limit)
2211             {
2212               struct algorithm *x;
2213               x = alg_in, alg_in = best_alg, best_alg = x;
2214               best_alg->log[best_alg->ops] = 0;
2215               best_alg->op[best_alg->ops] = alg_add_t_m2;
2216               cost_limit = cost;
2217             }
2218         }
2219     }
2220
2221   /* Look for factors of t of the form
2222      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2223      If we find such a factor, we can multiply by t using an algorithm that
2224      multiplies by q, shift the result by m and add/subtract it to itself.
2225
2226      We search for large factors first and loop down, even if large factors
2227      are less probable than small; if we find a large factor we will find a
2228      good sequence quickly, and therefore be able to prune (by decreasing
2229      COST_LIMIT) the search.  */
2230
2231   for (m = floor_log2 (t - 1); m >= 2; m--)
2232     {
2233       unsigned HOST_WIDE_INT d;
2234
2235       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2236       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2237         {
2238           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2239           synth_mult (alg_in, t / d, cost_limit - cost);
2240
2241           cost += alg_in->cost;
2242           if (cost < cost_limit)
2243             {
2244               struct algorithm *x;
2245               x = alg_in, alg_in = best_alg, best_alg = x;
2246               best_alg->log[best_alg->ops] = m;
2247               best_alg->op[best_alg->ops] = alg_add_factor;
2248               cost_limit = cost;
2249             }
2250           /* Other factors will have been taken care of in the recursion.  */
2251           break;
2252         }
2253
2254       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2255       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2256         {
2257           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2258           synth_mult (alg_in, t / d, cost_limit - cost);
2259
2260           cost += alg_in->cost;
2261           if (cost < cost_limit)
2262             {
2263               struct algorithm *x;
2264               x = alg_in, alg_in = best_alg, best_alg = x;
2265               best_alg->log[best_alg->ops] = m;
2266               best_alg->op[best_alg->ops] = alg_sub_factor;
2267               cost_limit = cost;
2268             }
2269           break;
2270         }
2271     }
2272
2273   /* Try shift-and-add (load effective address) instructions,
2274      i.e. do a*3, a*5, a*9.  */
2275   if ((t & 1) != 0)
2276     {
2277       q = t - 1;
2278       q = q & -q;
2279       m = exact_log2 (q);
2280       if (m >= 0 && m < BITS_PER_WORD)
2281         {
2282           cost = shiftadd_cost[m];
2283           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2284
2285           cost += alg_in->cost;
2286           if (cost < cost_limit)
2287             {
2288               struct algorithm *x;
2289               x = alg_in, alg_in = best_alg, best_alg = x;
2290               best_alg->log[best_alg->ops] = m;
2291               best_alg->op[best_alg->ops] = alg_add_t2_m;
2292               cost_limit = cost;
2293             }
2294         }
2295
2296       q = t + 1;
2297       q = q & -q;
2298       m = exact_log2 (q);
2299       if (m >= 0 && m < BITS_PER_WORD)
2300         {
2301           cost = shiftsub_cost[m];
2302           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2303
2304           cost += alg_in->cost;
2305           if (cost < cost_limit)
2306             {
2307               struct algorithm *x;
2308               x = alg_in, alg_in = best_alg, best_alg = x;
2309               best_alg->log[best_alg->ops] = m;
2310               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2311               cost_limit = cost;
2312             }
2313         }
2314     }
2315
2316   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2317      we have not found any algorithm.  */
2318   if (cost_limit == alg_out->cost)
2319     return;
2320
2321   /* If we are getting a too long sequence for `struct algorithm'
2322      to record, make this search fail.  */
2323   if (best_alg->ops == MAX_BITS_PER_WORD)
2324     return;
2325
2326   /* Copy the algorithm from temporary space to the space at alg_out.
2327      We avoid using structure assignment because the majority of
2328      best_alg is normally undefined, and this is a critical function.  */
2329   alg_out->ops = best_alg->ops + 1;
2330   alg_out->cost = cost_limit;
2331   memcpy (alg_out->op, best_alg->op,
2332           alg_out->ops * sizeof *alg_out->op);
2333   memcpy (alg_out->log, best_alg->log,
2334           alg_out->ops * sizeof *alg_out->log);
2335 }
2336 \f
2337 /* Perform a multiplication and return an rtx for the result.
2338    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2339    TARGET is a suggestion for where to store the result (an rtx).
2340
2341    We check specially for a constant integer as OP1.
2342    If you want this check for OP0 as well, then before calling
2343    you should swap the two operands if OP0 would be constant.  */
2344
2345 rtx
2346 expand_mult (mode, op0, op1, target, unsignedp)
2347      enum machine_mode mode;
2348      register rtx op0, op1, target;
2349      int unsignedp;
2350 {
2351   rtx const_op1 = op1;
2352
2353   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2354      less than or equal in size to `unsigned int' this doesn't matter.
2355      If the mode is larger than `unsigned int', then synth_mult works only
2356      if the constant value exactly fits in an `unsigned int' without any
2357      truncation.  This means that multiplying by negative values does
2358      not work; results are off by 2^32 on a 32 bit machine.  */
2359
2360   /* If we are multiplying in DImode, it may still be a win
2361      to try to work with shifts and adds.  */
2362   if (GET_CODE (op1) == CONST_DOUBLE
2363       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2364       && HOST_BITS_PER_INT >= BITS_PER_WORD
2365       && CONST_DOUBLE_HIGH (op1) == 0)
2366     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2367   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2368            && GET_CODE (op1) == CONST_INT
2369            && INTVAL (op1) < 0)
2370     const_op1 = 0;
2371
2372   /* We used to test optimize here, on the grounds that it's better to
2373      produce a smaller program when -O is not used.
2374      But this causes such a terrible slowdown sometimes
2375      that it seems better to use synth_mult always.  */
2376
2377   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2378       && (unsignedp || ! flag_trapv))
2379     {
2380       struct algorithm alg;
2381       struct algorithm alg2;
2382       HOST_WIDE_INT val = INTVAL (op1);
2383       HOST_WIDE_INT val_so_far;
2384       rtx insn;
2385       int mult_cost;
2386       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2387
2388       /* Try to do the computation three ways: multiply by the negative of OP1
2389          and then negate, do the multiplication directly, or do multiplication
2390          by OP1 - 1.  */
2391
2392       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2393       mult_cost = MIN (12 * add_cost, mult_cost);
2394
2395       synth_mult (&alg, val, mult_cost);
2396
2397       /* This works only if the inverted value actually fits in an
2398          `unsigned int' */
2399       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2400         {
2401           synth_mult (&alg2, - val,
2402                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2403           if (alg2.cost + negate_cost < alg.cost)
2404             alg = alg2, variant = negate_variant;
2405         }
2406
2407       /* This proves very useful for division-by-constant.  */
2408       synth_mult (&alg2, val - 1,
2409                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2410       if (alg2.cost + add_cost < alg.cost)
2411         alg = alg2, variant = add_variant;
2412
2413       if (alg.cost < mult_cost)
2414         {
2415           /* We found something cheaper than a multiply insn.  */
2416           int opno;
2417           rtx accum, tem;
2418           enum machine_mode nmode;
2419
2420           op0 = protect_from_queue (op0, 0);
2421
2422           /* Avoid referencing memory over and over.
2423              For speed, but also for correctness when mem is volatile.  */
2424           if (GET_CODE (op0) == MEM)
2425             op0 = force_reg (mode, op0);
2426
2427           /* ACCUM starts out either as OP0 or as a zero, depending on
2428              the first operation.  */
2429
2430           if (alg.op[0] == alg_zero)
2431             {
2432               accum = copy_to_mode_reg (mode, const0_rtx);
2433               val_so_far = 0;
2434             }
2435           else if (alg.op[0] == alg_m)
2436             {
2437               accum = copy_to_mode_reg (mode, op0);
2438               val_so_far = 1;
2439             }
2440           else
2441             abort ();
2442
2443           for (opno = 1; opno < alg.ops; opno++)
2444             {
2445               int log = alg.log[opno];
2446               int preserve = preserve_subexpressions_p ();
2447               rtx shift_subtarget = preserve ? 0 : accum;
2448               rtx add_target
2449                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2450                    && ! preserve)
2451                   ? target : 0;
2452               rtx accum_target = preserve ? 0 : accum;
2453
2454               switch (alg.op[opno])
2455                 {
2456                 case alg_shift:
2457                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2458                                         build_int_2 (log, 0), NULL_RTX, 0);
2459                   val_so_far <<= log;
2460                   break;
2461
2462                 case alg_add_t_m2:
2463                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2464                                       build_int_2 (log, 0), NULL_RTX, 0);
2465                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2466                                          add_target
2467                                          ? add_target : accum_target);
2468                   val_so_far += (HOST_WIDE_INT) 1 << log;
2469                   break;
2470
2471                 case alg_sub_t_m2:
2472                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2473                                       build_int_2 (log, 0), NULL_RTX, 0);
2474                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2475                                          add_target
2476                                          ? add_target : accum_target);
2477                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2478                   break;
2479
2480                 case alg_add_t2_m:
2481                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2482                                         build_int_2 (log, 0), shift_subtarget,
2483                                         0);
2484                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2485                                          add_target
2486                                          ? add_target : accum_target);
2487                   val_so_far = (val_so_far << log) + 1;
2488                   break;
2489
2490                 case alg_sub_t2_m:
2491                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2492                                         build_int_2 (log, 0), shift_subtarget,
2493                                         0);
2494                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2495                                          add_target
2496                                          ? add_target : accum_target);
2497                   val_so_far = (val_so_far << log) - 1;
2498                   break;
2499
2500                 case alg_add_factor:
2501                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2502                                       build_int_2 (log, 0), NULL_RTX, 0);
2503                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2504                                          add_target
2505                                          ? add_target : accum_target);
2506                   val_so_far += val_so_far << log;
2507                   break;
2508
2509                 case alg_sub_factor:
2510                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2511                                       build_int_2 (log, 0), NULL_RTX, 0);
2512                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2513                                          (add_target ? add_target
2514                                           : preserve ? 0 : tem));
2515                   val_so_far = (val_so_far << log) - val_so_far;
2516                   break;
2517
2518                 default:
2519                   abort ();
2520                 }
2521
2522               /* Write a REG_EQUAL note on the last insn so that we can cse
2523                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2524                  we've set the inner register and must properly indicate
2525                  that.  */
2526
2527               tem = op0, nmode = mode;
2528               if (GET_CODE (accum) == SUBREG)
2529                 {
2530                   nmode = GET_MODE (SUBREG_REG (accum));
2531                   tem = gen_lowpart (nmode, op0);
2532                 }
2533
2534               insn = get_last_insn ();
2535               set_unique_reg_note (insn,
2536                                    REG_EQUAL,
2537                                    gen_rtx_MULT (nmode, tem,
2538                                                  GEN_INT (val_so_far)));
2539             }
2540
2541           if (variant == negate_variant)
2542             {
2543               val_so_far = - val_so_far;
2544               accum = expand_unop (mode, neg_optab, accum, target, 0);
2545             }
2546           else if (variant == add_variant)
2547             {
2548               val_so_far = val_so_far + 1;
2549               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2550             }
2551
2552           if (val != val_so_far)
2553             abort ();
2554
2555           return accum;
2556         }
2557     }
2558
2559   /* This used to use umul_optab if unsigned, but for non-widening multiply
2560      there is no difference between signed and unsigned.  */
2561   op0 = expand_binop (mode,
2562                       ! unsignedp
2563                        && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2564                        ? smulv_optab : smul_optab,
2565                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2566   if (op0 == 0)
2567     abort ();
2568   return op0;
2569 }
2570 \f
2571 /* Return the smallest n such that 2**n >= X.  */
2572
2573 int
2574 ceil_log2 (x)
2575      unsigned HOST_WIDE_INT x;
2576 {
2577   return floor_log2 (x - 1) + 1;
2578 }
2579
2580 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2581    replace division by D, and put the least significant N bits of the result
2582    in *MULTIPLIER_PTR and return the most significant bit.
2583
2584    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2585    needed precision is in PRECISION (should be <= N).
2586
2587    PRECISION should be as small as possible so this function can choose
2588    multiplier more freely.
2589
2590    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2591    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2592
2593    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2594    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2595
2596 static
2597 unsigned HOST_WIDE_INT
2598 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2599      unsigned HOST_WIDE_INT d;
2600      int n;
2601      int precision;
2602      unsigned HOST_WIDE_INT *multiplier_ptr;
2603      int *post_shift_ptr;
2604      int *lgup_ptr;
2605 {
2606   HOST_WIDE_INT mhigh_hi, mlow_hi;
2607   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2608   int lgup, post_shift;
2609   int pow, pow2;
2610   unsigned HOST_WIDE_INT nl, dummy1;
2611   HOST_WIDE_INT nh, dummy2;
2612
2613   /* lgup = ceil(log2(divisor)); */
2614   lgup = ceil_log2 (d);
2615
2616   if (lgup > n)
2617     abort ();
2618
2619   pow = n + lgup;
2620   pow2 = n + lgup - precision;
2621
2622   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2623     {
2624       /* We could handle this with some effort, but this case is much better
2625          handled directly with a scc insn, so rely on caller using that.  */
2626       abort ();
2627     }
2628
2629   /* mlow = 2^(N + lgup)/d */
2630  if (pow >= HOST_BITS_PER_WIDE_INT)
2631     {
2632       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2633       nl = 0;
2634     }
2635   else
2636     {
2637       nh = 0;
2638       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2639     }
2640   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2641                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2642
2643   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2644   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2645     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2646   else
2647     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2648   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2649                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2650
2651   if (mhigh_hi && nh - d >= d)
2652     abort ();
2653   if (mhigh_hi > 1 || mlow_hi > 1)
2654     abort ();
2655   /* assert that mlow < mhigh.  */
2656   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2657     abort();
2658
2659   /* If precision == N, then mlow, mhigh exceed 2^N
2660      (but they do not exceed 2^(N+1)).  */
2661
2662   /* Reduce to lowest terms */
2663   for (post_shift = lgup; post_shift > 0; post_shift--)
2664     {
2665       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2666       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2667       if (ml_lo >= mh_lo)
2668         break;
2669
2670       mlow_hi = 0;
2671       mlow_lo = ml_lo;
2672       mhigh_hi = 0;
2673       mhigh_lo = mh_lo;
2674     }
2675
2676   *post_shift_ptr = post_shift;
2677   *lgup_ptr = lgup;
2678   if (n < HOST_BITS_PER_WIDE_INT)
2679     {
2680       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2681       *multiplier_ptr = mhigh_lo & mask;
2682       return mhigh_lo >= mask;
2683     }
2684   else
2685     {
2686       *multiplier_ptr = mhigh_lo;
2687       return mhigh_hi;
2688     }
2689 }
2690
2691 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2692    congruent to 1 (mod 2**N).  */
2693
2694 static unsigned HOST_WIDE_INT
2695 invert_mod2n (x, n)
2696      unsigned HOST_WIDE_INT x;
2697      int n;
2698 {
2699   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2700
2701   /* The algorithm notes that the choice y = x satisfies
2702      x*y == 1 mod 2^3, since x is assumed odd.
2703      Each iteration doubles the number of bits of significance in y.  */
2704
2705   unsigned HOST_WIDE_INT mask;
2706   unsigned HOST_WIDE_INT y = x;
2707   int nbit = 3;
2708
2709   mask = (n == HOST_BITS_PER_WIDE_INT
2710           ? ~(unsigned HOST_WIDE_INT) 0
2711           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2712
2713   while (nbit < n)
2714     {
2715       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2716       nbit *= 2;
2717     }
2718   return y;
2719 }
2720
2721 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2722    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2723    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2724    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2725    become signed.
2726
2727    The result is put in TARGET if that is convenient.
2728
2729    MODE is the mode of operation.  */
2730
2731 rtx
2732 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2733      enum machine_mode mode;
2734      register rtx adj_operand, op0, op1, target;
2735      int unsignedp;
2736 {
2737   rtx tem;
2738   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2739
2740   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2741                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2742                       NULL_RTX, 0);
2743   tem = expand_and (tem, op1, NULL_RTX);
2744   adj_operand
2745     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2746                      adj_operand);
2747
2748   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2749                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2750                       NULL_RTX, 0);
2751   tem = expand_and (tem, op0, NULL_RTX);
2752   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2753                           target);
2754
2755   return target;
2756 }
2757
2758 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2759    in TARGET if that is convenient, and return where the result is.  If the
2760    operation can not be performed, 0 is returned.
2761
2762    MODE is the mode of operation and result.
2763
2764    UNSIGNEDP nonzero means unsigned multiply.
2765
2766    MAX_COST is the total allowed cost for the expanded RTL.  */
2767
2768 rtx
2769 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2770      enum machine_mode mode;
2771      register rtx op0, target;
2772      unsigned HOST_WIDE_INT cnst1;
2773      int unsignedp;
2774      int max_cost;
2775 {
2776   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2777   optab mul_highpart_optab;
2778   optab moptab;
2779   rtx tem;
2780   int size = GET_MODE_BITSIZE (mode);
2781   rtx op1, wide_op1;
2782
2783   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2784   if (size > HOST_BITS_PER_WIDE_INT)
2785     abort ();
2786
2787   op1 = GEN_INT (trunc_int_for_mode (cnst1, mode));
2788
2789   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2790     wide_op1 = op1;
2791   else
2792     wide_op1
2793       = immed_double_const (cnst1,
2794                             (unsignedp
2795                              ? (HOST_WIDE_INT) 0
2796                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2797                             wider_mode);
2798
2799   /* expand_mult handles constant multiplication of word_mode
2800      or narrower.  It does a poor job for large modes.  */
2801   if (size < BITS_PER_WORD
2802       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2803     {
2804       /* We have to do this, since expand_binop doesn't do conversion for
2805          multiply.  Maybe change expand_binop to handle widening multiply?  */
2806       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2807
2808       /* We know that this can't have signed overflow, so pretend this is
2809          an unsigned multiply.  */
2810       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2811       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2812                           build_int_2 (size, 0), NULL_RTX, 1);
2813       return convert_modes (mode, wider_mode, tem, unsignedp);
2814     }
2815
2816   if (target == 0)
2817     target = gen_reg_rtx (mode);
2818
2819   /* Firstly, try using a multiplication insn that only generates the needed
2820      high part of the product, and in the sign flavor of unsignedp.  */
2821   if (mul_highpart_cost[(int) mode] < max_cost)
2822     {
2823       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2824       target = expand_binop (mode, mul_highpart_optab,
2825                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2826       if (target)
2827         return target;
2828     }
2829
2830   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2831      Need to adjust the result after the multiplication.  */
2832   if (size - 1 < BITS_PER_WORD
2833       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2834           < max_cost))
2835     {
2836       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2837       target = expand_binop (mode, mul_highpart_optab,
2838                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2839       if (target)
2840         /* We used the wrong signedness.  Adjust the result.  */
2841         return expand_mult_highpart_adjust (mode, target, op0,
2842                                             op1, target, unsignedp);
2843     }
2844
2845   /* Try widening multiplication.  */
2846   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2847   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2848       && mul_widen_cost[(int) wider_mode] < max_cost)
2849     {
2850       op1 = force_reg (mode, op1);
2851       goto try;
2852     }
2853
2854   /* Try widening the mode and perform a non-widening multiplication.  */
2855   moptab = smul_optab;
2856   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2857       && size - 1 < BITS_PER_WORD
2858       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2859     {
2860       op1 = wide_op1;
2861       goto try;
2862     }
2863
2864   /* Try widening multiplication of opposite signedness, and adjust.  */
2865   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2866   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2867       && size - 1 < BITS_PER_WORD
2868       && (mul_widen_cost[(int) wider_mode]
2869           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2870     {
2871       rtx regop1 = force_reg (mode, op1);
2872       tem = expand_binop (wider_mode, moptab, op0, regop1,
2873                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2874       if (tem != 0)
2875         {
2876           /* Extract the high half of the just generated product.  */
2877           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2878                               build_int_2 (size, 0), NULL_RTX, 1);
2879           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2880           /* We used the wrong signedness.  Adjust the result.  */
2881           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2882                                               target, unsignedp);
2883         }
2884     }
2885
2886   return 0;
2887
2888  try:
2889   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2890   tem = expand_binop (wider_mode, moptab, op0, op1,
2891                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2892   if (tem == 0)
2893     return 0;
2894
2895   /* Extract the high half of the just generated product.  */
2896   if (mode == word_mode)
2897     {
2898       return gen_highpart (mode, tem);
2899     }
2900   else
2901     {
2902       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2903                           build_int_2 (size, 0), NULL_RTX, 1);
2904       return convert_modes (mode, wider_mode, tem, unsignedp);
2905     }
2906 }
2907 \f
2908 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2909    if that is convenient, and returning where the result is.
2910    You may request either the quotient or the remainder as the result;
2911    specify REM_FLAG nonzero to get the remainder.
2912
2913    CODE is the expression code for which kind of division this is;
2914    it controls how rounding is done.  MODE is the machine mode to use.
2915    UNSIGNEDP nonzero means do unsigned division.  */
2916
2917 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2918    and then correct it by or'ing in missing high bits
2919    if result of ANDI is nonzero.
2920    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2921    This could optimize to a bfexts instruction.
2922    But C doesn't use these operations, so their optimizations are
2923    left for later.  */
2924 /* ??? For modulo, we don't actually need the highpart of the first product,
2925    the low part will do nicely.  And for small divisors, the second multiply
2926    can also be a low-part only multiply or even be completely left out.
2927    E.g. to calculate the remainder of a division by 3 with a 32 bit
2928    multiply, multiply with 0x55555556 and extract the upper two bits;
2929    the result is exact for inputs up to 0x1fffffff.
2930    The input range can be reduced by using cross-sum rules.
2931    For odd divisors >= 3, the following table gives right shift counts
2932    so that if an number is shifted by an integer multiple of the given
2933    amount, the remainder stays the same:
2934    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2935    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2936    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2937    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2938    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2939
2940    Cross-sum rules for even numbers can be derived by leaving as many bits
2941    to the right alone as the divisor has zeros to the right.
2942    E.g. if x is an unsigned 32 bit number:
2943    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2944    */
2945
2946 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2947
2948 rtx
2949 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2950      int rem_flag;
2951      enum tree_code code;
2952      enum machine_mode mode;
2953      register rtx op0, op1, target;
2954      int unsignedp;
2955 {
2956   enum machine_mode compute_mode;
2957   register rtx tquotient;
2958   rtx quotient = 0, remainder = 0;
2959   rtx last;
2960   int size;
2961   rtx insn, set;
2962   optab optab1, optab2;
2963   int op1_is_constant, op1_is_pow2;
2964   int max_cost, extra_cost;
2965   static HOST_WIDE_INT last_div_const = 0;
2966
2967   op1_is_constant = GET_CODE (op1) == CONST_INT;
2968   op1_is_pow2 = (op1_is_constant
2969                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2970                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2971
2972   /*
2973      This is the structure of expand_divmod:
2974
2975      First comes code to fix up the operands so we can perform the operations
2976      correctly and efficiently.
2977
2978      Second comes a switch statement with code specific for each rounding mode.
2979      For some special operands this code emits all RTL for the desired
2980      operation, for other cases, it generates only a quotient and stores it in
2981      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2982      to indicate that it has not done anything.
2983
2984      Last comes code that finishes the operation.  If QUOTIENT is set and
2985      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2986      QUOTIENT is not set, it is computed using trunc rounding.
2987
2988      We try to generate special code for division and remainder when OP1 is a
2989      constant.  If |OP1| = 2**n we can use shifts and some other fast
2990      operations.  For other values of OP1, we compute a carefully selected
2991      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2992      by m.
2993
2994      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2995      half of the product.  Different strategies for generating the product are
2996      implemented in expand_mult_highpart.
2997
2998      If what we actually want is the remainder, we generate that by another
2999      by-constant multiplication and a subtraction.  */
3000
3001   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3002      code below will malfunction if we are, so check here and handle
3003      the special case if so.  */
3004   if (op1 == const1_rtx)
3005     return rem_flag ? const0_rtx : op0;
3006
3007     /* When dividing by -1, we could get an overflow.
3008      negv_optab can handle overflows.  */
3009   if (! unsignedp && op1 == constm1_rtx)
3010     {
3011       if (rem_flag)
3012         return const0_rtx;
3013       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3014                         ? negv_optab : neg_optab, op0, target, 0);
3015     }
3016
3017   if (target
3018       /* Don't use the function value register as a target
3019          since we have to read it as well as write it,
3020          and function-inlining gets confused by this.  */
3021       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3022           /* Don't clobber an operand while doing a multi-step calculation.  */
3023           || ((rem_flag || op1_is_constant)
3024               && (reg_mentioned_p (target, op0)
3025                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3026           || reg_mentioned_p (target, op1)
3027           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3028     target = 0;
3029
3030   /* Get the mode in which to perform this computation.  Normally it will
3031      be MODE, but sometimes we can't do the desired operation in MODE.
3032      If so, pick a wider mode in which we can do the operation.  Convert
3033      to that mode at the start to avoid repeated conversions.
3034
3035      First see what operations we need.  These depend on the expression
3036      we are evaluating.  (We assume that divxx3 insns exist under the
3037      same conditions that modxx3 insns and that these insns don't normally
3038      fail.  If these assumptions are not correct, we may generate less
3039      efficient code in some cases.)
3040
3041      Then see if we find a mode in which we can open-code that operation
3042      (either a division, modulus, or shift).  Finally, check for the smallest
3043      mode for which we can do the operation with a library call.  */
3044
3045   /* We might want to refine this now that we have division-by-constant
3046      optimization.  Since expand_mult_highpart tries so many variants, it is
3047      not straightforward to generalize this.  Maybe we should make an array
3048      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3049
3050   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
3051             : (unsignedp ? udiv_optab : sdiv_optab));
3052   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3053
3054   for (compute_mode = mode; compute_mode != VOIDmode;
3055        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3056     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3057         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3058       break;
3059
3060   if (compute_mode == VOIDmode)
3061     for (compute_mode = mode; compute_mode != VOIDmode;
3062          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3063       if (optab1->handlers[(int) compute_mode].libfunc
3064           || optab2->handlers[(int) compute_mode].libfunc)
3065         break;
3066
3067   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3068      in expand_binop.  */
3069   if (compute_mode == VOIDmode)
3070     compute_mode = mode;
3071
3072   if (target && GET_MODE (target) == compute_mode)
3073     tquotient = target;
3074   else
3075     tquotient = gen_reg_rtx (compute_mode);
3076
3077   size = GET_MODE_BITSIZE (compute_mode);
3078 #if 0
3079   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3080      (mode), and thereby get better code when OP1 is a constant.  Do that
3081      later.  It will require going over all usages of SIZE below.  */
3082   size = GET_MODE_BITSIZE (mode);
3083 #endif
3084
3085   /* Only deduct something for a REM if the last divide done was
3086      for a different constant.   Then set the constant of the last
3087      divide.  */
3088   max_cost = div_cost[(int) compute_mode]
3089     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3090                       && INTVAL (op1) == last_div_const)
3091        ? mul_cost[(int) compute_mode] + add_cost : 0);
3092
3093   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3094
3095   /* Now convert to the best mode to use.  */
3096   if (compute_mode != mode)
3097     {
3098       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3099       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3100
3101       /* convert_modes may have placed op1 into a register, so we
3102          must recompute the following.  */
3103       op1_is_constant = GET_CODE (op1) == CONST_INT;
3104       op1_is_pow2 = (op1_is_constant
3105                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3106                           || (! unsignedp
3107                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3108     }
3109
3110   /* If one of the operands is a volatile MEM, copy it into a register.  */
3111
3112   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3113     op0 = force_reg (compute_mode, op0);
3114   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3115     op1 = force_reg (compute_mode, op1);
3116
3117   /* If we need the remainder or if OP1 is constant, we need to
3118      put OP0 in a register in case it has any queued subexpressions.  */
3119   if (rem_flag || op1_is_constant)
3120     op0 = force_reg (compute_mode, op0);
3121
3122   last = get_last_insn ();
3123
3124   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3125   if (unsignedp)
3126     {
3127       if (code == FLOOR_DIV_EXPR)
3128         code = TRUNC_DIV_EXPR;
3129       if (code == FLOOR_MOD_EXPR)
3130         code = TRUNC_MOD_EXPR;
3131       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3132         code = TRUNC_DIV_EXPR;
3133     }
3134
3135   if (op1 != const0_rtx)
3136     switch (code)
3137       {
3138       case TRUNC_MOD_EXPR:
3139       case TRUNC_DIV_EXPR:
3140         if (op1_is_constant)
3141           {
3142             if (unsignedp)
3143               {
3144                 unsigned HOST_WIDE_INT mh, ml;
3145                 int pre_shift, post_shift;
3146                 int dummy;
3147                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3148
3149                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3150                   {
3151                     pre_shift = floor_log2 (d);
3152                     if (rem_flag)
3153                       {
3154                         remainder
3155                           = expand_binop (compute_mode, and_optab, op0,
3156                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3157                                           remainder, 1,
3158                                           OPTAB_LIB_WIDEN);
3159                         if (remainder)
3160                           return gen_lowpart (mode, remainder);
3161                       }
3162                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3163                                              build_int_2 (pre_shift, 0),
3164                                              tquotient, 1);
3165                   }
3166                 else if (size <= HOST_BITS_PER_WIDE_INT)
3167                   {
3168                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3169                       {
3170                         /* Most significant bit of divisor is set; emit an scc
3171                            insn.  */
3172                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3173                                                     compute_mode, 1, 1);
3174                         if (quotient == 0)
3175                           goto fail1;
3176                       }
3177                     else
3178                       {
3179                         /* Find a suitable multiplier and right shift count
3180                            instead of multiplying with D.  */
3181
3182                         mh = choose_multiplier (d, size, size,
3183                                                 &ml, &post_shift, &dummy);
3184
3185                         /* If the suggested multiplier is more than SIZE bits,
3186                            we can do better for even divisors, using an
3187                            initial right shift.  */
3188                         if (mh != 0 && (d & 1) == 0)
3189                           {
3190                             pre_shift = floor_log2 (d & -d);
3191                             mh = choose_multiplier (d >> pre_shift, size,
3192                                                     size - pre_shift,
3193                                                     &ml, &post_shift, &dummy);
3194                             if (mh)
3195                               abort ();
3196                           }
3197                         else
3198                           pre_shift = 0;
3199
3200                         if (mh != 0)
3201                           {
3202                             rtx t1, t2, t3, t4;
3203
3204                             if (post_shift - 1 >= BITS_PER_WORD)
3205                               goto fail1;
3206
3207                             extra_cost = (shift_cost[post_shift - 1]
3208                                           + shift_cost[1] + 2 * add_cost);
3209                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3210                                                        NULL_RTX, 1,
3211                                                        max_cost - extra_cost);
3212                             if (t1 == 0)
3213                               goto fail1;
3214                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3215                                                                op0, t1),
3216                                                 NULL_RTX);
3217                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3218                                                build_int_2 (1, 0), NULL_RTX,1);
3219                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3220                                                               t1, t3),
3221                                                 NULL_RTX);
3222                             quotient
3223                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3224                                               build_int_2 (post_shift - 1, 0),
3225                                               tquotient, 1);
3226                           }
3227                         else
3228                           {
3229                             rtx t1, t2;
3230
3231                             if (pre_shift >= BITS_PER_WORD
3232                                 || post_shift >= BITS_PER_WORD)
3233                               goto fail1;
3234
3235                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3236                                                build_int_2 (pre_shift, 0),
3237                                                NULL_RTX, 1);
3238                             extra_cost = (shift_cost[pre_shift]
3239                                           + shift_cost[post_shift]);
3240                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3241                                                        NULL_RTX, 1,
3242                                                        max_cost - extra_cost);
3243                             if (t2 == 0)
3244                               goto fail1;
3245                             quotient
3246                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3247                                               build_int_2 (post_shift, 0),
3248                                               tquotient, 1);
3249                           }
3250                       }
3251                   }
3252                 else            /* Too wide mode to use tricky code */
3253                   break;
3254
3255                 insn = get_last_insn ();
3256                 if (insn != last
3257                     && (set = single_set (insn)) != 0
3258                     && SET_DEST (set) == quotient)
3259                   set_unique_reg_note (insn,
3260                                        REG_EQUAL,
3261                                        gen_rtx_UDIV (compute_mode, op0, op1));
3262               }
3263             else                /* TRUNC_DIV, signed */
3264               {
3265                 unsigned HOST_WIDE_INT ml;
3266                 int lgup, post_shift;
3267                 HOST_WIDE_INT d = INTVAL (op1);
3268                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3269
3270                 /* n rem d = n rem -d */
3271                 if (rem_flag && d < 0)
3272                   {
3273                     d = abs_d;
3274                     op1 = GEN_INT (trunc_int_for_mode (abs_d, compute_mode));
3275                   }
3276
3277                 if (d == 1)
3278                   quotient = op0;
3279                 else if (d == -1)
3280                   quotient = expand_unop (compute_mode, neg_optab, op0,
3281                                           tquotient, 0);
3282                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3283                   {
3284                     /* This case is not handled correctly below.  */
3285                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3286                                                 compute_mode, 1, 1);
3287                     if (quotient == 0)
3288                       goto fail1;
3289                   }
3290                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3291                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3292                   ;
3293                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3294                   {
3295                     lgup = floor_log2 (abs_d);
3296                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3297                       {
3298                         rtx label = gen_label_rtx ();
3299                         rtx t1;
3300
3301                         t1 = copy_to_mode_reg (compute_mode, op0);
3302                         do_cmp_and_jump (t1, const0_rtx, GE,
3303                                          compute_mode, label);
3304                         expand_inc (t1, GEN_INT (trunc_int_for_mode
3305                                                  (abs_d - 1, compute_mode)));
3306                         emit_label (label);
3307                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3308                                                  build_int_2 (lgup, 0),
3309                                                  tquotient, 0);
3310                       }
3311                     else
3312                       {
3313                         rtx t1, t2, t3;
3314                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3315                                            build_int_2 (size - 1, 0),
3316                                            NULL_RTX, 0);
3317                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3318                                            build_int_2 (size - lgup, 0),
3319                                            NULL_RTX, 1);
3320                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3321                                                           op0, t2),
3322                                             NULL_RTX);
3323                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3324                                                  build_int_2 (lgup, 0),
3325                                                  tquotient, 0);
3326                       }
3327
3328                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3329                        the quotient.  */
3330                     if (d < 0)
3331                       {
3332                         insn = get_last_insn ();
3333                         if (insn != last
3334                             && (set = single_set (insn)) != 0
3335                             && SET_DEST (set) == quotient
3336                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3337                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3338                           set_unique_reg_note (insn,
3339                                                REG_EQUAL,
3340                                                gen_rtx_DIV (compute_mode,
3341                                                             op0,
3342                                                             GEN_INT
3343                                                             (trunc_int_for_mode
3344                                                              (abs_d,
3345                                                               compute_mode))));
3346
3347                         quotient = expand_unop (compute_mode, neg_optab,
3348                                                 quotient, quotient, 0);
3349                       }
3350                   }
3351                 else if (size <= HOST_BITS_PER_WIDE_INT)
3352                   {
3353                     choose_multiplier (abs_d, size, size - 1,
3354                                        &ml, &post_shift, &lgup);
3355                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3356                       {
3357                         rtx t1, t2, t3;
3358
3359                         if (post_shift >= BITS_PER_WORD
3360                             || size - 1 >= BITS_PER_WORD)
3361                           goto fail1;
3362
3363                         extra_cost = (shift_cost[post_shift]
3364                                       + shift_cost[size - 1] + add_cost);
3365                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3366                                                    NULL_RTX, 0,
3367                                                    max_cost - extra_cost);
3368                         if (t1 == 0)
3369                           goto fail1;
3370                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3371                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3372                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3373                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3374                         if (d < 0)
3375                           quotient
3376                             = force_operand (gen_rtx_MINUS (compute_mode,
3377                                                             t3, t2),
3378                                              tquotient);
3379                         else
3380                           quotient
3381                             = force_operand (gen_rtx_MINUS (compute_mode,
3382                                                             t2, t3),
3383                                              tquotient);
3384                       }
3385                     else
3386                       {
3387                         rtx t1, t2, t3, t4;
3388
3389                         if (post_shift >= BITS_PER_WORD
3390                             || size - 1 >= BITS_PER_WORD)
3391                           goto fail1;
3392
3393                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3394                         extra_cost = (shift_cost[post_shift]
3395                                       + shift_cost[size - 1] + 2 * add_cost);
3396                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3397                                                    NULL_RTX, 0,
3398                                                    max_cost - extra_cost);
3399                         if (t1 == 0)
3400                           goto fail1;
3401                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3402                                                           t1, op0),
3403                                             NULL_RTX);
3404                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3405                                            build_int_2 (post_shift, 0),
3406                                            NULL_RTX, 0);
3407                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3408                                            build_int_2 (size - 1, 0),
3409                                            NULL_RTX, 0);
3410                         if (d < 0)
3411                           quotient
3412                             = force_operand (gen_rtx_MINUS (compute_mode,
3413                                                             t4, t3),
3414                                              tquotient);
3415                         else
3416                           quotient
3417                             = force_operand (gen_rtx_MINUS (compute_mode,
3418                                                             t3, t4),
3419                                              tquotient);
3420                       }
3421                   }
3422                 else            /* Too wide mode to use tricky code */
3423                   break;
3424
3425                 insn = get_last_insn ();
3426                 if (insn != last
3427                     && (set = single_set (insn)) != 0
3428                     && SET_DEST (set) == quotient)
3429                   set_unique_reg_note (insn,
3430                                        REG_EQUAL,
3431                                        gen_rtx_DIV (compute_mode, op0, op1));
3432               }
3433             break;
3434           }
3435       fail1:
3436         delete_insns_since (last);
3437         break;
3438
3439       case FLOOR_DIV_EXPR:
3440       case FLOOR_MOD_EXPR:
3441       /* We will come here only for signed operations.  */
3442         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3443           {
3444             unsigned HOST_WIDE_INT mh, ml;
3445             int pre_shift, lgup, post_shift;
3446             HOST_WIDE_INT d = INTVAL (op1);
3447
3448             if (d > 0)
3449               {
3450                 /* We could just as easily deal with negative constants here,
3451                    but it does not seem worth the trouble for GCC 2.6.  */
3452                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3453                   {
3454                     pre_shift = floor_log2 (d);
3455                     if (rem_flag)
3456                       {
3457                         remainder = expand_binop (compute_mode, and_optab, op0,
3458                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3459                                                   remainder, 0, OPTAB_LIB_WIDEN);
3460                         if (remainder)
3461                           return gen_lowpart (mode, remainder);
3462                       }
3463                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3464                                              build_int_2 (pre_shift, 0),
3465                                              tquotient, 0);
3466                   }
3467                 else
3468                   {
3469                     rtx t1, t2, t3, t4;
3470
3471                     mh = choose_multiplier (d, size, size - 1,
3472                                             &ml, &post_shift, &lgup);
3473                     if (mh)
3474                       abort ();
3475
3476                     if (post_shift < BITS_PER_WORD
3477                         && size - 1 < BITS_PER_WORD)
3478                       {
3479                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3480                                            build_int_2 (size - 1, 0),
3481                                            NULL_RTX, 0);
3482                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3483                                            NULL_RTX, 0, OPTAB_WIDEN);
3484                         extra_cost = (shift_cost[post_shift]
3485                                       + shift_cost[size - 1] + 2 * add_cost);
3486                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3487                                                    NULL_RTX, 1,
3488                                                    max_cost - extra_cost);
3489                         if (t3 != 0)
3490                           {
3491                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3492                                                build_int_2 (post_shift, 0),
3493                                                NULL_RTX, 1);
3494                             quotient = expand_binop (compute_mode, xor_optab,
3495                                                      t4, t1, tquotient, 0,
3496                                                      OPTAB_WIDEN);
3497                           }
3498                       }
3499                   }
3500               }
3501             else
3502               {
3503                 rtx nsign, t1, t2, t3, t4;
3504                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3505                                                   op0, constm1_rtx), NULL_RTX);
3506                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3507                                    0, OPTAB_WIDEN);
3508                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3509                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3510                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3511                                     NULL_RTX);
3512                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3513                                     NULL_RTX, 0);
3514                 if (t4)
3515                   {
3516                     rtx t5;
3517                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3518                                       NULL_RTX, 0);
3519                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3520                                                             t4, t5),
3521                                               tquotient);
3522                   }
3523               }
3524           }
3525
3526         if (quotient != 0)
3527           break;
3528         delete_insns_since (last);
3529
3530         /* Try using an instruction that produces both the quotient and
3531            remainder, using truncation.  We can easily compensate the quotient
3532            or remainder to get floor rounding, once we have the remainder.
3533            Notice that we compute also the final remainder value here,
3534            and return the result right away.  */
3535         if (target == 0 || GET_MODE (target) != compute_mode)
3536           target = gen_reg_rtx (compute_mode);
3537
3538         if (rem_flag)
3539           {
3540             remainder
3541               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3542             quotient = gen_reg_rtx (compute_mode);
3543           }
3544         else
3545           {
3546             quotient
3547               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3548             remainder = gen_reg_rtx (compute_mode);
3549           }
3550
3551         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3552                                  quotient, remainder, 0))
3553           {
3554             /* This could be computed with a branch-less sequence.
3555                Save that for later.  */
3556             rtx tem;
3557             rtx label = gen_label_rtx ();
3558             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3559             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3560                                 NULL_RTX, 0, OPTAB_WIDEN);
3561             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3562             expand_dec (quotient, const1_rtx);
3563             expand_inc (remainder, op1);
3564             emit_label (label);
3565             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3566           }
3567
3568         /* No luck with division elimination or divmod.  Have to do it
3569            by conditionally adjusting op0 *and* the result.  */
3570         {
3571           rtx label1, label2, label3, label4, label5;
3572           rtx adjusted_op0;
3573           rtx tem;
3574
3575           quotient = gen_reg_rtx (compute_mode);
3576           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3577           label1 = gen_label_rtx ();
3578           label2 = gen_label_rtx ();
3579           label3 = gen_label_rtx ();
3580           label4 = gen_label_rtx ();
3581           label5 = gen_label_rtx ();
3582           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3583           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3584           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3585                               quotient, 0, OPTAB_LIB_WIDEN);
3586           if (tem != quotient)
3587             emit_move_insn (quotient, tem);
3588           emit_jump_insn (gen_jump (label5));
3589           emit_barrier ();
3590           emit_label (label1);
3591           expand_inc (adjusted_op0, const1_rtx);
3592           emit_jump_insn (gen_jump (label4));
3593           emit_barrier ();
3594           emit_label (label2);
3595           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3596           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3597                               quotient, 0, OPTAB_LIB_WIDEN);
3598           if (tem != quotient)
3599             emit_move_insn (quotient, tem);
3600           emit_jump_insn (gen_jump (label5));
3601           emit_barrier ();
3602           emit_label (label3);
3603           expand_dec (adjusted_op0, const1_rtx);
3604           emit_label (label4);
3605           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3606                               quotient, 0, OPTAB_LIB_WIDEN);
3607           if (tem != quotient)
3608             emit_move_insn (quotient, tem);
3609           expand_dec (quotient, const1_rtx);
3610           emit_label (label5);
3611         }
3612         break;
3613
3614       case CEIL_DIV_EXPR:
3615       case CEIL_MOD_EXPR:
3616         if (unsignedp)
3617           {
3618             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3619               {
3620                 rtx t1, t2, t3;
3621                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3622                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3623                                    build_int_2 (floor_log2 (d), 0),
3624                                    tquotient, 1);
3625                 t2 = expand_binop (compute_mode, and_optab, op0,
3626                                    GEN_INT (d - 1),
3627                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3628                 t3 = gen_reg_rtx (compute_mode);
3629                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3630                                       compute_mode, 1, 1);
3631                 if (t3 == 0)
3632                   {
3633                     rtx lab;
3634                     lab = gen_label_rtx ();
3635                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3636                     expand_inc (t1, const1_rtx);
3637                     emit_label (lab);
3638                     quotient = t1;
3639                   }
3640                 else
3641                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3642                                                           t1, t3),
3643                                             tquotient);
3644                 break;
3645               }
3646
3647             /* Try using an instruction that produces both the quotient and
3648                remainder, using truncation.  We can easily compensate the
3649                quotient or remainder to get ceiling rounding, once we have the
3650                remainder.  Notice that we compute also the final remainder
3651                value here, and return the result right away.  */
3652             if (target == 0 || GET_MODE (target) != compute_mode)
3653               target = gen_reg_rtx (compute_mode);
3654
3655             if (rem_flag)
3656               {
3657                 remainder = (GET_CODE (target) == REG
3658                              ? target : gen_reg_rtx (compute_mode));
3659                 quotient = gen_reg_rtx (compute_mode);
3660               }
3661             else
3662               {
3663                 quotient = (GET_CODE (target) == REG
3664                             ? target : gen_reg_rtx (compute_mode));
3665                 remainder = gen_reg_rtx (compute_mode);
3666               }
3667
3668             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3669                                      remainder, 1))
3670               {
3671                 /* This could be computed with a branch-less sequence.
3672                    Save that for later.  */
3673                 rtx label = gen_label_rtx ();
3674                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3675                                  compute_mode, label);
3676                 expand_inc (quotient, const1_rtx);
3677                 expand_dec (remainder, op1);
3678                 emit_label (label);
3679                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3680               }
3681
3682             /* No luck with division elimination or divmod.  Have to do it
3683                by conditionally adjusting op0 *and* the result.  */
3684             {
3685               rtx label1, label2;
3686               rtx adjusted_op0, tem;
3687
3688               quotient = gen_reg_rtx (compute_mode);
3689               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3690               label1 = gen_label_rtx ();
3691               label2 = gen_label_rtx ();
3692               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3693                                compute_mode, label1);
3694               emit_move_insn  (quotient, const0_rtx);
3695               emit_jump_insn (gen_jump (label2));
3696               emit_barrier ();
3697               emit_label (label1);
3698               expand_dec (adjusted_op0, const1_rtx);
3699               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3700                                   quotient, 1, OPTAB_LIB_WIDEN);
3701               if (tem != quotient)
3702                 emit_move_insn (quotient, tem);
3703               expand_inc (quotient, const1_rtx);
3704               emit_label (label2);
3705             }
3706           }
3707         else /* signed */
3708           {
3709             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3710                 && INTVAL (op1) >= 0)
3711               {
3712                 /* This is extremely similar to the code for the unsigned case
3713                    above.  For 2.7 we should merge these variants, but for
3714                    2.6.1 I don't want to touch the code for unsigned since that
3715                    get used in C.  The signed case will only be used by other
3716                    languages (Ada).  */
3717
3718                 rtx t1, t2, t3;
3719                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3720                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3721                                    build_int_2 (floor_log2 (d), 0),
3722                                    tquotient, 0);
3723                 t2 = expand_binop (compute_mode, and_optab, op0,
3724                                    GEN_INT (d - 1),
3725                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3726                 t3 = gen_reg_rtx (compute_mode);
3727                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3728                                       compute_mode, 1, 1);
3729                 if (t3 == 0)
3730                   {
3731                     rtx lab;
3732                     lab = gen_label_rtx ();
3733                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3734                     expand_inc (t1, const1_rtx);
3735                     emit_label (lab);
3736                     quotient = t1;
3737                   }
3738                 else
3739                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3740                                                           t1, t3),
3741                                             tquotient);
3742                 break;
3743               }
3744
3745             /* Try using an instruction that produces both the quotient and
3746                remainder, using truncation.  We can easily compensate the
3747                quotient or remainder to get ceiling rounding, once we have the
3748                remainder.  Notice that we compute also the final remainder
3749                value here, and return the result right away.  */
3750             if (target == 0 || GET_MODE (target) != compute_mode)
3751               target = gen_reg_rtx (compute_mode);
3752             if (rem_flag)
3753               {
3754                 remainder= (GET_CODE (target) == REG
3755                             ? target : gen_reg_rtx (compute_mode));
3756                 quotient = gen_reg_rtx (compute_mode);
3757               }
3758             else
3759               {
3760                 quotient = (GET_CODE (target) == REG
3761                             ? target : gen_reg_rtx (compute_mode));
3762                 remainder = gen_reg_rtx (compute_mode);
3763               }
3764
3765             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3766                                      remainder, 0))
3767               {
3768                 /* This could be computed with a branch-less sequence.
3769                    Save that for later.  */
3770                 rtx tem;
3771                 rtx label = gen_label_rtx ();
3772                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3773                                  compute_mode, label);
3774                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3775                                     NULL_RTX, 0, OPTAB_WIDEN);
3776                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3777                 expand_inc (quotient, const1_rtx);
3778                 expand_dec (remainder, op1);
3779                 emit_label (label);
3780                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3781               }
3782
3783             /* No luck with division elimination or divmod.  Have to do it
3784                by conditionally adjusting op0 *and* the result.  */
3785             {
3786               rtx label1, label2, label3, label4, label5;
3787               rtx adjusted_op0;
3788               rtx tem;
3789
3790               quotient = gen_reg_rtx (compute_mode);
3791               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3792               label1 = gen_label_rtx ();
3793               label2 = gen_label_rtx ();
3794               label3 = gen_label_rtx ();
3795               label4 = gen_label_rtx ();
3796               label5 = gen_label_rtx ();
3797               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3798               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3799                                compute_mode, label1);
3800               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3801                                   quotient, 0, OPTAB_LIB_WIDEN);
3802               if (tem != quotient)
3803                 emit_move_insn (quotient, tem);
3804               emit_jump_insn (gen_jump (label5));
3805               emit_barrier ();
3806               emit_label (label1);
3807               expand_dec (adjusted_op0, const1_rtx);
3808               emit_jump_insn (gen_jump (label4));
3809               emit_barrier ();
3810               emit_label (label2);
3811               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3812                                compute_mode, label3);
3813               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3814                                   quotient, 0, OPTAB_LIB_WIDEN);
3815               if (tem != quotient)
3816                 emit_move_insn (quotient, tem);
3817               emit_jump_insn (gen_jump (label5));
3818               emit_barrier ();
3819               emit_label (label3);
3820               expand_inc (adjusted_op0, const1_rtx);
3821               emit_label (label4);
3822               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3823                                   quotient, 0, OPTAB_LIB_WIDEN);
3824               if (tem != quotient)
3825                 emit_move_insn (quotient, tem);
3826               expand_inc (quotient, const1_rtx);
3827               emit_label (label5);
3828             }
3829           }
3830         break;
3831
3832       case EXACT_DIV_EXPR:
3833         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3834           {
3835             HOST_WIDE_INT d = INTVAL (op1);
3836             unsigned HOST_WIDE_INT ml;
3837             int pre_shift;
3838             rtx t1;
3839
3840             pre_shift = floor_log2 (d & -d);
3841             ml = invert_mod2n (d >> pre_shift, size);
3842             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3843                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3844             quotient = expand_mult (compute_mode, t1,
3845                                     GEN_INT (trunc_int_for_mode
3846                                              (ml, compute_mode)),
3847                                     NULL_RTX, 0);
3848
3849             insn = get_last_insn ();
3850             set_unique_reg_note (insn,
3851                                  REG_EQUAL,
3852                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3853                                                  compute_mode,
3854                                                  op0, op1));
3855           }
3856         break;
3857
3858       case ROUND_DIV_EXPR:
3859       case ROUND_MOD_EXPR:
3860         if (unsignedp)
3861           {
3862             rtx tem;
3863             rtx label;
3864             label = gen_label_rtx ();
3865             quotient = gen_reg_rtx (compute_mode);
3866             remainder = gen_reg_rtx (compute_mode);
3867             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3868               {
3869                 rtx tem;
3870                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3871                                          quotient, 1, OPTAB_LIB_WIDEN);
3872                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3873                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3874                                           remainder, 1, OPTAB_LIB_WIDEN);
3875               }
3876             tem = plus_constant (op1, -1);
3877             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3878                                 build_int_2 (1, 0), NULL_RTX, 1);
3879             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3880             expand_inc (quotient, const1_rtx);
3881             expand_dec (remainder, op1);
3882             emit_label (label);
3883           }
3884         else
3885           {
3886             rtx abs_rem, abs_op1, tem, mask;
3887             rtx label;
3888             label = gen_label_rtx ();
3889             quotient = gen_reg_rtx (compute_mode);
3890             remainder = gen_reg_rtx (compute_mode);
3891             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3892               {
3893                 rtx tem;
3894                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3895                                          quotient, 0, OPTAB_LIB_WIDEN);
3896                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3897                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3898                                           remainder, 0, OPTAB_LIB_WIDEN);
3899               }
3900             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3901             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3902             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3903                                 build_int_2 (1, 0), NULL_RTX, 1);
3904             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3905             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3906                                 NULL_RTX, 0, OPTAB_WIDEN);
3907             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3908                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3909             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3910                                 NULL_RTX, 0, OPTAB_WIDEN);
3911             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3912                                 NULL_RTX, 0, OPTAB_WIDEN);
3913             expand_inc (quotient, tem);
3914             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3915                                 NULL_RTX, 0, OPTAB_WIDEN);
3916             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3917                                 NULL_RTX, 0, OPTAB_WIDEN);
3918             expand_dec (remainder, tem);
3919             emit_label (label);
3920           }
3921         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3922
3923       default:
3924         abort ();
3925       }
3926
3927   if (quotient == 0)
3928     {
3929       if (target && GET_MODE (target) != compute_mode)
3930         target = 0;
3931
3932       if (rem_flag)
3933         {
3934           /* Try to produce the remainder without producing the quotient.
3935              If we seem to have a divmod patten that does not require widening,
3936              don't try windening here.  We should really have an WIDEN argument
3937              to expand_twoval_binop, since what we'd really like to do here is
3938              1) try a mod insn in compute_mode
3939              2) try a divmod insn in compute_mode
3940              3) try a div insn in compute_mode and multiply-subtract to get
3941                 remainder
3942              4) try the same things with widening allowed.  */
3943           remainder
3944             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3945                                  op0, op1, target,
3946                                  unsignedp,
3947                                  ((optab2->handlers[(int) compute_mode].insn_code
3948                                    != CODE_FOR_nothing)
3949                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3950           if (remainder == 0)
3951             {
3952               /* No luck there.  Can we do remainder and divide at once
3953                  without a library call?  */
3954               remainder = gen_reg_rtx (compute_mode);
3955               if (! expand_twoval_binop ((unsignedp
3956                                           ? udivmod_optab
3957                                           : sdivmod_optab),
3958                                          op0, op1,
3959                                          NULL_RTX, remainder, unsignedp))
3960                 remainder = 0;
3961             }
3962
3963           if (remainder)
3964             return gen_lowpart (mode, remainder);
3965         }
3966
3967       /* Produce the quotient.  Try a quotient insn, but not a library call.
3968          If we have a divmod in this mode, use it in preference to widening
3969          the div (for this test we assume it will not fail). Note that optab2
3970          is set to the one of the two optabs that the call below will use.  */
3971       quotient
3972         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3973                              op0, op1, rem_flag ? NULL_RTX : target,
3974                              unsignedp,
3975                              ((optab2->handlers[(int) compute_mode].insn_code
3976                                != CODE_FOR_nothing)
3977                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3978
3979       if (quotient == 0)
3980         {
3981           /* No luck there.  Try a quotient-and-remainder insn,
3982              keeping the quotient alone.  */
3983           quotient = gen_reg_rtx (compute_mode);
3984           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3985                                      op0, op1,
3986                                      quotient, NULL_RTX, unsignedp))
3987             {
3988               quotient = 0;
3989               if (! rem_flag)
3990                 /* Still no luck.  If we are not computing the remainder,
3991                    use a library call for the quotient.  */
3992                 quotient = sign_expand_binop (compute_mode,
3993                                               udiv_optab, sdiv_optab,
3994                                               op0, op1, target,
3995                                               unsignedp, OPTAB_LIB_WIDEN);
3996             }
3997         }
3998     }
3999
4000   if (rem_flag)
4001     {
4002       if (target && GET_MODE (target) != compute_mode)
4003         target = 0;
4004
4005       if (quotient == 0)
4006         /* No divide instruction either.  Use library for remainder.  */
4007         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4008                                        op0, op1, target,
4009                                        unsignedp, OPTAB_LIB_WIDEN);
4010       else
4011         {
4012           /* We divided.  Now finish doing X - Y * (X / Y).  */
4013           remainder = expand_mult (compute_mode, quotient, op1,
4014                                    NULL_RTX, unsignedp);
4015           remainder = expand_binop (compute_mode, sub_optab, op0,
4016                                     remainder, target, unsignedp,
4017                                     OPTAB_LIB_WIDEN);
4018         }
4019     }
4020
4021   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4022 }
4023 \f
4024 /* Return a tree node with data type TYPE, describing the value of X.
4025    Usually this is an RTL_EXPR, if there is no obvious better choice.
4026    X may be an expression, however we only support those expressions
4027    generated by loop.c.   */
4028
4029 tree
4030 make_tree (type, x)
4031      tree type;
4032      rtx x;
4033 {
4034   tree t;
4035
4036   switch (GET_CODE (x))
4037     {
4038     case CONST_INT:
4039       t = build_int_2 (INTVAL (x),
4040                        (TREE_UNSIGNED (type)
4041                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4042                        || INTVAL (x) >= 0 ? 0 : -1);
4043       TREE_TYPE (t) = type;
4044       return t;
4045
4046     case CONST_DOUBLE:
4047       if (GET_MODE (x) == VOIDmode)
4048         {
4049           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4050           TREE_TYPE (t) = type;
4051         }
4052       else
4053         {
4054           REAL_VALUE_TYPE d;
4055
4056           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4057           t = build_real (type, d);
4058         }
4059
4060       return t;
4061
4062     case PLUS:
4063       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4064                           make_tree (type, XEXP (x, 1))));
4065
4066     case MINUS:
4067       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4068                           make_tree (type, XEXP (x, 1))));
4069
4070     case NEG:
4071       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4072
4073     case MULT:
4074       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4075                           make_tree (type, XEXP (x, 1))));
4076
4077     case ASHIFT:
4078       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4079                           make_tree (type, XEXP (x, 1))));
4080
4081     case LSHIFTRT:
4082       return fold (convert (type,
4083                             build (RSHIFT_EXPR, unsigned_type (type),
4084                                    make_tree (unsigned_type (type),
4085                                               XEXP (x, 0)),
4086                                    make_tree (type, XEXP (x, 1)))));
4087
4088     case ASHIFTRT:
4089       return fold (convert (type,
4090                             build (RSHIFT_EXPR, signed_type (type),
4091                                    make_tree (signed_type (type), XEXP (x, 0)),
4092                                    make_tree (type, XEXP (x, 1)))));
4093
4094     case DIV:
4095       if (TREE_CODE (type) != REAL_TYPE)
4096         t = signed_type (type);
4097       else
4098         t = type;
4099
4100       return fold (convert (type,
4101                             build (TRUNC_DIV_EXPR, t,
4102                                    make_tree (t, XEXP (x, 0)),
4103                                    make_tree (t, XEXP (x, 1)))));
4104     case UDIV:
4105       t = unsigned_type (type);
4106       return fold (convert (type,
4107                             build (TRUNC_DIV_EXPR, t,
4108                                    make_tree (t, XEXP (x, 0)),
4109                                    make_tree (t, XEXP (x, 1)))));
4110    default:
4111       t = make_node (RTL_EXPR);
4112       TREE_TYPE (t) = type;
4113
4114 #ifdef POINTERS_EXTEND_UNSIGNED
4115       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4116          ptr_mode.  So convert.  */
4117       if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4118         x = convert_memory_address (TYPE_MODE (type), x);
4119 #endif
4120
4121       RTL_EXPR_RTL (t) = x;
4122       /* There are no insns to be output
4123          when this rtl_expr is used.  */
4124       RTL_EXPR_SEQUENCE (t) = 0;
4125       return t;
4126     }
4127 }
4128
4129 /* Return an rtx representing the value of X * MULT + ADD.
4130    TARGET is a suggestion for where to store the result (an rtx).
4131    MODE is the machine mode for the computation.
4132    X and MULT must have mode MODE.  ADD may have a different mode.
4133    So can X (defaults to same as MODE).
4134    UNSIGNEDP is non-zero to do unsigned multiplication.
4135    This may emit insns.  */
4136
4137 rtx
4138 expand_mult_add (x, target, mult, add, mode, unsignedp)
4139      rtx x, target, mult, add;
4140      enum machine_mode mode;
4141      int unsignedp;
4142 {
4143   tree type = type_for_mode (mode, unsignedp);
4144   tree add_type = (GET_MODE (add) == VOIDmode
4145                    ? type : type_for_mode (GET_MODE (add), unsignedp));
4146   tree result =  fold (build (PLUS_EXPR, type,
4147                               fold (build (MULT_EXPR, type,
4148                                            make_tree (type, x),
4149                                            make_tree (type, mult))),
4150                               make_tree (add_type, add)));
4151
4152   return expand_expr (result, target, VOIDmode, 0);
4153 }
4154 \f
4155 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4156    and returning TARGET.
4157
4158    If TARGET is 0, a pseudo-register or constant is returned.  */
4159
4160 rtx
4161 expand_and (op0, op1, target)
4162      rtx op0, op1, target;
4163 {
4164   enum machine_mode mode = VOIDmode;
4165   rtx tem;
4166
4167   if (GET_MODE (op0) != VOIDmode)
4168     mode = GET_MODE (op0);
4169   else if (GET_MODE (op1) != VOIDmode)
4170     mode = GET_MODE (op1);
4171
4172   if (mode != VOIDmode)
4173     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4174   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4175     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4176   else
4177     abort ();
4178
4179   if (target == 0)
4180     target = tem;
4181   else if (tem != target)
4182     emit_move_insn (target, tem);
4183   return target;
4184 }
4185 \f
4186 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4187    and storing in TARGET.  Normally return TARGET.
4188    Return 0 if that cannot be done.
4189
4190    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4191    it is VOIDmode, they cannot both be CONST_INT.
4192
4193    UNSIGNEDP is for the case where we have to widen the operands
4194    to perform the operation.  It says to use zero-extension.
4195
4196    NORMALIZEP is 1 if we should convert the result to be either zero
4197    or one.  Normalize is -1 if we should convert the result to be
4198    either zero or -1.  If NORMALIZEP is zero, the result will be left
4199    "raw" out of the scc insn.  */
4200
4201 rtx
4202 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4203      rtx target;
4204      enum rtx_code code;
4205      rtx op0, op1;
4206      enum machine_mode mode;
4207      int unsignedp;
4208      int normalizep;
4209 {
4210   rtx subtarget;
4211   enum insn_code icode;
4212   enum machine_mode compare_mode;
4213   enum machine_mode target_mode = GET_MODE (target);
4214   rtx tem;
4215   rtx last = get_last_insn ();
4216   rtx pattern, comparison;
4217
4218   if (unsignedp)
4219     code = unsigned_condition (code);
4220
4221   /* If one operand is constant, make it the second one.  Only do this
4222      if the other operand is not constant as well.  */
4223
4224   if (swap_commutative_operands_p (op0, op1))
4225     {
4226       tem = op0;
4227       op0 = op1;
4228       op1 = tem;
4229       code = swap_condition (code);
4230     }
4231
4232   if (mode == VOIDmode)
4233     mode = GET_MODE (op0);
4234
4235   /* For some comparisons with 1 and -1, we can convert this to
4236      comparisons with zero.  This will often produce more opportunities for
4237      store-flag insns.  */
4238
4239   switch (code)
4240     {
4241     case LT:
4242       if (op1 == const1_rtx)
4243         op1 = const0_rtx, code = LE;
4244       break;
4245     case LE:
4246       if (op1 == constm1_rtx)
4247         op1 = const0_rtx, code = LT;
4248       break;
4249     case GE:
4250       if (op1 == const1_rtx)
4251         op1 = const0_rtx, code = GT;
4252       break;
4253     case GT:
4254       if (op1 == constm1_rtx)
4255         op1 = const0_rtx, code = GE;
4256       break;
4257     case GEU:
4258       if (op1 == const1_rtx)
4259         op1 = const0_rtx, code = NE;
4260       break;
4261     case LTU:
4262       if (op1 == const1_rtx)
4263         op1 = const0_rtx, code = EQ;
4264       break;
4265     default:
4266       break;
4267     }
4268
4269   /* If we are comparing a double-word integer with zero, we can convert
4270      the comparison into one involving a single word.  */
4271   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4272       && GET_MODE_CLASS (mode) == MODE_INT
4273       && op1 == const0_rtx)
4274     {
4275       if (code == EQ || code == NE)
4276         {
4277           /* Do a logical OR of the two words and compare the result.  */
4278           rtx op0h = gen_highpart (word_mode, op0);
4279           rtx op0l = gen_lowpart (word_mode, op0);
4280           rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4281                                       NULL_RTX, unsignedp, OPTAB_DIRECT);
4282           if (op0both != 0)
4283             return emit_store_flag (target, code, op0both, op1, word_mode,
4284                                     unsignedp, normalizep);
4285         }
4286       else if (code == LT || code == GE)
4287         /* If testing the sign bit, can just test on high word.  */
4288         return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4289                                 op1, word_mode, unsignedp, normalizep);
4290     }
4291
4292   /* From now on, we won't change CODE, so set ICODE now.  */
4293   icode = setcc_gen_code[(int) code];
4294
4295   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4296      complement of A (for GE) and shifting the sign bit to the low bit.  */
4297   if (op1 == const0_rtx && (code == LT || code == GE)
4298       && GET_MODE_CLASS (mode) == MODE_INT
4299       && (normalizep || STORE_FLAG_VALUE == 1
4300           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4301               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4302                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4303     {
4304       subtarget = target;
4305
4306       /* If the result is to be wider than OP0, it is best to convert it
4307          first.  If it is to be narrower, it is *incorrect* to convert it
4308          first.  */
4309       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4310         {
4311           op0 = protect_from_queue (op0, 0);
4312           op0 = convert_modes (target_mode, mode, op0, 0);
4313           mode = target_mode;
4314         }
4315
4316       if (target_mode != mode)
4317         subtarget = 0;
4318
4319       if (code == GE)
4320         op0 = expand_unop (mode, one_cmpl_optab, op0,
4321                            ((STORE_FLAG_VALUE == 1 || normalizep)
4322                             ? 0 : subtarget), 0);
4323
4324       if (STORE_FLAG_VALUE == 1 || normalizep)
4325         /* If we are supposed to produce a 0/1 value, we want to do
4326            a logical shift from the sign bit to the low-order bit; for
4327            a -1/0 value, we do an arithmetic shift.  */
4328         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4329                             size_int (GET_MODE_BITSIZE (mode) - 1),
4330                             subtarget, normalizep != -1);
4331
4332       if (mode != target_mode)
4333         op0 = convert_modes (target_mode, mode, op0, 0);
4334
4335       return op0;
4336     }
4337
4338   if (icode != CODE_FOR_nothing)
4339     {
4340       insn_operand_predicate_fn pred;
4341
4342       /* We think we may be able to do this with a scc insn.  Emit the
4343          comparison and then the scc insn.
4344
4345          compare_from_rtx may call emit_queue, which would be deleted below
4346          if the scc insn fails.  So call it ourselves before setting LAST.
4347          Likewise for do_pending_stack_adjust.  */
4348
4349       emit_queue ();
4350       do_pending_stack_adjust ();
4351       last = get_last_insn ();
4352
4353       comparison
4354         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4355       if (GET_CODE (comparison) == CONST_INT)
4356         return (comparison == const0_rtx ? const0_rtx
4357                 : normalizep == 1 ? const1_rtx
4358                 : normalizep == -1 ? constm1_rtx
4359                 : const_true_rtx);
4360
4361       /* If the code of COMPARISON doesn't match CODE, something is
4362          wrong; we can no longer be sure that we have the operation.
4363          We could handle this case, but it should not happen.  */
4364
4365       if (GET_CODE (comparison) != code)
4366         abort ();
4367
4368       /* Get a reference to the target in the proper mode for this insn.  */
4369       compare_mode = insn_data[(int) icode].operand[0].mode;
4370       subtarget = target;
4371       pred = insn_data[(int) icode].operand[0].predicate;
4372       if (preserve_subexpressions_p ()
4373           || ! (*pred) (subtarget, compare_mode))
4374         subtarget = gen_reg_rtx (compare_mode);
4375
4376       pattern = GEN_FCN (icode) (subtarget);
4377       if (pattern)
4378         {
4379           emit_insn (pattern);
4380
4381           /* If we are converting to a wider mode, first convert to
4382              TARGET_MODE, then normalize.  This produces better combining
4383              opportunities on machines that have a SIGN_EXTRACT when we are
4384              testing a single bit.  This mostly benefits the 68k.
4385
4386              If STORE_FLAG_VALUE does not have the sign bit set when
4387              interpreted in COMPARE_MODE, we can do this conversion as
4388              unsigned, which is usually more efficient.  */
4389           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4390             {
4391               convert_move (target, subtarget,
4392                             (GET_MODE_BITSIZE (compare_mode)
4393                              <= HOST_BITS_PER_WIDE_INT)
4394                             && 0 == (STORE_FLAG_VALUE
4395                                      & ((HOST_WIDE_INT) 1
4396                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4397               op0 = target;
4398               compare_mode = target_mode;
4399             }
4400           else
4401             op0 = subtarget;
4402
4403           /* If we want to keep subexpressions around, don't reuse our
4404              last target.  */
4405
4406           if (preserve_subexpressions_p ())
4407             subtarget = 0;
4408
4409           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4410              we don't have to do anything.  */
4411           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4412             ;
4413           /* STORE_FLAG_VALUE might be the most negative number, so write
4414              the comparison this way to avoid a compiler-time warning.  */
4415           else if (- normalizep == STORE_FLAG_VALUE)
4416             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4417
4418           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4419              makes it hard to use a value of just the sign bit due to
4420              ANSI integer constant typing rules.  */
4421           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4422                    && (STORE_FLAG_VALUE
4423                        & ((HOST_WIDE_INT) 1
4424                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4425             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4426                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4427                                 subtarget, normalizep == 1);
4428           else if (STORE_FLAG_VALUE & 1)
4429             {
4430               op0 = expand_and (op0, const1_rtx, subtarget);
4431               if (normalizep == -1)
4432                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4433             }
4434           else
4435             abort ();
4436
4437           /* If we were converting to a smaller mode, do the
4438              conversion now.  */
4439           if (target_mode != compare_mode)
4440             {
4441               convert_move (target, op0, 0);
4442               return target;
4443             }
4444           else
4445             return op0;
4446         }
4447     }
4448
4449   delete_insns_since (last);
4450
4451   /* If expensive optimizations, use different pseudo registers for each
4452      insn, instead of reusing the same pseudo.  This leads to better CSE,
4453      but slows down the compiler, since there are more pseudos */
4454   subtarget = (!flag_expensive_optimizations
4455                && (target_mode == mode)) ? target : NULL_RTX;
4456
4457   /* If we reached here, we can't do this with a scc insn.  However, there
4458      are some comparisons that can be done directly.  For example, if
4459      this is an equality comparison of integers, we can try to exclusive-or
4460      (or subtract) the two operands and use a recursive call to try the
4461      comparison with zero.  Don't do any of these cases if branches are
4462      very cheap.  */
4463
4464   if (BRANCH_COST > 0
4465       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4466       && op1 != const0_rtx)
4467     {
4468       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4469                           OPTAB_WIDEN);
4470
4471       if (tem == 0)
4472         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4473                             OPTAB_WIDEN);
4474       if (tem != 0)
4475         tem = emit_store_flag (target, code, tem, const0_rtx,
4476                                mode, unsignedp, normalizep);
4477       if (tem == 0)
4478         delete_insns_since (last);
4479       return tem;
4480     }
4481
4482   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4483      the constant zero.  Reject all other comparisons at this point.  Only
4484      do LE and GT if branches are expensive since they are expensive on
4485      2-operand machines.  */
4486
4487   if (BRANCH_COST == 0
4488       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4489       || (code != EQ && code != NE
4490           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4491     return 0;
4492
4493   /* See what we need to return.  We can only return a 1, -1, or the
4494      sign bit.  */
4495
4496   if (normalizep == 0)
4497     {
4498       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4499         normalizep = STORE_FLAG_VALUE;
4500
4501       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4502                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4503                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4504         ;
4505       else
4506         return 0;
4507     }
4508
4509   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4510      do the necessary operation below.  */
4511
4512   tem = 0;
4513
4514   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4515      the sign bit set.  */
4516
4517   if (code == LE)
4518     {
4519       /* This is destructive, so SUBTARGET can't be OP0.  */
4520       if (rtx_equal_p (subtarget, op0))
4521         subtarget = 0;
4522
4523       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4524                           OPTAB_WIDEN);
4525       if (tem)
4526         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4527                             OPTAB_WIDEN);
4528     }
4529
4530   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4531      number of bits in the mode of OP0, minus one.  */
4532
4533   if (code == GT)
4534     {
4535       if (rtx_equal_p (subtarget, op0))
4536         subtarget = 0;
4537
4538       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4539                           size_int (GET_MODE_BITSIZE (mode) - 1),
4540                           subtarget, 0);
4541       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4542                           OPTAB_WIDEN);
4543     }
4544
4545   if (code == EQ || code == NE)
4546     {
4547       /* For EQ or NE, one way to do the comparison is to apply an operation
4548          that converts the operand into a positive number if it is non-zero
4549          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4550          for NE we negate.  This puts the result in the sign bit.  Then we
4551          normalize with a shift, if needed.
4552
4553          Two operations that can do the above actions are ABS and FFS, so try
4554          them.  If that doesn't work, and MODE is smaller than a full word,
4555          we can use zero-extension to the wider mode (an unsigned conversion)
4556          as the operation.  */
4557
4558       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4559          that is compensated by the subsequent overflow when subtracting
4560          one / negating. */
4561
4562       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4563         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4564       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4565         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4566       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4567         {
4568           op0 = protect_from_queue (op0, 0);
4569           tem = convert_modes (word_mode, mode, op0, 1);
4570           mode = word_mode;
4571         }
4572
4573       if (tem != 0)
4574         {
4575           if (code == EQ)
4576             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4577                                 0, OPTAB_WIDEN);
4578           else
4579             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4580         }
4581
4582       /* If we couldn't do it that way, for NE we can "or" the two's complement
4583          of the value with itself.  For EQ, we take the one's complement of
4584          that "or", which is an extra insn, so we only handle EQ if branches
4585          are expensive.  */
4586
4587       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4588         {
4589           if (rtx_equal_p (subtarget, op0))
4590             subtarget = 0;
4591
4592           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4593           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4594                               OPTAB_WIDEN);
4595
4596           if (tem && code == EQ)
4597             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4598         }
4599     }
4600
4601   if (tem && normalizep)
4602     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4603                         size_int (GET_MODE_BITSIZE (mode) - 1),
4604                         subtarget, normalizep == 1);
4605
4606   if (tem)
4607     {
4608       if (GET_MODE (tem) != target_mode)
4609         {
4610           convert_move (target, tem, 0);
4611           tem = target;
4612         }
4613       else if (!subtarget)
4614         {
4615           emit_move_insn (target, tem);
4616           tem = target;
4617         }
4618     }
4619   else
4620     delete_insns_since (last);
4621
4622   return tem;
4623 }
4624
4625 /* Like emit_store_flag, but always succeeds.  */
4626
4627 rtx
4628 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4629      rtx target;
4630      enum rtx_code code;
4631      rtx op0, op1;
4632      enum machine_mode mode;
4633      int unsignedp;
4634      int normalizep;
4635 {
4636   rtx tem, label;
4637
4638   /* First see if emit_store_flag can do the job.  */
4639   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4640   if (tem != 0)
4641     return tem;
4642
4643   if (normalizep == 0)
4644     normalizep = 1;
4645
4646   /* If this failed, we have to do this with set/compare/jump/set code.  */
4647
4648   if (GET_CODE (target) != REG
4649       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4650     target = gen_reg_rtx (GET_MODE (target));
4651
4652   emit_move_insn (target, const1_rtx);
4653   label = gen_label_rtx ();
4654   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4655                            NULL_RTX, label);
4656
4657   emit_move_insn (target, const0_rtx);
4658   emit_label (label);
4659
4660   return target;
4661 }
4662 \f
4663 /* Perform possibly multi-word comparison and conditional jump to LABEL
4664    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4665
4666    The algorithm is based on the code in expr.c:do_jump.
4667
4668    Note that this does not perform a general comparison.  Only variants
4669    generated within expmed.c are correctly handled, others abort (but could
4670    be handled if needed).  */
4671
4672 static void
4673 do_cmp_and_jump (arg1, arg2, op, mode, label)
4674      rtx arg1, arg2, label;
4675      enum rtx_code op;
4676      enum machine_mode mode;
4677 {
4678   /* If this mode is an integer too wide to compare properly,
4679      compare word by word.  Rely on cse to optimize constant cases.  */
4680
4681   if (GET_MODE_CLASS (mode) == MODE_INT
4682       && ! can_compare_p (op, mode, ccp_jump))
4683     {
4684       rtx label2 = gen_label_rtx ();
4685
4686       switch (op)
4687         {
4688         case LTU:
4689           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4690           break;
4691
4692         case LEU:
4693           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4694           break;
4695
4696         case LT:
4697           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4698           break;
4699
4700         case GT:
4701           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4702           break;
4703
4704         case GE:
4705           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4706           break;
4707
4708           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4709              that's the only equality operations we do */
4710         case EQ:
4711           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4712             abort();
4713           do_jump_by_parts_equality_rtx (arg1, label2, label);
4714           break;
4715
4716         case NE:
4717           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4718             abort();
4719           do_jump_by_parts_equality_rtx (arg1, label, label2);
4720           break;
4721
4722         default:
4723           abort();
4724         }
4725
4726       emit_label (label2);
4727     }
4728   else
4729     {
4730       emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4731     }
4732 }