gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000 Free Software Foundation, Inc.
   5
   6 This file is part of GNU CC.
   7
   8 GNU CC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU CC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU CC; see the file COPYING.  If not, write to
  20 the Free Software Foundation, 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-flags.h"
  32 #include "insn-codes.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "real.h"
  36 #include "recog.h"
  37
  38 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  39                                                  unsigned HOST_WIDE_INT,
  40                                                  unsigned HOST_WIDE_INT, rtx,
  41                                                  unsigned int));
  42 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  43                                                  unsigned HOST_WIDE_INT, rtx,
  44                                                  unsigned int));
  45 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  46                                                  unsigned HOST_WIDE_INT,
  47                                                  unsigned HOST_WIDE_INT,
  48                                                  unsigned HOST_WIDE_INT,
  49                                                  rtx, int, unsigned int));
  50 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  51                                                  int, int));
  52 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  53                                                  int, int));
  54 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  55                                                  unsigned HOST_WIDE_INT, int,
  56                                                  unsigned int));
  57 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  58                                                  enum machine_mode, rtx));
  59
  60 /* Non-zero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static int sdiv_pow2_cheap, smod_pow2_cheap;
  66
  67 #ifndef SLOW_UNALIGNED_ACCESS
  68 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  69 #endif
  70
  71 /* For compilers that support multiple targets with different word sizes,
  72    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  73    is the H8/300(H) compiler.  */
  74
  75 #ifndef MAX_BITS_PER_WORD
  76 #define MAX_BITS_PER_WORD BITS_PER_WORD
  77 #endif
  78
  79 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  80    shift count and some by mode.  */
  81 static int add_cost, negate_cost, zero_cost;
  82 static int shift_cost[MAX_BITS_PER_WORD];
  83 static int shiftadd_cost[MAX_BITS_PER_WORD];
  84 static int shiftsub_cost[MAX_BITS_PER_WORD];
  85 static int mul_cost[NUM_MACHINE_MODES];
  86 static int div_cost[NUM_MACHINE_MODES];
  87 static int mul_widen_cost[NUM_MACHINE_MODES];
  88 static int mul_highpart_cost[NUM_MACHINE_MODES];
  89
  90 void
  91 init_expmed ()
  92 {
  93   /* This is "some random pseudo register" for purposes of calling recog
  94      to see what insns exist.  */
  95   rtx reg = gen_rtx_REG (word_mode, 10000);
  96   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  97   int dummy;
  98   int m;
  99   enum machine_mode mode, wider_mode;
 100
 101   start_sequence ();
 102
 103   reg = gen_rtx_REG (word_mode, 10000);
 104
 105   zero_cost = rtx_cost (const0_rtx, 0);
 106   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 107
 108   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 109                                        gen_rtx_ASHIFT (word_mode, reg,
 110                                                        const0_rtx)));
 111
 112   shiftadd_insn
 113     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 114                               gen_rtx_PLUS (word_mode,
 115                                             gen_rtx_MULT (word_mode,
 116                                                           reg, const0_rtx),
 117                                             reg)));
 118
 119   shiftsub_insn
 120     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 121                               gen_rtx_MINUS (word_mode,
 122                                              gen_rtx_MULT (word_mode,
 123                                                            reg, const0_rtx),
 124                                              reg)));
 125
 126   init_recog ();
 127
 128   shift_cost[0] = 0;
 129   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 130
 131   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 132     {
 133       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 134
 135       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 136       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 137         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 138
 139       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 140         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 141       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 142         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 143
 144       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 145         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 147         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 148     }
 149
 150   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 151
 152   sdiv_pow2_cheap
 153     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 154        <= 2 * add_cost);
 155   smod_pow2_cheap
 156     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 157        <= 2 * add_cost);
 158
 159   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 160        mode != VOIDmode;
 161        mode = GET_MODE_WIDER_MODE (mode))
 162     {
 163       reg = gen_rtx_REG (mode, 10000);
 164       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 165       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 166       wider_mode = GET_MODE_WIDER_MODE (mode);
 167       if (wider_mode != VOIDmode)
 168         {
 169           mul_widen_cost[(int) wider_mode]
 170             = rtx_cost (gen_rtx_MULT (wider_mode,
 171                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 172                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 173                         SET);
 174           mul_highpart_cost[(int) mode]
 175             = rtx_cost (gen_rtx_TRUNCATE
 176                         (mode,
 177                          gen_rtx_LSHIFTRT (wider_mode,
 178                                            gen_rtx_MULT (wider_mode,
 179                                                          gen_rtx_ZERO_EXTEND
 180                                                          (wider_mode, reg),
 181                                                          gen_rtx_ZERO_EXTEND
 182                                                          (wider_mode, reg)),
 183                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 184                         SET);
 185         }
 186     }
 187
 188   end_sequence ();
 189 }
 190
 191 /* Return an rtx representing minus the value of X.
 192    MODE is the intended mode of the result,
 193    useful if X is a CONST_INT.  */
 194
 195 rtx
 196 negate_rtx (mode, x)
 197      enum machine_mode mode;
 198      rtx x;
 199 {
 200   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 201
 202   if (result == 0)
 203     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 204
 205   return result;
 206 }
 207 \f
 208 /* Generate code to store value from rtx VALUE
 209    into a bit-field within structure STR_RTX
 210    containing BITSIZE bits starting at bit BITNUM.
 211    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 212    ALIGN is the alignment that STR_RTX is known to have.
 213    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 214
 215 /* ??? Note that there are two different ideas here for how
 216    to determine the size to count bits within, for a register.
 217    One is BITS_PER_WORD, and the other is the size of operand 3
 218    of the insv pattern.
 219
 220    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 221    else, we use the mode of operand 3.  */
 222
 223 rtx
 224 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 225      rtx str_rtx;
 226      unsigned HOST_WIDE_INT bitsize;
 227      unsigned HOST_WIDE_INT bitnum;
 228      enum machine_mode fieldmode;
 229      rtx value;
 230      unsigned int align;
 231      HOST_WIDE_INT total_size;
 232 {
 233   unsigned int unit
 234     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 235   unsigned HOST_WIDE_INT offset = bitnum / unit;
 236   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 237   register rtx op0 = str_rtx;
 238 #ifdef HAVE_insv
 239   unsigned HOST_WIDE_INT insv_bitsize;
 240   enum machine_mode op_mode;
 241
 242   op_mode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 243   if (op_mode == VOIDmode)
 244     op_mode = word_mode;
 245   insv_bitsize = GET_MODE_BITSIZE (op_mode);
 246 #endif
 247
 248   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 249     abort ();
 250
 251   /* Discount the part of the structure before the desired byte.
 252      We need to know how many bytes are safe to reference after it.  */
 253   if (total_size >= 0)
 254     total_size -= (bitpos / BIGGEST_ALIGNMENT
 255                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 256
 257   while (GET_CODE (op0) == SUBREG)
 258     {
 259       /* The following line once was done only if WORDS_BIG_ENDIAN,
 260          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 261          meaningful at a much higher level; when structures are copied
 262          between memory and regs, the higher-numbered regs
 263          always get higher addresses.  */
 264       offset += SUBREG_WORD (op0);
 265       /* We used to adjust BITPOS here, but now we do the whole adjustment
 266          right after the loop.  */
 267       op0 = SUBREG_REG (op0);
 268     }
 269
 270   /* Make sure we are playing with integral modes.  Pun with subregs
 271      if we aren't.  */
 272   {
 273     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 274     if (imode != GET_MODE (op0))
 275       {
 276         if (GET_CODE (op0) == MEM)
 277           op0 = change_address (op0, imode, NULL_RTX);
 278         else if (imode != BLKmode)
 279           op0 = gen_lowpart (imode, op0);
 280         else
 281           abort ();
 282       }
 283   }
 284
 285   /* If OP0 is a register, BITPOS must count within a word.
 286      But as we have it, it counts within whatever size OP0 now has.
 287      On a bigendian machine, these are not the same, so convert.  */
 288   if (BYTES_BIG_ENDIAN
 289       && GET_CODE (op0) != MEM
 290       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 291     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 292
 293   value = protect_from_queue (value, 0);
 294
 295   if (flag_force_mem)
 296     value = force_not_mem (value);
 297
 298   if ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 299        || (GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)
 300            && GET_MODE_SIZE (fieldmode) != 0))
 301       && (GET_CODE (op0) != MEM
 302           || ! SLOW_UNALIGNED_ACCESS (fieldmode, align)
 303           || (offset * BITS_PER_UNIT % bitsize == 0
 304               && align % GET_MODE_BITSIZE (fieldmode) == 0))
 305       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 306       && bitsize == GET_MODE_BITSIZE (fieldmode))
 307     {
 308       /* Storing in a full-word or multi-word field in a register
 309          can be done with just SUBREG.  Also, storing in the entire object
 310          can be done with just SUBREG.  */
 311       if (GET_MODE (op0) != fieldmode)
 312         {
 313           if (GET_CODE (op0) == SUBREG)
 314             {
 315               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 316                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 317                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 318                 op0 = SUBREG_REG (op0);
 319               else
 320                 /* Else we've got some float mode source being extracted into
 321                    a different float mode destination -- this combination of
 322                    subregs results in Severe Tire Damage.  */
 323                 abort ();
 324             }
 325           if (GET_CODE (op0) == REG)
 326             op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
 327           else
 328             op0 = change_address (op0, fieldmode,
 329                                   plus_constant (XEXP (op0, 0), offset));
 330         }
 331       emit_move_insn (op0, value);
 332       return value;
 333     }
 334
 335   /* Storing an lsb-aligned field in a register
 336      can be done with a movestrict instruction.  */
 337
 338   if (GET_CODE (op0) != MEM
 339       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 340       && bitsize == GET_MODE_BITSIZE (fieldmode)
 341       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 342           != CODE_FOR_nothing))
 343     {
 344       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 345
 346       /* Get appropriate low part of the value being stored.  */
 347       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 348         value = gen_lowpart (fieldmode, value);
 349       else if (!(GET_CODE (value) == SYMBOL_REF
 350                  || GET_CODE (value) == LABEL_REF
 351                  || GET_CODE (value) == CONST))
 352         value = convert_to_mode (fieldmode, value, 0);
 353
 354       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 355         value = copy_to_mode_reg (fieldmode, value);
 356
 357       if (GET_CODE (op0) == SUBREG)
 358         {
 359           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 360               || GET_MODE_CLASS (fieldmode) == MODE_INT
 361               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 362             op0 = SUBREG_REG (op0);
 363           else
 364             /* Else we've got some float mode source being extracted into
 365                a different float mode destination -- this combination of
 366                subregs results in Severe Tire Damage.  */
 367             abort ();
 368         }
 369
 370       emit_insn (GEN_FCN (icode)
 371                  (gen_rtx_SUBREG (fieldmode, op0, offset), value));
 372
 373       return value;
 374     }
 375
 376   /* Handle fields bigger than a word.  */
 377
 378   if (bitsize > BITS_PER_WORD)
 379     {
 380       /* Here we transfer the words of the field
 381          in the order least significant first.
 382          This is because the most significant word is the one which may
 383          be less than full.
 384          However, only do that if the value is not BLKmode.  */
 385
 386       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 387       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 388       unsigned int i;
 389
 390       /* This is the mode we must force value to, so that there will be enough
 391          subwords to extract.  Note that fieldmode will often (always?) be
 392          VOIDmode, because that is what store_field uses to indicate that this
 393          is a bit field, but passing VOIDmode to operand_subword_force will
 394          result in an abort.  */
 395       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
 396
 397       for (i = 0; i < nwords; i++)
 398         {
 399           /* If I is 0, use the low-order word in both field and target;
 400              if I is 1, use the next to lowest word; and so on.  */
 401           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 402           unsigned int bit_offset = (backwards
 403                             ? MAX ((int) bitsize - ((int) i + 1)
 404                                    * BITS_PER_WORD,
 405                                    0)
 406                             : (int) i * BITS_PER_WORD);
 407
 408           store_bit_field (op0, MIN (BITS_PER_WORD,
 409                                      bitsize - i * BITS_PER_WORD),
 410                            bitnum + bit_offset, word_mode,
 411                            operand_subword_force (value, wordnum,
 412                                                   (GET_MODE (value) == VOIDmode
 413                                                    ? fieldmode
 414                                                    : GET_MODE (value))),
 415                            align, total_size);
 416         }
 417       return value;
 418     }
 419
 420   /* From here on we can assume that the field to be stored in is
 421      a full-word (whatever type that is), since it is shorter than a word.  */
 422
 423   /* OFFSET is the number of words or bytes (UNIT says which)
 424      from STR_RTX to the first word or byte containing part of the field.  */
 425
 426   if (GET_CODE (op0) != MEM)
 427     {
 428       if (offset != 0
 429           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 430         {
 431           if (GET_CODE (op0) != REG)
 432             {
 433               /* Since this is a destination (lvalue), we can't copy it to a
 434                  pseudo.  We can trivially remove a SUBREG that does not
 435                  change the size of the operand.  Such a SUBREG may have been
 436                  added above.  Otherwise, abort.  */
 437               if (GET_CODE (op0) == SUBREG
 438                   && (GET_MODE_SIZE (GET_MODE (op0))
 439                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 440                 op0 = SUBREG_REG (op0);
 441               else
 442                 abort ();
 443             }
 444           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 445                                 op0, offset);
 446         }
 447       offset = 0;
 448     }
 449   else
 450     {
 451       op0 = protect_from_queue (op0, 1);
 452     }
 453
 454   /* If VALUE is a floating-point mode, access it as an integer of the
 455      corresponding size.  This can occur on a machine with 64 bit registers
 456      that uses SFmode for float.  This can also occur for unaligned float
 457      structure fields.  */
 458   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 459     {
 460       if (GET_CODE (value) != REG)
 461         value = copy_to_reg (value);
 462       value = gen_rtx_SUBREG (word_mode, value, 0);
 463     }
 464
 465   /* Now OFFSET is nonzero only if OP0 is memory
 466      and is therefore always measured in bytes.  */
 467
 468 #ifdef HAVE_insv
 469   if (HAVE_insv
 470       && GET_MODE (value) != BLKmode
 471       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 472       /* Ensure insv's size is wide enough for this field.  */
 473       && (insv_bitsize >= bitsize)
 474       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 475             && (bitsize + bitpos > insv_bitsize)))
 476     {
 477       int xbitpos = bitpos;
 478       rtx value1;
 479       rtx xop0 = op0;
 480       rtx last = get_last_insn ();
 481       rtx pat;
 482       enum machine_mode maxmode;
 483       int save_volatile_ok = volatile_ok;
 484
 485       maxmode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 486       if (maxmode == VOIDmode)
 487         maxmode = word_mode;
 488
 489       volatile_ok = 1;
 490
 491       /* If this machine's insv can only insert into a register, copy OP0
 492          into a register and save it back later.  */
 493       /* This used to check flag_force_mem, but that was a serious
 494          de-optimization now that flag_force_mem is enabled by -O2.  */
 495       if (GET_CODE (op0) == MEM
 496           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 497                 (op0, VOIDmode)))
 498         {
 499           rtx tempreg;
 500           enum machine_mode bestmode;
 501
 502           /* Get the mode to use for inserting into this field.  If OP0 is
 503              BLKmode, get the smallest mode consistent with the alignment. If
 504              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 505              mode. Otherwise, use the smallest mode containing the field.  */
 506
 507           if (GET_MODE (op0) == BLKmode
 508               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 509             bestmode
 510               = get_best_mode (bitsize, bitnum, align, maxmode,
 511                                MEM_VOLATILE_P (op0));
 512           else
 513             bestmode = GET_MODE (op0);
 514
 515           if (bestmode == VOIDmode
 516               || (SLOW_UNALIGNED_ACCESS (bestmode, align)
 517                   && GET_MODE_BITSIZE (bestmode) > align))
 518             goto insv_loses;
 519
 520           /* Adjust address to point to the containing unit of that mode.  */
 521           unit = GET_MODE_BITSIZE (bestmode);
 522           /* Compute offset as multiple of this unit, counting in bytes.  */
 523           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 524           bitpos = bitnum % unit;
 525           op0 = change_address (op0, bestmode,
 526                                 plus_constant (XEXP (op0, 0), offset));
 527
 528           /* Fetch that unit, store the bitfield in it, then store
 529              the unit.  */
 530           tempreg = copy_to_reg (op0);
 531           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 532                            align, total_size);
 533           emit_move_insn (op0, tempreg);
 534           return value;
 535         }
 536       volatile_ok = save_volatile_ok;
 537
 538       /* Add OFFSET into OP0's address.  */
 539       if (GET_CODE (xop0) == MEM)
 540         xop0 = change_address (xop0, byte_mode,
 541                                plus_constant (XEXP (xop0, 0), offset));
 542
 543       /* If xop0 is a register, we need it in MAXMODE
 544          to make it acceptable to the format of insv.  */
 545       if (GET_CODE (xop0) == SUBREG)
 546         /* We can't just change the mode, because this might clobber op0,
 547            and we will need the original value of op0 if insv fails.  */
 548         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
 549       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 550         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 551
 552       /* On big-endian machines, we count bits from the most significant.
 553          If the bit field insn does not, we must invert.  */
 554
 555       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 556         xbitpos = unit - bitsize - xbitpos;
 557
 558       /* We have been counting XBITPOS within UNIT.
 559          Count instead within the size of the register.  */
 560       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 561         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 562
 563       unit = GET_MODE_BITSIZE (maxmode);
 564
 565       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 566       value1 = value;
 567       if (GET_MODE (value) != maxmode)
 568         {
 569           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 570             {
 571               /* Optimization: Don't bother really extending VALUE
 572                  if it has all the bits we will actually use.  However,
 573                  if we must narrow it, be sure we do it correctly.  */
 574
 575               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 576                 {
 577                   /* Avoid making subreg of a subreg, or of a mem.  */
 578                   if (GET_CODE (value1) != REG)
 579                     value1 = copy_to_reg (value1);
 580                   value1 = gen_rtx_SUBREG (maxmode, value1, 0);
 581                 }
 582               else
 583                 value1 = gen_lowpart (maxmode, value1);
 584             }
 585           else if (!CONSTANT_P (value))
 586             /* Parse phase is supposed to make VALUE's data type
 587                match that of the component reference, which is a type
 588                at least as wide as the field; so VALUE should have
 589                a mode that corresponds to that type.  */
 590             abort ();
 591         }
 592
 593       /* If this machine's insv insists on a register,
 594          get VALUE1 into a register.  */
 595       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 596              (value1, maxmode)))
 597         value1 = force_reg (maxmode, value1);
 598
 599       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 600       if (pat)
 601         emit_insn (pat);
 602       else
 603         {
 604           delete_insns_since (last);
 605           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 606         }
 607     }
 608   else
 609     insv_loses:
 610 #endif
 611     /* Insv is not available; store using shifts and boolean ops.  */
 612     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 613   return value;
 614 }
 615 \f
 616 /* Use shifts and boolean operations to store VALUE
 617    into a bit field of width BITSIZE
 618    in a memory location specified by OP0 except offset by OFFSET bytes.
 619      (OFFSET must be 0 if OP0 is a register.)
 620    The field starts at position BITPOS within the byte.
 621     (If OP0 is a register, it may be a full word or a narrower mode,
 622      but BITPOS still counts within a full word,
 623      which is significant on bigendian machines.)
 624    STRUCT_ALIGN is the alignment the structure is known to have.
 625
 626    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 627
 628 static void
 629 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 630      register rtx op0;
 631      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 632      register rtx value;
 633      unsigned int struct_align;
 634 {
 635   register enum machine_mode mode;
 636   unsigned int total_bits = BITS_PER_WORD;
 637   rtx subtarget, temp;
 638   int all_zero = 0;
 639   int all_one = 0;
 640
 641   if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
 642     struct_align = BIGGEST_ALIGNMENT;
 643
 644   /* There is a case not handled here:
 645      a structure with a known alignment of just a halfword
 646      and a field split across two aligned halfwords within the structure.
 647      Or likewise a structure with a known alignment of just a byte
 648      and a field split across two bytes.
 649      Such cases are not supposed to be able to occur.  */
 650
 651   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 652     {
 653       if (offset != 0)
 654         abort ();
 655       /* Special treatment for a bit field split across two registers.  */
 656       if (bitsize + bitpos > BITS_PER_WORD)
 657         {
 658           store_split_bit_field (op0, bitsize, bitpos,
 659                                  value, BITS_PER_WORD);
 660           return;
 661         }
 662     }
 663   else
 664     {
 665       /* Get the proper mode to use for this field.  We want a mode that
 666          includes the entire field.  If such a mode would be larger than
 667          a word, we won't be doing the extraction the normal way.  */
 668
 669       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 670                             struct_align, word_mode,
 671                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 672
 673       if (mode == VOIDmode)
 674         {
 675           /* The only way this should occur is if the field spans word
 676              boundaries.  */
 677           store_split_bit_field (op0,
 678                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 679                                  value, struct_align);
 680           return;
 681         }
 682
 683       total_bits = GET_MODE_BITSIZE (mode);
 684
 685       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 686          be in the range 0 to total_bits-1, and put any excess bytes in
 687          OFFSET.  */
 688       if (bitpos >= total_bits)
 689         {
 690           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 691           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 692                      * BITS_PER_UNIT);
 693         }
 694
 695       /* Get ref to an aligned byte, halfword, or word containing the field.
 696          Adjust BITPOS to be position within a word,
 697          and OFFSET to be the offset of that word.
 698          Then alter OP0 to refer to that word.  */
 699       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 700       offset -= (offset % (total_bits / BITS_PER_UNIT));
 701       op0 = change_address (op0, mode,
 702                             plus_constant (XEXP (op0, 0), offset));
 703     }
 704
 705   mode = GET_MODE (op0);
 706
 707   /* Now MODE is either some integral mode for a MEM as OP0,
 708      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 709      The bit field is contained entirely within OP0.
 710      BITPOS is the starting bit number within OP0.
 711      (OP0's mode may actually be narrower than MODE.)  */
 712
 713   if (BYTES_BIG_ENDIAN)
 714       /* BITPOS is the distance between our msb
 715          and that of the containing datum.
 716          Convert it to the distance from the lsb.  */
 717       bitpos = total_bits - bitsize - bitpos;
 718
 719   /* Now BITPOS is always the distance between our lsb
 720      and that of OP0.  */
 721
 722   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 723      we must first convert its mode to MODE.  */
 724
 725   if (GET_CODE (value) == CONST_INT)
 726     {
 727       register HOST_WIDE_INT v = INTVAL (value);
 728
 729       if (bitsize < HOST_BITS_PER_WIDE_INT)
 730         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 731
 732       if (v == 0)
 733         all_zero = 1;
 734       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 735                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 736                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 737         all_one = 1;
 738
 739       value = lshift_value (mode, value, bitpos, bitsize);
 740     }
 741   else
 742     {
 743       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 744                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 745
 746       if (GET_MODE (value) != mode)
 747         {
 748           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 749               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 750             value = gen_lowpart (mode, value);
 751           else
 752             value = convert_to_mode (mode, value, 1);
 753         }
 754
 755       if (must_and)
 756         value = expand_binop (mode, and_optab, value,
 757                               mask_rtx (mode, 0, bitsize, 0),
 758                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 759       if (bitpos > 0)
 760         value = expand_shift (LSHIFT_EXPR, mode, value,
 761                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 762     }
 763
 764   /* Now clear the chosen bits in OP0,
 765      except that if VALUE is -1 we need not bother.  */
 766
 767   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 768
 769   if (! all_one)
 770     {
 771       temp = expand_binop (mode, and_optab, op0,
 772                            mask_rtx (mode, bitpos, bitsize, 1),
 773                            subtarget, 1, OPTAB_LIB_WIDEN);
 774       subtarget = temp;
 775     }
 776   else
 777     temp = op0;
 778
 779   /* Now logical-or VALUE into OP0, unless it is zero.  */
 780
 781   if (! all_zero)
 782     temp = expand_binop (mode, ior_optab, temp, value,
 783                          subtarget, 1, OPTAB_LIB_WIDEN);
 784   if (op0 != temp)
 785     emit_move_insn (op0, temp);
 786 }
 787 \f
 788 /* Store a bit field that is split across multiple accessible memory objects.
 789
 790    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 791    BITSIZE is the field width; BITPOS the position of its first bit
 792    (within the word).
 793    VALUE is the value to store.
 794    ALIGN is the known alignment of OP0.
 795    This is also the size of the memory objects to be used.
 796
 797    This does not yet handle fields wider than BITS_PER_WORD.  */
 798
 799 static void
 800 store_split_bit_field (op0, bitsize, bitpos, value, align)
 801      rtx op0;
 802      unsigned HOST_WIDE_INT bitsize, bitpos;
 803      rtx value;
 804      unsigned int align;
 805 {
 806   unsigned int unit;
 807   unsigned int bitsdone = 0;
 808
 809   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 810      much at a time.  */
 811   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 812     unit = BITS_PER_WORD;
 813   else
 814     unit = MIN (align, BITS_PER_WORD);
 815
 816   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 817      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 818      that VALUE might be a floating-point constant.  */
 819   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 820     {
 821       rtx word = gen_lowpart_common (word_mode, value);
 822
 823       if (word && (value != word))
 824         value = word;
 825       else
 826         value = gen_lowpart_common (word_mode,
 827                                     force_reg (GET_MODE (value) != VOIDmode
 828                                                ? GET_MODE (value)
 829                                                : word_mode, value));
 830     }
 831   else if (GET_CODE (value) == ADDRESSOF)
 832     value = copy_to_reg (value);
 833
 834   while (bitsdone < bitsize)
 835     {
 836       unsigned HOST_WIDE_INT thissize;
 837       rtx part, word;
 838       unsigned HOST_WIDE_INT thispos;
 839       unsigned HOST_WIDE_INT offset;
 840
 841       offset = (bitpos + bitsdone) / unit;
 842       thispos = (bitpos + bitsdone) % unit;
 843
 844       /* THISSIZE must not overrun a word boundary.  Otherwise,
 845          store_fixed_bit_field will call us again, and we will mutually
 846          recurse forever.  */
 847       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 848       thissize = MIN (thissize, unit - thispos);
 849
 850       if (BYTES_BIG_ENDIAN)
 851         {
 852           int total_bits;
 853
 854           /* We must do an endian conversion exactly the same way as it is
 855              done in extract_bit_field, so that the two calls to
 856              extract_fixed_bit_field will have comparable arguments.  */
 857           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 858             total_bits = BITS_PER_WORD;
 859           else
 860             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 861
 862           /* Fetch successively less significant portions.  */
 863           if (GET_CODE (value) == CONST_INT)
 864             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 865                              >> (bitsize - bitsdone - thissize))
 866                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 867           else
 868             /* The args are chosen so that the last part includes the
 869                lsb.  Give extract_bit_field the value it needs (with
 870                endianness compensation) to fetch the piece we want.
 871
 872                ??? We have no idea what the alignment of VALUE is, so
 873                we have to use a guess.  */
 874             part
 875               = extract_fixed_bit_field
 876                 (word_mode, value, 0, thissize,
 877                  total_bits - bitsize + bitsdone, NULL_RTX, 1,
 878                  GET_MODE (value) == VOIDmode
 879                  ? UNITS_PER_WORD
 880                  : (GET_MODE (value) == BLKmode
 881                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 882         }
 883       else
 884         {
 885           /* Fetch successively more significant portions.  */
 886           if (GET_CODE (value) == CONST_INT)
 887             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 888                              >> bitsdone)
 889                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 890           else
 891             part
 892               = extract_fixed_bit_field
 893                 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
 894                  GET_MODE (value) == VOIDmode
 895                  ? UNITS_PER_WORD
 896                  : (GET_MODE (value) == BLKmode
 897                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 898         }
 899
 900       /* If OP0 is a register, then handle OFFSET here.
 901
 902          When handling multiword bitfields, extract_bit_field may pass
 903          down a word_mode SUBREG of a larger REG for a bitfield that actually
 904          crosses a word boundary.  Thus, for a SUBREG, we must find
 905          the current word starting from the base register.  */
 906       if (GET_CODE (op0) == SUBREG)
 907         {
 908           word = operand_subword_force (SUBREG_REG (op0),
 909                                         SUBREG_WORD (op0) + offset,
 910                                         GET_MODE (SUBREG_REG (op0)));
 911           offset = 0;
 912         }
 913       else if (GET_CODE (op0) == REG)
 914         {
 915           word = operand_subword_force (op0, offset, GET_MODE (op0));
 916           offset = 0;
 917         }
 918       else
 919         word = op0;
 920
 921       /* OFFSET is in UNITs, and UNIT is in bits.
 922          store_fixed_bit_field wants offset in bytes.  */
 923       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 924                              thissize, thispos, part, align);
 925       bitsdone += thissize;
 926     }
 927 }
 928 \f
 929 /* Generate code to extract a byte-field from STR_RTX
 930    containing BITSIZE bits, starting at BITNUM,
 931    and put it in TARGET if possible (if TARGET is nonzero).
 932    Regardless of TARGET, we return the rtx for where the value is placed.
 933    It may be a QUEUED.
 934
 935    STR_RTX is the structure containing the byte (a REG or MEM).
 936    UNSIGNEDP is nonzero if this is an unsigned bit field.
 937    MODE is the natural mode of the field value once extracted.
 938    TMODE is the mode the caller would like the value to have;
 939    but the value may be returned with type MODE instead.
 940
 941    ALIGN is the alignment that STR_RTX is known to have.
 942    TOTAL_SIZE is the size in bytes of the containing structure,
 943    or -1 if varying.
 944
 945    If a TARGET is specified and we can store in it at no extra cost,
 946    we do so, and return TARGET.
 947    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 948    if they are equally easy.  */
 949
 950 rtx
 951 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 952                    target, mode, tmode, align, total_size)
 953      rtx str_rtx;
 954      unsigned HOST_WIDE_INT bitsize;
 955      unsigned HOST_WIDE_INT bitnum;
 956      int unsignedp;
 957      rtx target;
 958      enum machine_mode mode, tmode;
 959      unsigned int align;
 960      HOST_WIDE_INT total_size;
 961 {
 962   unsigned int unit
 963     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 964   unsigned HOST_WIDE_INT offset = bitnum / unit;
 965   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 966   register rtx op0 = str_rtx;
 967   rtx spec_target = target;
 968   rtx spec_target_subreg = 0;
 969   enum machine_mode int_mode;
 970 #ifdef HAVE_extv
 971   unsigned HOST_WIDE_INT extv_bitsize;
 972   enum machine_mode extv_mode;
 973 #endif
 974 #ifdef HAVE_extzv
 975   unsigned HOST_WIDE_INT extzv_bitsize;
 976   enum machine_mode extzv_mode;
 977 #endif
 978
 979 #ifdef HAVE_extv
 980   extv_mode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
 981   if (extv_mode == VOIDmode)
 982     extv_mode = word_mode;
 983   extv_bitsize = GET_MODE_BITSIZE (extv_mode);
 984 #endif
 985
 986 #ifdef HAVE_extzv
 987   extzv_mode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
 988   if (extzv_mode == VOIDmode)
 989     extzv_mode = word_mode;
 990   extzv_bitsize = GET_MODE_BITSIZE (extzv_mode);
 991 #endif
 992
 993   /* Discount the part of the structure before the desired byte.
 994      We need to know how many bytes are safe to reference after it.  */
 995   if (total_size >= 0)
 996     total_size -= (bitpos / BIGGEST_ALIGNMENT
 997                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 998
 999   if (tmode == VOIDmode)
1000     tmode = mode;
1001   while (GET_CODE (op0) == SUBREG)
1002     {
1003       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
1004       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
1005
1006       offset += SUBREG_WORD (op0);
1007
1008       inner_size = MIN (inner_size, BITS_PER_WORD);
1009
1010       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
1011         {
1012           bitpos += inner_size - outer_size;
1013           if (bitpos > unit)
1014             {
1015               offset += (bitpos / unit);
1016               bitpos %= unit;
1017             }
1018         }
1019
1020       op0 = SUBREG_REG (op0);
1021     }
1022
1023   /* Make sure we are playing with integral modes.  Pun with subregs
1024      if we aren't.  */
1025   {
1026     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1027     if (imode != GET_MODE (op0))
1028       {
1029         if (GET_CODE (op0) == MEM)
1030           op0 = change_address (op0, imode, NULL_RTX);
1031         else if (imode != BLKmode)
1032           op0 = gen_lowpart (imode, op0);
1033         else
1034           abort ();
1035       }
1036   }
1037
1038   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1039      If that's wrong, the solution is to test for it and set TARGET to 0
1040      if needed.  */
1041
1042   /* If OP0 is a register, BITPOS must count within a word.
1043      But as we have it, it counts within whatever size OP0 now has.
1044      On a bigendian machine, these are not the same, so convert.  */
1045   if (BYTES_BIG_ENDIAN
1046       && GET_CODE (op0) != MEM
1047       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1048     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1049
1050   /* Extracting a full-word or multi-word value
1051      from a structure in a register or aligned memory.
1052      This can be done with just SUBREG.
1053      So too extracting a subword value in
1054      the least significant part of the register.  */
1055
1056   if (((GET_CODE (op0) != MEM
1057         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1058                                   GET_MODE_BITSIZE (GET_MODE (op0))))
1059        || (GET_CODE (op0) == MEM
1060            && (! SLOW_UNALIGNED_ACCESS (mode, align)
1061                || (offset * BITS_PER_UNIT % bitsize == 0
1062                    && align % bitsize == 0))))
1063       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1064            && bitpos % BITS_PER_WORD == 0)
1065           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1066               /* ??? The big endian test here is wrong.  This is correct
1067                  if the value is in a register, and if mode_for_size is not
1068                  the same mode as op0.  This causes us to get unnecessarily
1069                  inefficient code from the Thumb port when -mbig-endian.  */
1070               && (BYTES_BIG_ENDIAN
1071                   ? bitpos + bitsize == BITS_PER_WORD
1072                   : bitpos == 0))))
1073     {
1074       enum machine_mode mode1
1075         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
1076
1077       if (mode1 != GET_MODE (op0))
1078         {
1079           if (GET_CODE (op0) == SUBREG)
1080             {
1081               if (GET_MODE (SUBREG_REG (op0)) == mode1
1082                   || GET_MODE_CLASS (mode1) == MODE_INT
1083                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1084                 op0 = SUBREG_REG (op0);
1085               else
1086                 /* Else we've got some float mode source being extracted into
1087                    a different float mode destination -- this combination of
1088                    subregs results in Severe Tire Damage.  */
1089                 abort ();
1090             }
1091           if (GET_CODE (op0) == REG)
1092             op0 = gen_rtx_SUBREG (mode1, op0, offset);
1093           else
1094             op0 = change_address (op0, mode1,
1095                                   plus_constant (XEXP (op0, 0), offset));
1096         }
1097       if (mode1 != mode)
1098         return convert_to_mode (tmode, op0, unsignedp);
1099       return op0;
1100     }
1101
1102   /* Handle fields bigger than a word.  */
1103
1104   if (bitsize > BITS_PER_WORD)
1105     {
1106       /* Here we transfer the words of the field
1107          in the order least significant first.
1108          This is because the most significant word is the one which may
1109          be less than full.  */
1110
1111       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1112       unsigned int i;
1113
1114       if (target == 0 || GET_CODE (target) != REG)
1115         target = gen_reg_rtx (mode);
1116
1117       /* Indicate for flow that the entire target reg is being set.  */
1118       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1119
1120       for (i = 0; i < nwords; i++)
1121         {
1122           /* If I is 0, use the low-order word in both field and target;
1123              if I is 1, use the next to lowest word; and so on.  */
1124           /* Word number in TARGET to use.  */
1125           unsigned int wordnum
1126             = (WORDS_BIG_ENDIAN
1127                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1128                : i);
1129           /* Offset from start of field in OP0.  */
1130           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1131                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1132                                                 * (int) BITS_PER_WORD))
1133                                      : (int) i * BITS_PER_WORD);
1134           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1135           rtx result_part
1136             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1137                                            bitsize - i * BITS_PER_WORD),
1138                                  bitnum + bit_offset, 1, target_part, mode,
1139                                  word_mode, align, total_size);
1140
1141           if (target_part == 0)
1142             abort ();
1143
1144           if (result_part != target_part)
1145             emit_move_insn (target_part, result_part);
1146         }
1147
1148       if (unsignedp)
1149         {
1150           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1151              need to be zero'd out.  */
1152           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1153             {
1154               unsigned int i, total_words;
1155
1156               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1157               for (i = nwords; i < total_words; i++)
1158                 {
1159                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1160                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1161                   emit_move_insn (target_part, const0_rtx);
1162                 }
1163             }
1164           return target;
1165         }
1166
1167       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1168       target = expand_shift (LSHIFT_EXPR, mode, target,
1169                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1170                              NULL_RTX, 0);
1171       return expand_shift (RSHIFT_EXPR, mode, target,
1172                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1173                            NULL_RTX, 0);
1174     }
1175
1176   /* From here on we know the desired field is smaller than a word.  */
1177
1178   /* Check if there is a correspondingly-sized integer field, so we can
1179      safely extract it as one size of integer, if necessary; then
1180      truncate or extend to the size that is wanted; then use SUBREGs or
1181      convert_to_mode to get one of the modes we really wanted.  */
1182
1183   int_mode = int_mode_for_mode (tmode);
1184   if (int_mode == BLKmode)
1185     int_mode = int_mode_for_mode (mode);
1186   if (int_mode == BLKmode)
1187     abort();    /* Should probably push op0 out to memory and then
1188                    do a load.  */
1189
1190   /* OFFSET is the number of words or bytes (UNIT says which)
1191      from STR_RTX to the first word or byte containing part of the field.  */
1192
1193   if (GET_CODE (op0) != MEM)
1194     {
1195       if (offset != 0
1196           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1197         {
1198           if (GET_CODE (op0) != REG)
1199             op0 = copy_to_reg (op0);
1200           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1201                                 op0, offset);
1202         }
1203       offset = 0;
1204     }
1205   else
1206     {
1207       op0 = protect_from_queue (str_rtx, 1);
1208     }
1209
1210   /* Now OFFSET is nonzero only for memory operands.  */
1211
1212   if (unsignedp)
1213     {
1214 #ifdef HAVE_extzv
1215       if (HAVE_extzv
1216           && (extzv_bitsize >= bitsize)
1217           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1218                 && (bitsize + bitpos > extzv_bitsize)))
1219         {
1220           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1221           rtx bitsize_rtx, bitpos_rtx;
1222           rtx last = get_last_insn ();
1223           rtx xop0 = op0;
1224           rtx xtarget = target;
1225           rtx xspec_target = spec_target;
1226           rtx xspec_target_subreg = spec_target_subreg;
1227           rtx pat;
1228           enum machine_mode maxmode;
1229
1230           maxmode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
1231           if (maxmode == VOIDmode)
1232             maxmode = word_mode;
1233
1234           if (GET_CODE (xop0) == MEM)
1235             {
1236               int save_volatile_ok = volatile_ok;
1237               volatile_ok = 1;
1238
1239               /* Is the memory operand acceptable?  */
1240               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1241                      (xop0, GET_MODE (xop0))))
1242                 {
1243                   /* No, load into a reg and extract from there.  */
1244                   enum machine_mode bestmode;
1245
1246                   /* Get the mode to use for inserting into this field.  If
1247                      OP0 is BLKmode, get the smallest mode consistent with the
1248                      alignment. If OP0 is a non-BLKmode object that is no
1249                      wider than MAXMODE, use its mode. Otherwise, use the
1250                      smallest mode containing the field.  */
1251
1252                   if (GET_MODE (xop0) == BLKmode
1253                       || (GET_MODE_SIZE (GET_MODE (op0))
1254                           > GET_MODE_SIZE (maxmode)))
1255                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1256                                               MEM_VOLATILE_P (xop0));
1257                   else
1258                     bestmode = GET_MODE (xop0);
1259
1260                   if (bestmode == VOIDmode
1261                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1262                           && GET_MODE_BITSIZE (bestmode) > align))
1263                     goto extzv_loses;
1264
1265                   /* Compute offset as multiple of this unit,
1266                      counting in bytes.  */
1267                   unit = GET_MODE_BITSIZE (bestmode);
1268                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1269                   xbitpos = bitnum % unit;
1270                   xop0 = change_address (xop0, bestmode,
1271                                          plus_constant (XEXP (xop0, 0),
1272                                                         xoffset));
1273                   /* Fetch it to a register in that size.  */
1274                   xop0 = force_reg (bestmode, xop0);
1275
1276                   /* XBITPOS counts within UNIT, which is what is expected.  */
1277                 }
1278               else
1279                 /* Get ref to first byte containing part of the field.  */
1280                 xop0 = change_address (xop0, byte_mode,
1281                                        plus_constant (XEXP (xop0, 0), xoffset));
1282
1283               volatile_ok = save_volatile_ok;
1284             }
1285
1286           /* If op0 is a register, we need it in MAXMODE (which is usually
1287              SImode). to make it acceptable to the format of extzv.  */
1288           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1289             goto extzv_loses;
1290           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1291             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1292
1293           /* On big-endian machines, we count bits from the most significant.
1294              If the bit field insn does not, we must invert.  */
1295           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1296             xbitpos = unit - bitsize - xbitpos;
1297
1298           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1299           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1300             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1301
1302           unit = GET_MODE_BITSIZE (maxmode);
1303
1304           if (xtarget == 0
1305               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1306             xtarget = xspec_target = gen_reg_rtx (tmode);
1307
1308           if (GET_MODE (xtarget) != maxmode)
1309             {
1310               if (GET_CODE (xtarget) == REG)
1311                 {
1312                   int wider = (GET_MODE_SIZE (maxmode)
1313                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1314                   xtarget = gen_lowpart (maxmode, xtarget);
1315                   if (wider)
1316                     xspec_target_subreg = xtarget;
1317                 }
1318               else
1319                 xtarget = gen_reg_rtx (maxmode);
1320             }
1321
1322           /* If this machine's extzv insists on a register target,
1323              make sure we have one.  */
1324           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1325                  (xtarget, maxmode)))
1326             xtarget = gen_reg_rtx (maxmode);
1327
1328           bitsize_rtx = GEN_INT (bitsize);
1329           bitpos_rtx = GEN_INT (xbitpos);
1330
1331           pat = gen_extzv (protect_from_queue (xtarget, 1),
1332                            xop0, bitsize_rtx, bitpos_rtx);
1333           if (pat)
1334             {
1335               emit_insn (pat);
1336               target = xtarget;
1337               spec_target = xspec_target;
1338               spec_target_subreg = xspec_target_subreg;
1339             }
1340           else
1341             {
1342               delete_insns_since (last);
1343               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1344                                                 bitpos, target, 1, align);
1345             }
1346         }
1347       else
1348         extzv_loses:
1349 #endif
1350       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1351                                         bitpos, target, 1, align);
1352     }
1353   else
1354     {
1355 #ifdef HAVE_extv
1356       if (HAVE_extv
1357           && (extv_bitsize >= bitsize)
1358           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1359                 && (bitsize + bitpos > extv_bitsize)))
1360         {
1361           int xbitpos = bitpos, xoffset = offset;
1362           rtx bitsize_rtx, bitpos_rtx;
1363           rtx last = get_last_insn ();
1364           rtx xop0 = op0, xtarget = target;
1365           rtx xspec_target = spec_target;
1366           rtx xspec_target_subreg = spec_target_subreg;
1367           rtx pat;
1368           enum machine_mode maxmode;
1369
1370           maxmode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
1371           if (maxmode == VOIDmode)
1372             maxmode = word_mode;
1373
1374           if (GET_CODE (xop0) == MEM)
1375             {
1376               /* Is the memory operand acceptable?  */
1377               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1378                      (xop0, GET_MODE (xop0))))
1379                 {
1380                   /* No, load into a reg and extract from there.  */
1381                   enum machine_mode bestmode;
1382
1383                   /* Get the mode to use for inserting into this field.  If
1384                      OP0 is BLKmode, get the smallest mode consistent with the
1385                      alignment. If OP0 is a non-BLKmode object that is no
1386                      wider than MAXMODE, use its mode. Otherwise, use the
1387                      smallest mode containing the field.  */
1388
1389                   if (GET_MODE (xop0) == BLKmode
1390                       || (GET_MODE_SIZE (GET_MODE (op0))
1391                           > GET_MODE_SIZE (maxmode)))
1392                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1393                                               MEM_VOLATILE_P (xop0));
1394                   else
1395                     bestmode = GET_MODE (xop0);
1396
1397                   if (bestmode == VOIDmode
1398                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1399                           && GET_MODE_BITSIZE (bestmode) > align))
1400                     goto extv_loses;
1401
1402                   /* Compute offset as multiple of this unit,
1403                      counting in bytes.  */
1404                   unit = GET_MODE_BITSIZE (bestmode);
1405                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1406                   xbitpos = bitnum % unit;
1407                   xop0 = change_address (xop0, bestmode,
1408                                          plus_constant (XEXP (xop0, 0),
1409                                                         xoffset));
1410                   /* Fetch it to a register in that size.  */
1411                   xop0 = force_reg (bestmode, xop0);
1412
1413                   /* XBITPOS counts within UNIT, which is what is expected.  */
1414                 }
1415               else
1416                 /* Get ref to first byte containing part of the field.  */
1417                 xop0 = change_address (xop0, byte_mode,
1418                                        plus_constant (XEXP (xop0, 0), xoffset));
1419             }
1420
1421           /* If op0 is a register, we need it in MAXMODE (which is usually
1422              SImode) to make it acceptable to the format of extv.  */
1423           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1424             goto extv_loses;
1425           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1426             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1427
1428           /* On big-endian machines, we count bits from the most significant.
1429              If the bit field insn does not, we must invert.  */
1430           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1431             xbitpos = unit - bitsize - xbitpos;
1432
1433           /* XBITPOS counts within a size of UNIT.
1434              Adjust to count within a size of MAXMODE.  */
1435           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1436             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1437
1438           unit = GET_MODE_BITSIZE (maxmode);
1439
1440           if (xtarget == 0
1441               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1442             xtarget = xspec_target = gen_reg_rtx (tmode);
1443
1444           if (GET_MODE (xtarget) != maxmode)
1445             {
1446               if (GET_CODE (xtarget) == REG)
1447                 {
1448                   int wider = (GET_MODE_SIZE (maxmode)
1449                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1450                   xtarget = gen_lowpart (maxmode, xtarget);
1451                   if (wider)
1452                     xspec_target_subreg = xtarget;
1453                 }
1454               else
1455                 xtarget = gen_reg_rtx (maxmode);
1456             }
1457
1458           /* If this machine's extv insists on a register target,
1459              make sure we have one.  */
1460           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1461                  (xtarget, maxmode)))
1462             xtarget = gen_reg_rtx (maxmode);
1463
1464           bitsize_rtx = GEN_INT (bitsize);
1465           bitpos_rtx = GEN_INT (xbitpos);
1466
1467           pat = gen_extv (protect_from_queue (xtarget, 1),
1468                           xop0, bitsize_rtx, bitpos_rtx);
1469           if (pat)
1470             {
1471               emit_insn (pat);
1472               target = xtarget;
1473               spec_target = xspec_target;
1474               spec_target_subreg = xspec_target_subreg;
1475             }
1476           else
1477             {
1478               delete_insns_since (last);
1479               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1480                                                 bitpos, target, 0, align);
1481             }
1482         }
1483       else
1484         extv_loses:
1485 #endif
1486       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1487                                         bitpos, target, 0, align);
1488     }
1489   if (target == spec_target)
1490     return target;
1491   if (target == spec_target_subreg)
1492     return spec_target;
1493   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1494     {
1495       /* If the target mode is floating-point, first convert to the
1496          integer mode of that size and then access it as a floating-point
1497          value via a SUBREG.  */
1498       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1499         {
1500           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1501                                                    MODE_INT, 0),
1502                                     target, unsignedp);
1503           if (GET_CODE (target) != REG)
1504             target = copy_to_reg (target);
1505           return gen_rtx_SUBREG (tmode, target, 0);
1506         }
1507       else
1508         return convert_to_mode (tmode, target, unsignedp);
1509     }
1510   return target;
1511 }
1512 \f
1513 /* Extract a bit field using shifts and boolean operations
1514    Returns an rtx to represent the value.
1515    OP0 addresses a register (word) or memory (byte).
1516    BITPOS says which bit within the word or byte the bit field starts in.
1517    OFFSET says how many bytes farther the bit field starts;
1518     it is 0 if OP0 is a register.
1519    BITSIZE says how many bits long the bit field is.
1520     (If OP0 is a register, it may be narrower than a full word,
1521      but BITPOS still counts within a full word,
1522      which is significant on bigendian machines.)
1523
1524    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1525    If TARGET is nonzero, attempts to store the value there
1526    and return TARGET, but this is not guaranteed.
1527    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1528
1529    ALIGN is the alignment that STR_RTX is known to have.  */
1530
1531 static rtx
1532 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1533                          target, unsignedp, align)
1534      enum machine_mode tmode;
1535      register rtx op0, target;
1536      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1537      int unsignedp;
1538      unsigned int align;
1539 {
1540   unsigned int total_bits = BITS_PER_WORD;
1541   enum machine_mode mode;
1542
1543   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1544     {
1545       /* Special treatment for a bit field split across two registers.  */
1546       if (bitsize + bitpos > BITS_PER_WORD)
1547         return extract_split_bit_field (op0, bitsize, bitpos,
1548                                         unsignedp, align);
1549     }
1550   else
1551     {
1552       /* Get the proper mode to use for this field.  We want a mode that
1553          includes the entire field.  If such a mode would be larger than
1554          a word, we won't be doing the extraction the normal way.  */
1555
1556       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
1557                             word_mode,
1558                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1559
1560       if (mode == VOIDmode)
1561         /* The only way this should occur is if the field spans word
1562            boundaries.  */
1563         return extract_split_bit_field (op0, bitsize,
1564                                         bitpos + offset * BITS_PER_UNIT,
1565                                         unsignedp, align);
1566
1567       total_bits = GET_MODE_BITSIZE (mode);
1568
1569       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1570          be in the range 0 to total_bits-1, and put any excess bytes in
1571          OFFSET.  */
1572       if (bitpos >= total_bits)
1573         {
1574           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1575           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1576                      * BITS_PER_UNIT);
1577         }
1578
1579       /* Get ref to an aligned byte, halfword, or word containing the field.
1580          Adjust BITPOS to be position within a word,
1581          and OFFSET to be the offset of that word.
1582          Then alter OP0 to refer to that word.  */
1583       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1584       offset -= (offset % (total_bits / BITS_PER_UNIT));
1585       op0 = change_address (op0, mode,
1586                             plus_constant (XEXP (op0, 0), offset));
1587     }
1588
1589   mode = GET_MODE (op0);
1590
1591   if (BYTES_BIG_ENDIAN)
1592     {
1593       /* BITPOS is the distance between our msb and that of OP0.
1594          Convert it to the distance from the lsb.  */
1595
1596       bitpos = total_bits - bitsize - bitpos;
1597     }
1598
1599   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1600      We have reduced the big-endian case to the little-endian case.  */
1601
1602   if (unsignedp)
1603     {
1604       if (bitpos)
1605         {
1606           /* If the field does not already start at the lsb,
1607              shift it so it does.  */
1608           tree amount = build_int_2 (bitpos, 0);
1609           /* Maybe propagate the target for the shift.  */
1610           /* But not if we will return it--could confuse integrate.c.  */
1611           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1612                            && !REG_FUNCTION_VALUE_P (target)
1613                            ? target : 0);
1614           if (tmode != mode) subtarget = 0;
1615           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1616         }
1617       /* Convert the value to the desired mode.  */
1618       if (mode != tmode)
1619         op0 = convert_to_mode (tmode, op0, 1);
1620
1621       /* Unless the msb of the field used to be the msb when we shifted,
1622          mask out the upper bits.  */
1623
1624       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1625 #if 0
1626 #ifdef SLOW_ZERO_EXTEND
1627           /* Always generate an `and' if
1628              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1629              will combine fruitfully with the zero-extend.  */
1630           || tmode != mode
1631 #endif
1632 #endif
1633           )
1634         return expand_binop (GET_MODE (op0), and_optab, op0,
1635                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1636                              target, 1, OPTAB_LIB_WIDEN);
1637       return op0;
1638     }
1639
1640   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1641      then arithmetic-shift its lsb to the lsb of the word.  */
1642   op0 = force_reg (mode, op0);
1643   if (mode != tmode)
1644     target = 0;
1645
1646   /* Find the narrowest integer mode that contains the field.  */
1647
1648   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1649        mode = GET_MODE_WIDER_MODE (mode))
1650     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1651       {
1652         op0 = convert_to_mode (mode, op0, 0);
1653         break;
1654       }
1655
1656   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1657     {
1658       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1659       /* Maybe propagate the target for the shift.  */
1660       /* But not if we will return the result--could confuse integrate.c.  */
1661       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1662                        && ! REG_FUNCTION_VALUE_P (target)
1663                        ? target : 0);
1664       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1665     }
1666
1667   return expand_shift (RSHIFT_EXPR, mode, op0,
1668                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1669                        target, 0);
1670 }
1671 \f
1672 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1673    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1674    complement of that if COMPLEMENT.  The mask is truncated if
1675    necessary to the width of mode MODE.  The mask is zero-extended if
1676    BITSIZE+BITPOS is too small for MODE.  */
1677
1678 static rtx
1679 mask_rtx (mode, bitpos, bitsize, complement)
1680      enum machine_mode mode;
1681      int bitpos, bitsize, complement;
1682 {
1683   HOST_WIDE_INT masklow, maskhigh;
1684
1685   if (bitpos < HOST_BITS_PER_WIDE_INT)
1686     masklow = (HOST_WIDE_INT) -1 << bitpos;
1687   else
1688     masklow = 0;
1689
1690   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1691     masklow &= ((unsigned HOST_WIDE_INT) -1
1692                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1693
1694   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1695     maskhigh = -1;
1696   else
1697     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1698
1699   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1700     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1701                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1702   else
1703     maskhigh = 0;
1704
1705   if (complement)
1706     {
1707       maskhigh = ~maskhigh;
1708       masklow = ~masklow;
1709     }
1710
1711   return immed_double_const (masklow, maskhigh, mode);
1712 }
1713
1714 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1715    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1716
1717 static rtx
1718 lshift_value (mode, value, bitpos, bitsize)
1719      enum machine_mode mode;
1720      rtx value;
1721      int bitpos, bitsize;
1722 {
1723   unsigned HOST_WIDE_INT v = INTVAL (value);
1724   HOST_WIDE_INT low, high;
1725
1726   if (bitsize < HOST_BITS_PER_WIDE_INT)
1727     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1728
1729   if (bitpos < HOST_BITS_PER_WIDE_INT)
1730     {
1731       low = v << bitpos;
1732       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1733     }
1734   else
1735     {
1736       low = 0;
1737       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1738     }
1739
1740   return immed_double_const (low, high, mode);
1741 }
1742 \f
1743 /* Extract a bit field that is split across two words
1744    and return an RTX for the result.
1745
1746    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1747    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1748    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1749
1750    ALIGN is the known alignment of OP0.  This is also the size of the
1751    memory objects to be used.  */
1752
1753 static rtx
1754 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1755      rtx op0;
1756      unsigned HOST_WIDE_INT bitsize, bitpos;
1757      int unsignedp;
1758      unsigned int align;
1759 {
1760   unsigned int unit;
1761   unsigned int bitsdone = 0;
1762   rtx result = NULL_RTX;
1763   int first = 1;
1764
1765   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1766      much at a time.  */
1767   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1768     unit = BITS_PER_WORD;
1769   else
1770     unit = MIN (align, BITS_PER_WORD);
1771
1772   while (bitsdone < bitsize)
1773     {
1774       unsigned HOST_WIDE_INT thissize;
1775       rtx part, word;
1776       unsigned HOST_WIDE_INT thispos;
1777       unsigned HOST_WIDE_INT offset;
1778
1779       offset = (bitpos + bitsdone) / unit;
1780       thispos = (bitpos + bitsdone) % unit;
1781
1782       /* THISSIZE must not overrun a word boundary.  Otherwise,
1783          extract_fixed_bit_field will call us again, and we will mutually
1784          recurse forever.  */
1785       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1786       thissize = MIN (thissize, unit - thispos);
1787
1788       /* If OP0 is a register, then handle OFFSET here.
1789
1790          When handling multiword bitfields, extract_bit_field may pass
1791          down a word_mode SUBREG of a larger REG for a bitfield that actually
1792          crosses a word boundary.  Thus, for a SUBREG, we must find
1793          the current word starting from the base register.  */
1794       if (GET_CODE (op0) == SUBREG)
1795         {
1796           word = operand_subword_force (SUBREG_REG (op0),
1797                                         SUBREG_WORD (op0) + offset,
1798                                         GET_MODE (SUBREG_REG (op0)));
1799           offset = 0;
1800         }
1801       else if (GET_CODE (op0) == REG)
1802         {
1803           word = operand_subword_force (op0, offset, GET_MODE (op0));
1804           offset = 0;
1805         }
1806       else
1807         word = op0;
1808
1809       /* Extract the parts in bit-counting order,
1810          whose meaning is determined by BYTES_PER_UNIT.
1811          OFFSET is in UNITs, and UNIT is in bits.
1812          extract_fixed_bit_field wants offset in bytes.  */
1813       part = extract_fixed_bit_field (word_mode, word,
1814                                       offset * unit / BITS_PER_UNIT,
1815                                       thissize, thispos, 0, 1, align);
1816       bitsdone += thissize;
1817
1818       /* Shift this part into place for the result.  */
1819       if (BYTES_BIG_ENDIAN)
1820         {
1821           if (bitsize != bitsdone)
1822             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1823                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1824         }
1825       else
1826         {
1827           if (bitsdone != thissize)
1828             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1829                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1830         }
1831
1832       if (first)
1833         result = part;
1834       else
1835         /* Combine the parts with bitwise or.  This works
1836            because we extracted each part as an unsigned bit field.  */
1837         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1838                                OPTAB_LIB_WIDEN);
1839
1840       first = 0;
1841     }
1842
1843   /* Unsigned bit field: we are done.  */
1844   if (unsignedp)
1845     return result;
1846   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1847   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1848                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1849                          NULL_RTX, 0);
1850   return expand_shift (RSHIFT_EXPR, word_mode, result,
1851                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1852 }
1853 \f
1854 /* Add INC into TARGET.  */
1855
1856 void
1857 expand_inc (target, inc)
1858      rtx target, inc;
1859 {
1860   rtx value = expand_binop (GET_MODE (target), add_optab,
1861                             target, inc,
1862                             target, 0, OPTAB_LIB_WIDEN);
1863   if (value != target)
1864     emit_move_insn (target, value);
1865 }
1866
1867 /* Subtract DEC from TARGET.  */
1868
1869 void
1870 expand_dec (target, dec)
1871      rtx target, dec;
1872 {
1873   rtx value = expand_binop (GET_MODE (target), sub_optab,
1874                             target, dec,
1875                             target, 0, OPTAB_LIB_WIDEN);
1876   if (value != target)
1877     emit_move_insn (target, value);
1878 }
1879 \f
1880 /* Output a shift instruction for expression code CODE,
1881    with SHIFTED being the rtx for the value to shift,
1882    and AMOUNT the tree for the amount to shift by.
1883    Store the result in the rtx TARGET, if that is convenient.
1884    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1885    Return the rtx for where the value is.  */
1886
1887 rtx
1888 expand_shift (code, mode, shifted, amount, target, unsignedp)
1889      enum tree_code code;
1890      register enum machine_mode mode;
1891      rtx shifted;
1892      tree amount;
1893      register rtx target;
1894      int unsignedp;
1895 {
1896   register rtx op1, temp = 0;
1897   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1898   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1899   int try;
1900
1901   /* Previously detected shift-counts computed by NEGATE_EXPR
1902      and shifted in the other direction; but that does not work
1903      on all machines.  */
1904
1905   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1906
1907 #ifdef SHIFT_COUNT_TRUNCATED
1908   if (SHIFT_COUNT_TRUNCATED)
1909     {
1910       if (GET_CODE (op1) == CONST_INT
1911           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1912               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1913         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1914                        % GET_MODE_BITSIZE (mode));
1915       else if (GET_CODE (op1) == SUBREG
1916                && SUBREG_WORD (op1) == 0)
1917         op1 = SUBREG_REG (op1);
1918     }
1919 #endif
1920
1921   if (op1 == const0_rtx)
1922     return shifted;
1923
1924   for (try = 0; temp == 0 && try < 3; try++)
1925     {
1926       enum optab_methods methods;
1927
1928       if (try == 0)
1929         methods = OPTAB_DIRECT;
1930       else if (try == 1)
1931         methods = OPTAB_WIDEN;
1932       else
1933         methods = OPTAB_LIB_WIDEN;
1934
1935       if (rotate)
1936         {
1937           /* Widening does not work for rotation.  */
1938           if (methods == OPTAB_WIDEN)
1939             continue;
1940           else if (methods == OPTAB_LIB_WIDEN)
1941             {
1942               /* If we have been unable to open-code this by a rotation,
1943                  do it as the IOR of two shifts.  I.e., to rotate A
1944                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1945                  where C is the bitsize of A.
1946
1947                  It is theoretically possible that the target machine might
1948                  not be able to perform either shift and hence we would
1949                  be making two libcalls rather than just the one for the
1950                  shift (similarly if IOR could not be done).  We will allow
1951                  this extremely unlikely lossage to avoid complicating the
1952                  code below.  */
1953
1954               rtx subtarget = target == shifted ? 0 : target;
1955               rtx temp1;
1956               tree type = TREE_TYPE (amount);
1957               tree new_amount = make_tree (type, op1);
1958               tree other_amount
1959                 = fold (build (MINUS_EXPR, type,
1960                                convert (type,
1961                                         build_int_2 (GET_MODE_BITSIZE (mode),
1962                                                      0)),
1963                                amount));
1964
1965               shifted = force_reg (mode, shifted);
1966
1967               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1968                                    mode, shifted, new_amount, subtarget, 1);
1969               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1970                                     mode, shifted, other_amount, 0, 1);
1971               return expand_binop (mode, ior_optab, temp, temp1, target,
1972                                    unsignedp, methods);
1973             }
1974
1975           temp = expand_binop (mode,
1976                                left ? rotl_optab : rotr_optab,
1977                                shifted, op1, target, unsignedp, methods);
1978
1979           /* If we don't have the rotate, but we are rotating by a constant
1980              that is in range, try a rotate in the opposite direction.  */
1981
1982           if (temp == 0 && GET_CODE (op1) == CONST_INT
1983               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1984             temp = expand_binop (mode,
1985                                  left ? rotr_optab : rotl_optab,
1986                                  shifted,
1987                                  GEN_INT (GET_MODE_BITSIZE (mode)
1988                                           - INTVAL (op1)),
1989                                  target, unsignedp, methods);
1990         }
1991       else if (unsignedp)
1992         temp = expand_binop (mode,
1993                              left ? ashl_optab : lshr_optab,
1994                              shifted, op1, target, unsignedp, methods);
1995
1996       /* Do arithmetic shifts.
1997          Also, if we are going to widen the operand, we can just as well
1998          use an arithmetic right-shift instead of a logical one.  */
1999       if (temp == 0 && ! rotate
2000           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2001         {
2002           enum optab_methods methods1 = methods;
2003
2004           /* If trying to widen a log shift to an arithmetic shift,
2005              don't accept an arithmetic shift of the same size.  */
2006           if (unsignedp)
2007             methods1 = OPTAB_MUST_WIDEN;
2008
2009           /* Arithmetic shift */
2010
2011           temp = expand_binop (mode,
2012                                left ? ashl_optab : ashr_optab,
2013                                shifted, op1, target, unsignedp, methods1);
2014         }
2015
2016       /* We used to try extzv here for logical right shifts, but that was
2017          only useful for one machine, the VAX, and caused poor code
2018          generation there for lshrdi3, so the code was deleted and a
2019          define_expand for lshrsi3 was added to vax.md.  */
2020     }
2021
2022   if (temp == 0)
2023     abort ();
2024   return temp;
2025 }
2026 \f
2027 enum alg_code { alg_zero, alg_m, alg_shift,
2028                   alg_add_t_m2, alg_sub_t_m2,
2029                   alg_add_factor, alg_sub_factor,
2030                   alg_add_t2_m, alg_sub_t2_m,
2031                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2032
2033 /* This structure records a sequence of operations.
2034    `ops' is the number of operations recorded.
2035    `cost' is their total cost.
2036    The operations are stored in `op' and the corresponding
2037    logarithms of the integer coefficients in `log'.
2038
2039    These are the operations:
2040    alg_zero             total := 0;
2041    alg_m                total := multiplicand;
2042    alg_shift            total := total * coeff
2043    alg_add_t_m2         total := total + multiplicand * coeff;
2044    alg_sub_t_m2         total := total - multiplicand * coeff;
2045    alg_add_factor       total := total * coeff + total;
2046    alg_sub_factor       total := total * coeff - total;
2047    alg_add_t2_m         total := total * coeff + multiplicand;
2048    alg_sub_t2_m         total := total * coeff - multiplicand;
2049
2050    The first operand must be either alg_zero or alg_m.  */
2051
2052 struct algorithm
2053 {
2054   short cost;
2055   short ops;
2056   /* The size of the OP and LOG fields are not directly related to the
2057      word size, but the worst-case algorithms will be if we have few
2058      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2059      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2060      in total wordsize operations.  */
2061   enum alg_code op[MAX_BITS_PER_WORD];
2062   char log[MAX_BITS_PER_WORD];
2063 };
2064
2065 static void synth_mult                  PARAMS ((struct algorithm *,
2066                                                  unsigned HOST_WIDE_INT,
2067                                                  int));
2068 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2069                                                          int, int,
2070                                                          unsigned HOST_WIDE_INT *,
2071                                                          int *, int *));
2072 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2073                                                          int));
2074 /* Compute and return the best algorithm for multiplying by T.
2075    The algorithm must cost less than cost_limit
2076    If retval.cost >= COST_LIMIT, no algorithm was found and all
2077    other field of the returned struct are undefined.  */
2078
2079 static void
2080 synth_mult (alg_out, t, cost_limit)
2081      struct algorithm *alg_out;
2082      unsigned HOST_WIDE_INT t;
2083      int cost_limit;
2084 {
2085   int m;
2086   struct algorithm *alg_in, *best_alg;
2087   int cost;
2088   unsigned HOST_WIDE_INT q;
2089
2090   /* Indicate that no algorithm is yet found.  If no algorithm
2091      is found, this value will be returned and indicate failure.  */
2092   alg_out->cost = cost_limit;
2093
2094   if (cost_limit <= 0)
2095     return;
2096
2097   /* t == 1 can be done in zero cost.  */
2098   if (t == 1)
2099     {
2100       alg_out->ops = 1;
2101       alg_out->cost = 0;
2102       alg_out->op[0] = alg_m;
2103       return;
2104     }
2105
2106   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2107      fail now.  */
2108   if (t == 0)
2109     {
2110       if (zero_cost >= cost_limit)
2111         return;
2112       else
2113         {
2114           alg_out->ops = 1;
2115           alg_out->cost = zero_cost;
2116           alg_out->op[0] = alg_zero;
2117           return;
2118         }
2119     }
2120
2121   /* We'll be needing a couple extra algorithm structures now.  */
2122
2123   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2124   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2125
2126   /* If we have a group of zero bits at the low-order part of T, try
2127      multiplying by the remaining bits and then doing a shift.  */
2128
2129   if ((t & 1) == 0)
2130     {
2131       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2132       q = t >> m;
2133       cost = shift_cost[m];
2134       synth_mult (alg_in, q, cost_limit - cost);
2135
2136       cost += alg_in->cost;
2137       if (cost < cost_limit)
2138         {
2139           struct algorithm *x;
2140           x = alg_in, alg_in = best_alg, best_alg = x;
2141           best_alg->log[best_alg->ops] = m;
2142           best_alg->op[best_alg->ops] = alg_shift;
2143           cost_limit = cost;
2144         }
2145     }
2146
2147   /* If we have an odd number, add or subtract one.  */
2148   if ((t & 1) != 0)
2149     {
2150       unsigned HOST_WIDE_INT w;
2151
2152       for (w = 1; (w & t) != 0; w <<= 1)
2153         ;
2154       /* If T was -1, then W will be zero after the loop.  This is another
2155          case where T ends with ...111.  Handling this with (T + 1) and
2156          subtract 1 produces slightly better code and results in algorithm
2157          selection much faster than treating it like the ...0111 case
2158          below.  */
2159       if (w == 0
2160           || (w > 2
2161               /* Reject the case where t is 3.
2162                  Thus we prefer addition in that case.  */
2163               && t != 3))
2164         {
2165           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2166
2167           cost = add_cost;
2168           synth_mult (alg_in, t + 1, cost_limit - cost);
2169
2170           cost += alg_in->cost;
2171           if (cost < cost_limit)
2172             {
2173               struct algorithm *x;
2174               x = alg_in, alg_in = best_alg, best_alg = x;
2175               best_alg->log[best_alg->ops] = 0;
2176               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2177               cost_limit = cost;
2178             }
2179         }
2180       else
2181         {
2182           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2183
2184           cost = add_cost;
2185           synth_mult (alg_in, t - 1, cost_limit - cost);
2186
2187           cost += alg_in->cost;
2188           if (cost < cost_limit)
2189             {
2190               struct algorithm *x;
2191               x = alg_in, alg_in = best_alg, best_alg = x;
2192               best_alg->log[best_alg->ops] = 0;
2193               best_alg->op[best_alg->ops] = alg_add_t_m2;
2194               cost_limit = cost;
2195             }
2196         }
2197     }
2198
2199   /* Look for factors of t of the form
2200      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2201      If we find such a factor, we can multiply by t using an algorithm that
2202      multiplies by q, shift the result by m and add/subtract it to itself.
2203
2204      We search for large factors first and loop down, even if large factors
2205      are less probable than small; if we find a large factor we will find a
2206      good sequence quickly, and therefore be able to prune (by decreasing
2207      COST_LIMIT) the search.  */
2208
2209   for (m = floor_log2 (t - 1); m >= 2; m--)
2210     {
2211       unsigned HOST_WIDE_INT d;
2212
2213       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2214       if (t % d == 0 && t > d)
2215         {
2216           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2217           synth_mult (alg_in, t / d, cost_limit - cost);
2218
2219           cost += alg_in->cost;
2220           if (cost < cost_limit)
2221             {
2222               struct algorithm *x;
2223               x = alg_in, alg_in = best_alg, best_alg = x;
2224               best_alg->log[best_alg->ops] = m;
2225               best_alg->op[best_alg->ops] = alg_add_factor;
2226               cost_limit = cost;
2227             }
2228           /* Other factors will have been taken care of in the recursion.  */
2229           break;
2230         }
2231
2232       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2233       if (t % d == 0 && t > d)
2234         {
2235           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2236           synth_mult (alg_in, t / d, cost_limit - cost);
2237
2238           cost += alg_in->cost;
2239           if (cost < cost_limit)
2240             {
2241               struct algorithm *x;
2242               x = alg_in, alg_in = best_alg, best_alg = x;
2243               best_alg->log[best_alg->ops] = m;
2244               best_alg->op[best_alg->ops] = alg_sub_factor;
2245               cost_limit = cost;
2246             }
2247           break;
2248         }
2249     }
2250
2251   /* Try shift-and-add (load effective address) instructions,
2252      i.e. do a*3, a*5, a*9.  */
2253   if ((t & 1) != 0)
2254     {
2255       q = t - 1;
2256       q = q & -q;
2257       m = exact_log2 (q);
2258       if (m >= 0)
2259         {
2260           cost = shiftadd_cost[m];
2261           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2262
2263           cost += alg_in->cost;
2264           if (cost < cost_limit)
2265             {
2266               struct algorithm *x;
2267               x = alg_in, alg_in = best_alg, best_alg = x;
2268               best_alg->log[best_alg->ops] = m;
2269               best_alg->op[best_alg->ops] = alg_add_t2_m;
2270               cost_limit = cost;
2271             }
2272         }
2273
2274       q = t + 1;
2275       q = q & -q;
2276       m = exact_log2 (q);
2277       if (m >= 0)
2278         {
2279           cost = shiftsub_cost[m];
2280           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2281
2282           cost += alg_in->cost;
2283           if (cost < cost_limit)
2284             {
2285               struct algorithm *x;
2286               x = alg_in, alg_in = best_alg, best_alg = x;
2287               best_alg->log[best_alg->ops] = m;
2288               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2289               cost_limit = cost;
2290             }
2291         }
2292     }
2293
2294   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2295      we have not found any algorithm.  */
2296   if (cost_limit == alg_out->cost)
2297     return;
2298
2299   /* If we are getting a too long sequence for `struct algorithm'
2300      to record, make this search fail.  */
2301   if (best_alg->ops == MAX_BITS_PER_WORD)
2302     return;
2303
2304   /* Copy the algorithm from temporary space to the space at alg_out.
2305      We avoid using structure assignment because the majority of
2306      best_alg is normally undefined, and this is a critical function.  */
2307   alg_out->ops = best_alg->ops + 1;
2308   alg_out->cost = cost_limit;
2309   bcopy ((char *) best_alg->op, (char *) alg_out->op,
2310          alg_out->ops * sizeof *alg_out->op);
2311   bcopy ((char *) best_alg->log, (char *) alg_out->log,
2312          alg_out->ops * sizeof *alg_out->log);
2313 }
2314 \f
2315 /* Perform a multiplication and return an rtx for the result.
2316    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2317    TARGET is a suggestion for where to store the result (an rtx).
2318
2319    We check specially for a constant integer as OP1.
2320    If you want this check for OP0 as well, then before calling
2321    you should swap the two operands if OP0 would be constant.  */
2322
2323 rtx
2324 expand_mult (mode, op0, op1, target, unsignedp)
2325      enum machine_mode mode;
2326      register rtx op0, op1, target;
2327      int unsignedp;
2328 {
2329   rtx const_op1 = op1;
2330
2331   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2332      less than or equal in size to `unsigned int' this doesn't matter.
2333      If the mode is larger than `unsigned int', then synth_mult works only
2334      if the constant value exactly fits in an `unsigned int' without any
2335      truncation.  This means that multiplying by negative values does
2336      not work; results are off by 2^32 on a 32 bit machine.  */
2337
2338   /* If we are multiplying in DImode, it may still be a win
2339      to try to work with shifts and adds.  */
2340   if (GET_CODE (op1) == CONST_DOUBLE
2341       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2342       && HOST_BITS_PER_INT >= BITS_PER_WORD
2343       && CONST_DOUBLE_HIGH (op1) == 0)
2344     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2345   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2346            && GET_CODE (op1) == CONST_INT
2347            && INTVAL (op1) < 0)
2348     const_op1 = 0;
2349
2350   /* We used to test optimize here, on the grounds that it's better to
2351      produce a smaller program when -O is not used.
2352      But this causes such a terrible slowdown sometimes
2353      that it seems better to use synth_mult always.  */
2354
2355   if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2356     {
2357       struct algorithm alg;
2358       struct algorithm alg2;
2359       HOST_WIDE_INT val = INTVAL (op1);
2360       HOST_WIDE_INT val_so_far;
2361       rtx insn;
2362       int mult_cost;
2363       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2364
2365       /* Try to do the computation three ways: multiply by the negative of OP1
2366          and then negate, do the multiplication directly, or do multiplication
2367          by OP1 - 1.  */
2368
2369       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2370       mult_cost = MIN (12 * add_cost, mult_cost);
2371
2372       synth_mult (&alg, val, mult_cost);
2373
2374       /* This works only if the inverted value actually fits in an
2375          `unsigned int' */
2376       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2377         {
2378           synth_mult (&alg2, - val,
2379                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2380           if (alg2.cost + negate_cost < alg.cost)
2381             alg = alg2, variant = negate_variant;
2382         }
2383
2384       /* This proves very useful for division-by-constant.  */
2385       synth_mult (&alg2, val - 1,
2386                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2387       if (alg2.cost + add_cost < alg.cost)
2388         alg = alg2, variant = add_variant;
2389
2390       if (alg.cost < mult_cost)
2391         {
2392           /* We found something cheaper than a multiply insn.  */
2393           int opno;
2394           rtx accum, tem;
2395
2396           op0 = protect_from_queue (op0, 0);
2397
2398           /* Avoid referencing memory over and over.
2399              For speed, but also for correctness when mem is volatile.  */
2400           if (GET_CODE (op0) == MEM)
2401             op0 = force_reg (mode, op0);
2402
2403           /* ACCUM starts out either as OP0 or as a zero, depending on
2404              the first operation.  */
2405
2406           if (alg.op[0] == alg_zero)
2407             {
2408               accum = copy_to_mode_reg (mode, const0_rtx);
2409               val_so_far = 0;
2410             }
2411           else if (alg.op[0] == alg_m)
2412             {
2413               accum = copy_to_mode_reg (mode, op0);
2414               val_so_far = 1;
2415             }
2416           else
2417             abort ();
2418
2419           for (opno = 1; opno < alg.ops; opno++)
2420             {
2421               int log = alg.log[opno];
2422               int preserve = preserve_subexpressions_p ();
2423               rtx shift_subtarget = preserve ? 0 : accum;
2424               rtx add_target
2425                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2426                    && ! preserve)
2427                   ? target : 0;
2428               rtx accum_target = preserve ? 0 : accum;
2429
2430               switch (alg.op[opno])
2431                 {
2432                 case alg_shift:
2433                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2434                                         build_int_2 (log, 0), NULL_RTX, 0);
2435                   val_so_far <<= log;
2436                   break;
2437
2438                 case alg_add_t_m2:
2439                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2440                                       build_int_2 (log, 0), NULL_RTX, 0);
2441                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2442                                          add_target
2443                                          ? add_target : accum_target);
2444                   val_so_far += (HOST_WIDE_INT) 1 << log;
2445                   break;
2446
2447                 case alg_sub_t_m2:
2448                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2449                                       build_int_2 (log, 0), NULL_RTX, 0);
2450                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2451                                          add_target
2452                                          ? add_target : accum_target);
2453                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2454                   break;
2455
2456                 case alg_add_t2_m:
2457                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2458                                         build_int_2 (log, 0), shift_subtarget,
2459                                         0);
2460                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2461                                          add_target
2462                                          ? add_target : accum_target);
2463                   val_so_far = (val_so_far << log) + 1;
2464                   break;
2465
2466                 case alg_sub_t2_m:
2467                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2468                                         build_int_2 (log, 0), shift_subtarget,
2469                                         0);
2470                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2471                                          add_target
2472                                          ? add_target : accum_target);
2473                   val_so_far = (val_so_far << log) - 1;
2474                   break;
2475
2476                 case alg_add_factor:
2477                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2478                                       build_int_2 (log, 0), NULL_RTX, 0);
2479                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2480                                          add_target
2481                                          ? add_target : accum_target);
2482                   val_so_far += val_so_far << log;
2483                   break;
2484
2485                 case alg_sub_factor:
2486                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2487                                       build_int_2 (log, 0), NULL_RTX, 0);
2488                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2489                                          (add_target ? add_target
2490                                           : preserve ? 0 : tem));
2491                   val_so_far = (val_so_far << log) - val_so_far;
2492                   break;
2493
2494                 default:
2495                   abort ();
2496                 }
2497
2498               /* Write a REG_EQUAL note on the last insn so that we can cse
2499                  multiplication sequences.  */
2500
2501               insn = get_last_insn ();
2502               set_unique_reg_note (insn,
2503                                    REG_EQUAL,
2504                                    gen_rtx_MULT (mode, op0,
2505                                                  GEN_INT (val_so_far)));
2506             }
2507
2508           if (variant == negate_variant)
2509             {
2510               val_so_far = - val_so_far;
2511               accum = expand_unop (mode, neg_optab, accum, target, 0);
2512             }
2513           else if (variant == add_variant)
2514             {
2515               val_so_far = val_so_far + 1;
2516               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2517             }
2518
2519           if (val != val_so_far)
2520             abort ();
2521
2522           return accum;
2523         }
2524     }
2525
2526   /* This used to use umul_optab if unsigned, but for non-widening multiply
2527      there is no difference between signed and unsigned.  */
2528   op0 = expand_binop (mode, smul_optab,
2529                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2530   if (op0 == 0)
2531     abort ();
2532   return op0;
2533 }
2534 \f
2535 /* Return the smallest n such that 2**n >= X.  */
2536
2537 int
2538 ceil_log2 (x)
2539      unsigned HOST_WIDE_INT x;
2540 {
2541   return floor_log2 (x - 1) + 1;
2542 }
2543
2544 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2545    replace division by D, and put the least significant N bits of the result
2546    in *MULTIPLIER_PTR and return the most significant bit.
2547
2548    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2549    needed precision is in PRECISION (should be <= N).
2550
2551    PRECISION should be as small as possible so this function can choose
2552    multiplier more freely.
2553
2554    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2555    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2556
2557    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2558    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2559
2560 static
2561 unsigned HOST_WIDE_INT
2562 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2563      unsigned HOST_WIDE_INT d;
2564      int n;
2565      int precision;
2566      unsigned HOST_WIDE_INT *multiplier_ptr;
2567      int *post_shift_ptr;
2568      int *lgup_ptr;
2569 {
2570   unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2571   unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2572   int lgup, post_shift;
2573   int pow, pow2;
2574   unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2575
2576   /* lgup = ceil(log2(divisor)); */
2577   lgup = ceil_log2 (d);
2578
2579   if (lgup > n)
2580     abort ();
2581
2582   pow = n + lgup;
2583   pow2 = n + lgup - precision;
2584
2585   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2586     {
2587       /* We could handle this with some effort, but this case is much better
2588          handled directly with a scc insn, so rely on caller using that.  */
2589       abort ();
2590     }
2591
2592   /* mlow = 2^(N + lgup)/d */
2593  if (pow >= HOST_BITS_PER_WIDE_INT)
2594     {
2595       nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2596       nl = 0;
2597     }
2598   else
2599     {
2600       nh = 0;
2601       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2602     }
2603   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2604                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2605
2606   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2607   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2608     nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2609   else
2610     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2611   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2612                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2613
2614   if (mhigh_hi && nh - d >= d)
2615     abort ();
2616   if (mhigh_hi > 1 || mlow_hi > 1)
2617     abort ();
2618   /* assert that mlow < mhigh.  */
2619   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2620     abort();
2621
2622   /* If precision == N, then mlow, mhigh exceed 2^N
2623      (but they do not exceed 2^(N+1)).  */
2624
2625   /* Reduce to lowest terms */
2626   for (post_shift = lgup; post_shift > 0; post_shift--)
2627     {
2628       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2629       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2630       if (ml_lo >= mh_lo)
2631         break;
2632
2633       mlow_hi = 0;
2634       mlow_lo = ml_lo;
2635       mhigh_hi = 0;
2636       mhigh_lo = mh_lo;
2637     }
2638
2639   *post_shift_ptr = post_shift;
2640   *lgup_ptr = lgup;
2641   if (n < HOST_BITS_PER_WIDE_INT)
2642     {
2643       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2644       *multiplier_ptr = mhigh_lo & mask;
2645       return mhigh_lo >= mask;
2646     }
2647   else
2648     {
2649       *multiplier_ptr = mhigh_lo;
2650       return mhigh_hi;
2651     }
2652 }
2653
2654 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2655    congruent to 1 (mod 2**N).  */
2656
2657 static unsigned HOST_WIDE_INT
2658 invert_mod2n (x, n)
2659      unsigned HOST_WIDE_INT x;
2660      int n;
2661 {
2662   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2663
2664   /* The algorithm notes that the choice y = x satisfies
2665      x*y == 1 mod 2^3, since x is assumed odd.
2666      Each iteration doubles the number of bits of significance in y.  */
2667
2668   unsigned HOST_WIDE_INT mask;
2669   unsigned HOST_WIDE_INT y = x;
2670   int nbit = 3;
2671
2672   mask = (n == HOST_BITS_PER_WIDE_INT
2673           ? ~(unsigned HOST_WIDE_INT) 0
2674           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2675
2676   while (nbit < n)
2677     {
2678       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2679       nbit *= 2;
2680     }
2681   return y;
2682 }
2683
2684 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2685    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2686    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2687    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2688    become signed.
2689
2690    The result is put in TARGET if that is convenient.
2691
2692    MODE is the mode of operation.  */
2693
2694 rtx
2695 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2696      enum machine_mode mode;
2697      register rtx adj_operand, op0, op1, target;
2698      int unsignedp;
2699 {
2700   rtx tem;
2701   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2702
2703   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2704                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2705                       NULL_RTX, 0);
2706   tem = expand_and (tem, op1, NULL_RTX);
2707   adj_operand
2708     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2709                      adj_operand);
2710
2711   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2712                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2713                       NULL_RTX, 0);
2714   tem = expand_and (tem, op0, NULL_RTX);
2715   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2716                           target);
2717
2718   return target;
2719 }
2720
2721 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2722    in TARGET if that is convenient, and return where the result is.  If the
2723    operation can not be performed, 0 is returned.
2724
2725    MODE is the mode of operation and result.
2726
2727    UNSIGNEDP nonzero means unsigned multiply.
2728
2729    MAX_COST is the total allowed cost for the expanded RTL.  */
2730
2731 rtx
2732 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2733      enum machine_mode mode;
2734      register rtx op0, target;
2735      unsigned HOST_WIDE_INT cnst1;
2736      int unsignedp;
2737      int max_cost;
2738 {
2739   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2740   optab mul_highpart_optab;
2741   optab moptab;
2742   rtx tem;
2743   int size = GET_MODE_BITSIZE (mode);
2744   rtx op1, wide_op1;
2745
2746   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2747   if (size > HOST_BITS_PER_WIDE_INT)
2748     abort ();
2749
2750   op1 = GEN_INT (cnst1);
2751
2752   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2753     wide_op1 = op1;
2754   else
2755     wide_op1
2756       = immed_double_const (cnst1,
2757                             (unsignedp
2758                              ? (HOST_WIDE_INT) 0
2759                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2760                             wider_mode);
2761
2762   /* expand_mult handles constant multiplication of word_mode
2763      or narrower.  It does a poor job for large modes.  */
2764   if (size < BITS_PER_WORD
2765       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2766     {
2767       /* We have to do this, since expand_binop doesn't do conversion for
2768          multiply.  Maybe change expand_binop to handle widening multiply?  */
2769       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2770
2771       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2772       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2773                           build_int_2 (size, 0), NULL_RTX, 1);
2774       return convert_modes (mode, wider_mode, tem, unsignedp);
2775     }
2776
2777   if (target == 0)
2778     target = gen_reg_rtx (mode);
2779
2780   /* Firstly, try using a multiplication insn that only generates the needed
2781      high part of the product, and in the sign flavor of unsignedp.  */
2782   if (mul_highpart_cost[(int) mode] < max_cost)
2783     {
2784       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2785       target = expand_binop (mode, mul_highpart_optab,
2786                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2787       if (target)
2788         return target;
2789     }
2790
2791   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2792      Need to adjust the result after the multiplication.  */
2793   if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2794     {
2795       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2796       target = expand_binop (mode, mul_highpart_optab,
2797                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2798       if (target)
2799         /* We used the wrong signedness.  Adjust the result.  */
2800         return expand_mult_highpart_adjust (mode, target, op0,
2801                                             op1, target, unsignedp);
2802     }
2803
2804   /* Try widening multiplication.  */
2805   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2806   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2807       && mul_widen_cost[(int) wider_mode] < max_cost)
2808     {
2809       op1 = force_reg (mode, op1);
2810       goto try;
2811     }
2812
2813   /* Try widening the mode and perform a non-widening multiplication.  */
2814   moptab = smul_optab;
2815   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2816       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2817     {
2818       op1 = wide_op1;
2819       goto try;
2820     }
2821
2822   /* Try widening multiplication of opposite signedness, and adjust.  */
2823   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2824   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2825       && (mul_widen_cost[(int) wider_mode]
2826           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2827     {
2828       rtx regop1 = force_reg (mode, op1);
2829       tem = expand_binop (wider_mode, moptab, op0, regop1,
2830                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2831       if (tem != 0)
2832         {
2833           /* Extract the high half of the just generated product.  */
2834           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2835                               build_int_2 (size, 0), NULL_RTX, 1);
2836           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2837           /* We used the wrong signedness.  Adjust the result.  */
2838           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2839                                               target, unsignedp);
2840         }
2841     }
2842
2843   return 0;
2844
2845  try:
2846   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2847   tem = expand_binop (wider_mode, moptab, op0, op1,
2848                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2849   if (tem == 0)
2850     return 0;
2851
2852   /* Extract the high half of the just generated product.  */
2853   if (mode == word_mode)
2854     {
2855       return gen_highpart (mode, tem);
2856     }
2857   else
2858     {
2859       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2860                           build_int_2 (size, 0), NULL_RTX, 1);
2861       return convert_modes (mode, wider_mode, tem, unsignedp);
2862     }
2863 }
2864 \f
2865 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2866    if that is convenient, and returning where the result is.
2867    You may request either the quotient or the remainder as the result;
2868    specify REM_FLAG nonzero to get the remainder.
2869
2870    CODE is the expression code for which kind of division this is;
2871    it controls how rounding is done.  MODE is the machine mode to use.
2872    UNSIGNEDP nonzero means do unsigned division.  */
2873
2874 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2875    and then correct it by or'ing in missing high bits
2876    if result of ANDI is nonzero.
2877    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2878    This could optimize to a bfexts instruction.
2879    But C doesn't use these operations, so their optimizations are
2880    left for later.  */
2881 /* ??? For modulo, we don't actually need the highpart of the first product,
2882    the low part will do nicely.  And for small divisors, the second multiply
2883    can also be a low-part only multiply or even be completely left out.
2884    E.g. to calculate the remainder of a division by 3 with a 32 bit
2885    multiply, multiply with 0x55555556 and extract the upper two bits;
2886    the result is exact for inputs up to 0x1fffffff.
2887    The input range can be reduced by using cross-sum rules.
2888    For odd divisors >= 3, the following table gives right shift counts
2889    so that if an number is shifted by an integer multiple of the given
2890    amount, the remainder stays the same:
2891    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2892    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2893    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2894    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2895    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2896
2897    Cross-sum rules for even numbers can be derived by leaving as many bits
2898    to the right alone as the divisor has zeros to the right.
2899    E.g. if x is an unsigned 32 bit number:
2900    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2901    */
2902
2903 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2904
2905 rtx
2906 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2907      int rem_flag;
2908      enum tree_code code;
2909      enum machine_mode mode;
2910      register rtx op0, op1, target;
2911      int unsignedp;
2912 {
2913   enum machine_mode compute_mode;
2914   register rtx tquotient;
2915   rtx quotient = 0, remainder = 0;
2916   rtx last;
2917   int size;
2918   rtx insn, set;
2919   optab optab1, optab2;
2920   int op1_is_constant, op1_is_pow2;
2921   int max_cost, extra_cost;
2922   static HOST_WIDE_INT last_div_const = 0;
2923
2924   op1_is_constant = GET_CODE (op1) == CONST_INT;
2925   op1_is_pow2 = (op1_is_constant
2926                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2927                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2928
2929   /*
2930      This is the structure of expand_divmod:
2931
2932      First comes code to fix up the operands so we can perform the operations
2933      correctly and efficiently.
2934
2935      Second comes a switch statement with code specific for each rounding mode.
2936      For some special operands this code emits all RTL for the desired
2937      operation, for other cases, it generates only a quotient and stores it in
2938      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2939      to indicate that it has not done anything.
2940
2941      Last comes code that finishes the operation.  If QUOTIENT is set and
2942      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2943      QUOTIENT is not set, it is computed using trunc rounding.
2944
2945      We try to generate special code for division and remainder when OP1 is a
2946      constant.  If |OP1| = 2**n we can use shifts and some other fast
2947      operations.  For other values of OP1, we compute a carefully selected
2948      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2949      by m.
2950
2951      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2952      half of the product.  Different strategies for generating the product are
2953      implemented in expand_mult_highpart.
2954
2955      If what we actually want is the remainder, we generate that by another
2956      by-constant multiplication and a subtraction.  */
2957
2958   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2959      code below will malfunction if we are, so check here and handle
2960      the special case if so.  */
2961   if (op1 == const1_rtx)
2962     return rem_flag ? const0_rtx : op0;
2963
2964   if (target
2965       /* Don't use the function value register as a target
2966          since we have to read it as well as write it,
2967          and function-inlining gets confused by this.  */
2968       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2969           /* Don't clobber an operand while doing a multi-step calculation.  */
2970           || ((rem_flag || op1_is_constant)
2971               && (reg_mentioned_p (target, op0)
2972                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2973           || reg_mentioned_p (target, op1)
2974           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2975     target = 0;
2976
2977   /* Get the mode in which to perform this computation.  Normally it will
2978      be MODE, but sometimes we can't do the desired operation in MODE.
2979      If so, pick a wider mode in which we can do the operation.  Convert
2980      to that mode at the start to avoid repeated conversions.
2981
2982      First see what operations we need.  These depend on the expression
2983      we are evaluating.  (We assume that divxx3 insns exist under the
2984      same conditions that modxx3 insns and that these insns don't normally
2985      fail.  If these assumptions are not correct, we may generate less
2986      efficient code in some cases.)
2987
2988      Then see if we find a mode in which we can open-code that operation
2989      (either a division, modulus, or shift).  Finally, check for the smallest
2990      mode for which we can do the operation with a library call.  */
2991
2992   /* We might want to refine this now that we have division-by-constant
2993      optimization.  Since expand_mult_highpart tries so many variants, it is
2994      not straightforward to generalize this.  Maybe we should make an array
2995      of possible modes in init_expmed?  Save this for GCC 2.7.  */
2996
2997   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2998             : (unsignedp ? udiv_optab : sdiv_optab));
2999   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3000
3001   for (compute_mode = mode; compute_mode != VOIDmode;
3002        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3003     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3004         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3005       break;
3006
3007   if (compute_mode == VOIDmode)
3008     for (compute_mode = mode; compute_mode != VOIDmode;
3009          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3010       if (optab1->handlers[(int) compute_mode].libfunc
3011           || optab2->handlers[(int) compute_mode].libfunc)
3012         break;
3013
3014   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3015      in expand_binop.  */
3016   if (compute_mode == VOIDmode)
3017     compute_mode = mode;
3018
3019   if (target && GET_MODE (target) == compute_mode)
3020     tquotient = target;
3021   else
3022     tquotient = gen_reg_rtx (compute_mode);
3023
3024   size = GET_MODE_BITSIZE (compute_mode);
3025 #if 0
3026   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3027      (mode), and thereby get better code when OP1 is a constant.  Do that
3028      later.  It will require going over all usages of SIZE below.  */
3029   size = GET_MODE_BITSIZE (mode);
3030 #endif
3031
3032   /* Only deduct something for a REM if the last divide done was
3033      for a different constant.   Then set the constant of the last
3034      divide.  */
3035   max_cost = div_cost[(int) compute_mode]
3036     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3037                       && INTVAL (op1) == last_div_const)
3038        ? mul_cost[(int) compute_mode] + add_cost : 0);
3039
3040   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3041
3042   /* Now convert to the best mode to use.  */
3043   if (compute_mode != mode)
3044     {
3045       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3046       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3047
3048       /* convert_modes may have placed op1 into a register, so we
3049          must recompute the following.  */
3050       op1_is_constant = GET_CODE (op1) == CONST_INT;
3051       op1_is_pow2 = (op1_is_constant
3052                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3053                           || (! unsignedp
3054                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3055     }
3056
3057   /* If one of the operands is a volatile MEM, copy it into a register.  */
3058
3059   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3060     op0 = force_reg (compute_mode, op0);
3061   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3062     op1 = force_reg (compute_mode, op1);
3063
3064   /* If we need the remainder or if OP1 is constant, we need to
3065      put OP0 in a register in case it has any queued subexpressions.  */
3066   if (rem_flag || op1_is_constant)
3067     op0 = force_reg (compute_mode, op0);
3068
3069   last = get_last_insn ();
3070
3071   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3072   if (unsignedp)
3073     {
3074       if (code == FLOOR_DIV_EXPR)
3075         code = TRUNC_DIV_EXPR;
3076       if (code == FLOOR_MOD_EXPR)
3077         code = TRUNC_MOD_EXPR;
3078       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3079         code = TRUNC_DIV_EXPR;
3080     }
3081
3082   if (op1 != const0_rtx)
3083     switch (code)
3084       {
3085       case TRUNC_MOD_EXPR:
3086       case TRUNC_DIV_EXPR:
3087         if (op1_is_constant)
3088           {
3089             if (unsignedp)
3090               {
3091                 unsigned HOST_WIDE_INT mh, ml;
3092                 int pre_shift, post_shift;
3093                 int dummy;
3094                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3095
3096                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3097                   {
3098                     pre_shift = floor_log2 (d);
3099                     if (rem_flag)
3100                       {
3101                         remainder
3102                           = expand_binop (compute_mode, and_optab, op0,
3103                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3104                                           remainder, 1,
3105                                           OPTAB_LIB_WIDEN);
3106                         if (remainder)
3107                           return gen_lowpart (mode, remainder);
3108                       }
3109                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3110                                              build_int_2 (pre_shift, 0),
3111                                              tquotient, 1);
3112                   }
3113                 else if (size <= HOST_BITS_PER_WIDE_INT)
3114                   {
3115                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3116                       {
3117                         /* Most significant bit of divisor is set; emit an scc
3118                            insn.  */
3119                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3120                                                     compute_mode, 1, 1);
3121                         if (quotient == 0)
3122                           goto fail1;
3123                       }
3124                     else
3125                       {
3126                         /* Find a suitable multiplier and right shift count
3127                            instead of multiplying with D.  */
3128
3129                         mh = choose_multiplier (d, size, size,
3130                                                 &ml, &post_shift, &dummy);
3131
3132                         /* If the suggested multiplier is more than SIZE bits,
3133                            we can do better for even divisors, using an
3134                            initial right shift.  */
3135                         if (mh != 0 && (d & 1) == 0)
3136                           {
3137                             pre_shift = floor_log2 (d & -d);
3138                             mh = choose_multiplier (d >> pre_shift, size,
3139                                                     size - pre_shift,
3140                                                     &ml, &post_shift, &dummy);
3141                             if (mh)
3142                               abort ();
3143                           }
3144                         else
3145                           pre_shift = 0;
3146
3147                         if (mh != 0)
3148                           {
3149                             rtx t1, t2, t3, t4;
3150
3151                             extra_cost = (shift_cost[post_shift - 1]
3152                                           + shift_cost[1] + 2 * add_cost);
3153                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3154                                                        NULL_RTX, 1,
3155                                                        max_cost - extra_cost);
3156                             if (t1 == 0)
3157                               goto fail1;
3158                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3159                                                                op0, t1),
3160                                                 NULL_RTX);
3161                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3162                                                build_int_2 (1, 0), NULL_RTX,1);
3163                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3164                                                               t1, t3),
3165                                                 NULL_RTX);
3166                             quotient
3167                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3168                                               build_int_2 (post_shift - 1, 0),
3169                                               tquotient, 1);
3170                           }
3171                         else
3172                           {
3173                             rtx t1, t2;
3174
3175                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3176                                                build_int_2 (pre_shift, 0),
3177                                                NULL_RTX, 1);
3178                             extra_cost = (shift_cost[pre_shift]
3179                                           + shift_cost[post_shift]);
3180                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3181                                                        NULL_RTX, 1,
3182                                                        max_cost - extra_cost);
3183                             if (t2 == 0)
3184                               goto fail1;
3185                             quotient
3186                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3187                                               build_int_2 (post_shift, 0),
3188                                               tquotient, 1);
3189                           }
3190                       }
3191                   }
3192                 else            /* Too wide mode to use tricky code */
3193                   break;
3194
3195                 insn = get_last_insn ();
3196                 if (insn != last
3197                     && (set = single_set (insn)) != 0
3198                     && SET_DEST (set) == quotient)
3199                   set_unique_reg_note (insn,
3200                                        REG_EQUAL,
3201                                        gen_rtx_UDIV (compute_mode, op0, op1));
3202               }
3203             else                /* TRUNC_DIV, signed */
3204               {
3205                 unsigned HOST_WIDE_INT ml;
3206                 int lgup, post_shift;
3207                 HOST_WIDE_INT d = INTVAL (op1);
3208                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3209
3210                 /* n rem d = n rem -d */
3211                 if (rem_flag && d < 0)
3212                   {
3213                     d = abs_d;
3214                     op1 = GEN_INT (abs_d);
3215                   }
3216
3217                 if (d == 1)
3218                   quotient = op0;
3219                 else if (d == -1)
3220                   quotient = expand_unop (compute_mode, neg_optab, op0,
3221                                           tquotient, 0);
3222                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3223                   {
3224                     /* This case is not handled correctly below.  */
3225                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3226                                                 compute_mode, 1, 1);
3227                     if (quotient == 0)
3228                       goto fail1;
3229                   }
3230                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3231                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3232                   ;
3233                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3234                   {
3235                     lgup = floor_log2 (abs_d);
3236                     if (abs_d != 2 && BRANCH_COST < 3)
3237                       {
3238                         rtx label = gen_label_rtx ();
3239                         rtx t1;
3240
3241                         t1 = copy_to_mode_reg (compute_mode, op0);
3242                         do_cmp_and_jump (t1, const0_rtx, GE,
3243                                          compute_mode, label);
3244                         expand_inc (t1, GEN_INT (abs_d - 1));
3245                         emit_label (label);
3246                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3247                                                  build_int_2 (lgup, 0),
3248                                                  tquotient, 0);
3249                       }
3250                     else
3251                       {
3252                         rtx t1, t2, t3;
3253                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3254                                            build_int_2 (size - 1, 0),
3255                                            NULL_RTX, 0);
3256                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3257                                            build_int_2 (size - lgup, 0),
3258                                            NULL_RTX, 1);
3259                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3260                                                           op0, t2),
3261                                             NULL_RTX);
3262                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3263                                                  build_int_2 (lgup, 0),
3264                                                  tquotient, 0);
3265                       }
3266
3267                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3268                        the quotient.  */
3269                     if (d < 0)
3270                       {
3271                         insn = get_last_insn ();
3272                         if (insn != last
3273                             && (set = single_set (insn)) != 0
3274                             && SET_DEST (set) == quotient
3275                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3276                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3277                           set_unique_reg_note (insn,
3278                                                REG_EQUAL,
3279                                                gen_rtx_DIV (compute_mode,
3280                                                             op0,
3281                                                             GEN_INT (abs_d)));
3282
3283                         quotient = expand_unop (compute_mode, neg_optab,
3284                                                 quotient, quotient, 0);
3285                       }
3286                   }
3287                 else if (size <= HOST_BITS_PER_WIDE_INT)
3288                   {
3289                     choose_multiplier (abs_d, size, size - 1,
3290                                        &ml, &post_shift, &lgup);
3291                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3292                       {
3293                         rtx t1, t2, t3;
3294
3295                         extra_cost = (shift_cost[post_shift]
3296                                       + shift_cost[size - 1] + add_cost);
3297                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3298                                                    NULL_RTX, 0,
3299                                                    max_cost - extra_cost);
3300                         if (t1 == 0)
3301                           goto fail1;
3302                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3303                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3304                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3305                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3306                         if (d < 0)
3307                           quotient
3308                             = force_operand (gen_rtx_MINUS (compute_mode,
3309                                                             t3, t2),
3310                                              tquotient);
3311                         else
3312                           quotient
3313                             = force_operand (gen_rtx_MINUS (compute_mode,
3314                                                             t2, t3),
3315                                              tquotient);
3316                       }
3317                     else
3318                       {
3319                         rtx t1, t2, t3, t4;
3320
3321                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3322                         extra_cost = (shift_cost[post_shift]
3323                                       + shift_cost[size - 1] + 2 * add_cost);
3324                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3325                                                    NULL_RTX, 0,
3326                                                    max_cost - extra_cost);
3327                         if (t1 == 0)
3328                           goto fail1;
3329                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3330                                                           t1, op0),
3331                                             NULL_RTX);
3332                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3333                                            build_int_2 (post_shift, 0),
3334                                            NULL_RTX, 0);
3335                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3336                                            build_int_2 (size - 1, 0),
3337                                            NULL_RTX, 0);
3338                         if (d < 0)
3339                           quotient
3340                             = force_operand (gen_rtx_MINUS (compute_mode,
3341                                                             t4, t3),
3342                                              tquotient);
3343                         else
3344                           quotient
3345                             = force_operand (gen_rtx_MINUS (compute_mode,
3346                                                             t3, t4),
3347                                              tquotient);
3348                       }
3349                   }
3350                 else            /* Too wide mode to use tricky code */
3351                   break;
3352
3353                 insn = get_last_insn ();
3354                 if (insn != last
3355                     && (set = single_set (insn)) != 0
3356                     && SET_DEST (set) == quotient)
3357                   set_unique_reg_note (insn,
3358                                        REG_EQUAL,
3359                                        gen_rtx_DIV (compute_mode, op0, op1));
3360               }
3361             break;
3362           }
3363       fail1:
3364         delete_insns_since (last);
3365         break;
3366
3367       case FLOOR_DIV_EXPR:
3368       case FLOOR_MOD_EXPR:
3369       /* We will come here only for signed operations.  */
3370         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3371           {
3372             unsigned HOST_WIDE_INT mh, ml;
3373             int pre_shift, lgup, post_shift;
3374             HOST_WIDE_INT d = INTVAL (op1);
3375
3376             if (d > 0)
3377               {
3378                 /* We could just as easily deal with negative constants here,
3379                    but it does not seem worth the trouble for GCC 2.6.  */
3380                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3381                   {
3382                     pre_shift = floor_log2 (d);
3383                     if (rem_flag)
3384                       {
3385                         remainder = expand_binop (compute_mode, and_optab, op0,
3386                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3387                                                   remainder, 0, OPTAB_LIB_WIDEN);
3388                         if (remainder)
3389                           return gen_lowpart (mode, remainder);
3390                       }
3391                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3392                                              build_int_2 (pre_shift, 0),
3393                                              tquotient, 0);
3394                   }
3395                 else
3396                   {
3397                     rtx t1, t2, t3, t4;
3398
3399                     mh = choose_multiplier (d, size, size - 1,
3400                                             &ml, &post_shift, &lgup);
3401                     if (mh)
3402                       abort ();
3403
3404                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3405                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
3406                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3407                                        NULL_RTX, 0, OPTAB_WIDEN);
3408                     extra_cost = (shift_cost[post_shift]
3409                                   + shift_cost[size - 1] + 2 * add_cost);
3410                     t3 = expand_mult_highpart (compute_mode, t2, ml,
3411                                                NULL_RTX, 1,
3412                                                max_cost - extra_cost);
3413                     if (t3 != 0)
3414                       {
3415                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3416                                            build_int_2 (post_shift, 0),
3417                                            NULL_RTX, 1);
3418                         quotient = expand_binop (compute_mode, xor_optab,
3419                                                  t4, t1, tquotient, 0,
3420                                                  OPTAB_WIDEN);
3421                       }
3422                   }
3423               }
3424             else
3425               {
3426                 rtx nsign, t1, t2, t3, t4;
3427                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3428                                                   op0, constm1_rtx), NULL_RTX);
3429                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3430                                    0, OPTAB_WIDEN);
3431                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3432                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3433                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3434                                     NULL_RTX);
3435                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3436                                     NULL_RTX, 0);
3437                 if (t4)
3438                   {
3439                     rtx t5;
3440                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3441                                       NULL_RTX, 0);
3442                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3443                                                             t4, t5),
3444                                               tquotient);
3445                   }
3446               }
3447           }
3448
3449         if (quotient != 0)
3450           break;
3451         delete_insns_since (last);
3452
3453         /* Try using an instruction that produces both the quotient and
3454            remainder, using truncation.  We can easily compensate the quotient
3455            or remainder to get floor rounding, once we have the remainder.
3456            Notice that we compute also the final remainder value here,
3457            and return the result right away.  */
3458         if (target == 0 || GET_MODE (target) != compute_mode)
3459           target = gen_reg_rtx (compute_mode);
3460
3461         if (rem_flag)
3462           {
3463             remainder
3464               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3465             quotient = gen_reg_rtx (compute_mode);
3466           }
3467         else
3468           {
3469             quotient
3470               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3471             remainder = gen_reg_rtx (compute_mode);
3472           }
3473
3474         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3475                                  quotient, remainder, 0))
3476           {
3477             /* This could be computed with a branch-less sequence.
3478                Save that for later.  */
3479             rtx tem;
3480             rtx label = gen_label_rtx ();
3481             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3482             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3483                                 NULL_RTX, 0, OPTAB_WIDEN);
3484             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3485             expand_dec (quotient, const1_rtx);
3486             expand_inc (remainder, op1);
3487             emit_label (label);
3488             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3489           }
3490
3491         /* No luck with division elimination or divmod.  Have to do it
3492            by conditionally adjusting op0 *and* the result.  */
3493         {
3494           rtx label1, label2, label3, label4, label5;
3495           rtx adjusted_op0;
3496           rtx tem;
3497
3498           quotient = gen_reg_rtx (compute_mode);
3499           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3500           label1 = gen_label_rtx ();
3501           label2 = gen_label_rtx ();
3502           label3 = gen_label_rtx ();
3503           label4 = gen_label_rtx ();
3504           label5 = gen_label_rtx ();
3505           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3506           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3507           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3508                               quotient, 0, OPTAB_LIB_WIDEN);
3509           if (tem != quotient)
3510             emit_move_insn (quotient, tem);
3511           emit_jump_insn (gen_jump (label5));
3512           emit_barrier ();
3513           emit_label (label1);
3514           expand_inc (adjusted_op0, const1_rtx);
3515           emit_jump_insn (gen_jump (label4));
3516           emit_barrier ();
3517           emit_label (label2);
3518           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3519           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3520                               quotient, 0, OPTAB_LIB_WIDEN);
3521           if (tem != quotient)
3522             emit_move_insn (quotient, tem);
3523           emit_jump_insn (gen_jump (label5));
3524           emit_barrier ();
3525           emit_label (label3);
3526           expand_dec (adjusted_op0, const1_rtx);
3527           emit_label (label4);
3528           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3529                               quotient, 0, OPTAB_LIB_WIDEN);
3530           if (tem != quotient)
3531             emit_move_insn (quotient, tem);
3532           expand_dec (quotient, const1_rtx);
3533           emit_label (label5);
3534         }
3535         break;
3536
3537       case CEIL_DIV_EXPR:
3538       case CEIL_MOD_EXPR:
3539         if (unsignedp)
3540           {
3541             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3542               {
3543                 rtx t1, t2, t3;
3544                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3545                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3546                                    build_int_2 (floor_log2 (d), 0),
3547                                    tquotient, 1);
3548                 t2 = expand_binop (compute_mode, and_optab, op0,
3549                                    GEN_INT (d - 1),
3550                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3551                 t3 = gen_reg_rtx (compute_mode);
3552                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3553                                       compute_mode, 1, 1);
3554                 if (t3 == 0)
3555                   {
3556                     rtx lab;
3557                     lab = gen_label_rtx ();
3558                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3559                     expand_inc (t1, const1_rtx);
3560                     emit_label (lab);
3561                     quotient = t1;
3562                   }
3563                 else
3564                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3565                                                           t1, t3),
3566                                             tquotient);
3567                 break;
3568               }
3569
3570             /* Try using an instruction that produces both the quotient and
3571                remainder, using truncation.  We can easily compensate the
3572                quotient or remainder to get ceiling rounding, once we have the
3573                remainder.  Notice that we compute also the final remainder
3574                value here, and return the result right away.  */
3575             if (target == 0 || GET_MODE (target) != compute_mode)
3576               target = gen_reg_rtx (compute_mode);
3577
3578             if (rem_flag)
3579               {
3580                 remainder = (GET_CODE (target) == REG
3581                              ? target : gen_reg_rtx (compute_mode));
3582                 quotient = gen_reg_rtx (compute_mode);
3583               }
3584             else
3585               {
3586                 quotient = (GET_CODE (target) == REG
3587                             ? target : gen_reg_rtx (compute_mode));
3588                 remainder = gen_reg_rtx (compute_mode);
3589               }
3590
3591             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3592                                      remainder, 1))
3593               {
3594                 /* This could be computed with a branch-less sequence.
3595                    Save that for later.  */
3596                 rtx label = gen_label_rtx ();
3597                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3598                                  compute_mode, label);
3599                 expand_inc (quotient, const1_rtx);
3600                 expand_dec (remainder, op1);
3601                 emit_label (label);
3602                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3603               }
3604
3605             /* No luck with division elimination or divmod.  Have to do it
3606                by conditionally adjusting op0 *and* the result.  */
3607             {
3608               rtx label1, label2;
3609               rtx adjusted_op0, tem;
3610
3611               quotient = gen_reg_rtx (compute_mode);
3612               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3613               label1 = gen_label_rtx ();
3614               label2 = gen_label_rtx ();
3615               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3616                                compute_mode, label1);
3617               emit_move_insn  (quotient, const0_rtx);
3618               emit_jump_insn (gen_jump (label2));
3619               emit_barrier ();
3620               emit_label (label1);
3621               expand_dec (adjusted_op0, const1_rtx);
3622               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3623                                   quotient, 1, OPTAB_LIB_WIDEN);
3624               if (tem != quotient)
3625                 emit_move_insn (quotient, tem);
3626               expand_inc (quotient, const1_rtx);
3627               emit_label (label2);
3628             }
3629           }
3630         else /* signed */
3631           {
3632             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3633                 && INTVAL (op1) >= 0)
3634               {
3635                 /* This is extremely similar to the code for the unsigned case
3636                    above.  For 2.7 we should merge these variants, but for
3637                    2.6.1 I don't want to touch the code for unsigned since that
3638                    get used in C.  The signed case will only be used by other
3639                    languages (Ada).  */
3640
3641                 rtx t1, t2, t3;
3642                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3643                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3644                                    build_int_2 (floor_log2 (d), 0),
3645                                    tquotient, 0);
3646                 t2 = expand_binop (compute_mode, and_optab, op0,
3647                                    GEN_INT (d - 1),
3648                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649                 t3 = gen_reg_rtx (compute_mode);
3650                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3651                                       compute_mode, 1, 1);
3652                 if (t3 == 0)
3653                   {
3654                     rtx lab;
3655                     lab = gen_label_rtx ();
3656                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3657                     expand_inc (t1, const1_rtx);
3658                     emit_label (lab);
3659                     quotient = t1;
3660                   }
3661                 else
3662                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3663                                                           t1, t3),
3664                                             tquotient);
3665                 break;
3666               }
3667
3668             /* Try using an instruction that produces both the quotient and
3669                remainder, using truncation.  We can easily compensate the
3670                quotient or remainder to get ceiling rounding, once we have the
3671                remainder.  Notice that we compute also the final remainder
3672                value here, and return the result right away.  */
3673             if (target == 0 || GET_MODE (target) != compute_mode)
3674               target = gen_reg_rtx (compute_mode);
3675             if (rem_flag)
3676               {
3677                 remainder= (GET_CODE (target) == REG
3678                             ? target : gen_reg_rtx (compute_mode));
3679                 quotient = gen_reg_rtx (compute_mode);
3680               }
3681             else
3682               {
3683                 quotient = (GET_CODE (target) == REG
3684                             ? target : gen_reg_rtx (compute_mode));
3685                 remainder = gen_reg_rtx (compute_mode);
3686               }
3687
3688             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3689                                      remainder, 0))
3690               {
3691                 /* This could be computed with a branch-less sequence.
3692                    Save that for later.  */
3693                 rtx tem;
3694                 rtx label = gen_label_rtx ();
3695                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3696                                  compute_mode, label);
3697                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3698                                     NULL_RTX, 0, OPTAB_WIDEN);
3699                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3700                 expand_inc (quotient, const1_rtx);
3701                 expand_dec (remainder, op1);
3702                 emit_label (label);
3703                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3704               }
3705
3706             /* No luck with division elimination or divmod.  Have to do it
3707                by conditionally adjusting op0 *and* the result.  */
3708             {
3709               rtx label1, label2, label3, label4, label5;
3710               rtx adjusted_op0;
3711               rtx tem;
3712
3713               quotient = gen_reg_rtx (compute_mode);
3714               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3715               label1 = gen_label_rtx ();
3716               label2 = gen_label_rtx ();
3717               label3 = gen_label_rtx ();
3718               label4 = gen_label_rtx ();
3719               label5 = gen_label_rtx ();
3720               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3721               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3722                                compute_mode, label1);
3723               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3724                                   quotient, 0, OPTAB_LIB_WIDEN);
3725               if (tem != quotient)
3726                 emit_move_insn (quotient, tem);
3727               emit_jump_insn (gen_jump (label5));
3728               emit_barrier ();
3729               emit_label (label1);
3730               expand_dec (adjusted_op0, const1_rtx);
3731               emit_jump_insn (gen_jump (label4));
3732               emit_barrier ();
3733               emit_label (label2);
3734               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3735                                compute_mode, label3);
3736               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3737                                   quotient, 0, OPTAB_LIB_WIDEN);
3738               if (tem != quotient)
3739                 emit_move_insn (quotient, tem);
3740               emit_jump_insn (gen_jump (label5));
3741               emit_barrier ();
3742               emit_label (label3);
3743               expand_inc (adjusted_op0, const1_rtx);
3744               emit_label (label4);
3745               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3746                                   quotient, 0, OPTAB_LIB_WIDEN);
3747               if (tem != quotient)
3748                 emit_move_insn (quotient, tem);
3749               expand_inc (quotient, const1_rtx);
3750               emit_label (label5);
3751             }
3752           }
3753         break;
3754
3755       case EXACT_DIV_EXPR:
3756         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3757           {
3758             HOST_WIDE_INT d = INTVAL (op1);
3759             unsigned HOST_WIDE_INT ml;
3760             int post_shift;
3761             rtx t1;
3762
3763             post_shift = floor_log2 (d & -d);
3764             ml = invert_mod2n (d >> post_shift, size);
3765             t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3766                               unsignedp);
3767             quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3768                                      build_int_2 (post_shift, 0),
3769                                      NULL_RTX, unsignedp);
3770
3771             insn = get_last_insn ();
3772             set_unique_reg_note (insn,
3773                                  REG_EQUAL,
3774                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3775                                                  compute_mode,
3776                                                  op0, op1));
3777           }
3778         break;
3779
3780       case ROUND_DIV_EXPR:
3781       case ROUND_MOD_EXPR:
3782         if (unsignedp)
3783           {
3784             rtx tem;
3785             rtx label;
3786             label = gen_label_rtx ();
3787             quotient = gen_reg_rtx (compute_mode);
3788             remainder = gen_reg_rtx (compute_mode);
3789             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3790               {
3791                 rtx tem;
3792                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3793                                          quotient, 1, OPTAB_LIB_WIDEN);
3794                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3795                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3796                                           remainder, 1, OPTAB_LIB_WIDEN);
3797               }
3798             tem = plus_constant (op1, -1);
3799             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3800                                 build_int_2 (1, 0), NULL_RTX, 1);
3801             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3802             expand_inc (quotient, const1_rtx);
3803             expand_dec (remainder, op1);
3804             emit_label (label);
3805           }
3806         else
3807           {
3808             rtx abs_rem, abs_op1, tem, mask;
3809             rtx label;
3810             label = gen_label_rtx ();
3811             quotient = gen_reg_rtx (compute_mode);
3812             remainder = gen_reg_rtx (compute_mode);
3813             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3814               {
3815                 rtx tem;
3816                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3817                                          quotient, 0, OPTAB_LIB_WIDEN);
3818                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3819                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3820                                           remainder, 0, OPTAB_LIB_WIDEN);
3821               }
3822             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0);
3823             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0);
3824             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3825                                 build_int_2 (1, 0), NULL_RTX, 1);
3826             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3827             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3828                                 NULL_RTX, 0, OPTAB_WIDEN);
3829             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3830                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3831             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3832                                 NULL_RTX, 0, OPTAB_WIDEN);
3833             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3834                                 NULL_RTX, 0, OPTAB_WIDEN);
3835             expand_inc (quotient, tem);
3836             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3837                                 NULL_RTX, 0, OPTAB_WIDEN);
3838             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3839                                 NULL_RTX, 0, OPTAB_WIDEN);
3840             expand_dec (remainder, tem);
3841             emit_label (label);
3842           }
3843         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3844
3845       default:
3846         abort ();
3847       }
3848
3849   if (quotient == 0)
3850     {
3851       if (target && GET_MODE (target) != compute_mode)
3852         target = 0;
3853
3854       if (rem_flag)
3855         {
3856           /* Try to produce the remainder without producing the quotient.
3857              If we seem to have a divmod patten that does not require widening,
3858              don't try windening here.  We should really have an WIDEN argument
3859              to expand_twoval_binop, since what we'd really like to do here is
3860              1) try a mod insn in compute_mode
3861              2) try a divmod insn in compute_mode
3862              3) try a div insn in compute_mode and multiply-subtract to get
3863                 remainder
3864              4) try the same things with widening allowed.  */
3865           remainder
3866             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3867                                  op0, op1, target,
3868                                  unsignedp,
3869                                  ((optab2->handlers[(int) compute_mode].insn_code
3870                                    != CODE_FOR_nothing)
3871                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3872           if (remainder == 0)
3873             {
3874               /* No luck there.  Can we do remainder and divide at once
3875                  without a library call?  */
3876               remainder = gen_reg_rtx (compute_mode);
3877               if (! expand_twoval_binop ((unsignedp
3878                                           ? udivmod_optab
3879                                           : sdivmod_optab),
3880                                          op0, op1,
3881                                          NULL_RTX, remainder, unsignedp))
3882                 remainder = 0;
3883             }
3884
3885           if (remainder)
3886             return gen_lowpart (mode, remainder);
3887         }
3888
3889       /* Produce the quotient.  Try a quotient insn, but not a library call.
3890          If we have a divmod in this mode, use it in preference to widening
3891          the div (for this test we assume it will not fail). Note that optab2
3892          is set to the one of the two optabs that the call below will use.  */
3893       quotient
3894         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3895                              op0, op1, rem_flag ? NULL_RTX : target,
3896                              unsignedp,
3897                              ((optab2->handlers[(int) compute_mode].insn_code
3898                                != CODE_FOR_nothing)
3899                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3900
3901       if (quotient == 0)
3902         {
3903           /* No luck there.  Try a quotient-and-remainder insn,
3904              keeping the quotient alone.  */
3905           quotient = gen_reg_rtx (compute_mode);
3906           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3907                                      op0, op1,
3908                                      quotient, NULL_RTX, unsignedp))
3909             {
3910               quotient = 0;
3911               if (! rem_flag)
3912                 /* Still no luck.  If we are not computing the remainder,
3913                    use a library call for the quotient.  */
3914                 quotient = sign_expand_binop (compute_mode,
3915                                               udiv_optab, sdiv_optab,
3916                                               op0, op1, target,
3917                                               unsignedp, OPTAB_LIB_WIDEN);
3918             }
3919         }
3920     }
3921
3922   if (rem_flag)
3923     {
3924       if (target && GET_MODE (target) != compute_mode)
3925         target = 0;
3926
3927       if (quotient == 0)
3928         /* No divide instruction either.  Use library for remainder.  */
3929         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3930                                        op0, op1, target,
3931                                        unsignedp, OPTAB_LIB_WIDEN);
3932       else
3933         {
3934           /* We divided.  Now finish doing X - Y * (X / Y).  */
3935           remainder = expand_mult (compute_mode, quotient, op1,
3936                                    NULL_RTX, unsignedp);
3937           remainder = expand_binop (compute_mode, sub_optab, op0,
3938                                     remainder, target, unsignedp,
3939                                     OPTAB_LIB_WIDEN);
3940         }
3941     }
3942
3943   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3944 }
3945 \f
3946 /* Return a tree node with data type TYPE, describing the value of X.
3947    Usually this is an RTL_EXPR, if there is no obvious better choice.
3948    X may be an expression, however we only support those expressions
3949    generated by loop.c.   */
3950
3951 tree
3952 make_tree (type, x)
3953      tree type;
3954      rtx x;
3955 {
3956   tree t;
3957
3958   switch (GET_CODE (x))
3959     {
3960     case CONST_INT:
3961       t = build_int_2 (INTVAL (x),
3962                        (TREE_UNSIGNED (type)
3963                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3964                        || INTVAL (x) >= 0 ? 0 : -1);
3965       TREE_TYPE (t) = type;
3966       return t;
3967
3968     case CONST_DOUBLE:
3969       if (GET_MODE (x) == VOIDmode)
3970         {
3971           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3972           TREE_TYPE (t) = type;
3973         }
3974       else
3975         {
3976           REAL_VALUE_TYPE d;
3977
3978           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3979           t = build_real (type, d);
3980         }
3981
3982       return t;
3983
3984     case PLUS:
3985       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3986                           make_tree (type, XEXP (x, 1))));
3987
3988     case MINUS:
3989       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3990                           make_tree (type, XEXP (x, 1))));
3991
3992     case NEG:
3993       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3994
3995     case MULT:
3996       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3997                           make_tree (type, XEXP (x, 1))));
3998
3999     case ASHIFT:
4000       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4001                           make_tree (type, XEXP (x, 1))));
4002
4003     case LSHIFTRT:
4004       return fold (convert (type,
4005                             build (RSHIFT_EXPR, unsigned_type (type),
4006                                    make_tree (unsigned_type (type),
4007                                               XEXP (x, 0)),
4008                                    make_tree (type, XEXP (x, 1)))));
4009
4010     case ASHIFTRT:
4011       return fold (convert (type,
4012                             build (RSHIFT_EXPR, signed_type (type),
4013                                    make_tree (signed_type (type), XEXP (x, 0)),
4014                                    make_tree (type, XEXP (x, 1)))));
4015
4016     case DIV:
4017       if (TREE_CODE (type) != REAL_TYPE)
4018         t = signed_type (type);
4019       else
4020         t = type;
4021
4022       return fold (convert (type,
4023                             build (TRUNC_DIV_EXPR, t,
4024                                    make_tree (t, XEXP (x, 0)),
4025                                    make_tree (t, XEXP (x, 1)))));
4026     case UDIV:
4027       t = unsigned_type (type);
4028       return fold (convert (type,
4029                             build (TRUNC_DIV_EXPR, t,
4030                                    make_tree (t, XEXP (x, 0)),
4031                                    make_tree (t, XEXP (x, 1)))));
4032    default:
4033       t = make_node (RTL_EXPR);
4034       TREE_TYPE (t) = type;
4035       RTL_EXPR_RTL (t) = x;
4036       /* There are no insns to be output
4037          when this rtl_expr is used.  */
4038       RTL_EXPR_SEQUENCE (t) = 0;
4039       return t;
4040     }
4041 }
4042
4043 /* Return an rtx representing the value of X * MULT + ADD.
4044    TARGET is a suggestion for where to store the result (an rtx).
4045    MODE is the machine mode for the computation.
4046    X and MULT must have mode MODE.  ADD may have a different mode.
4047    So can X (defaults to same as MODE).
4048    UNSIGNEDP is non-zero to do unsigned multiplication.
4049    This may emit insns.  */
4050
4051 rtx
4052 expand_mult_add (x, target, mult, add, mode, unsignedp)
4053      rtx x, target, mult, add;
4054      enum machine_mode mode;
4055      int unsignedp;
4056 {
4057   tree type = type_for_mode (mode, unsignedp);
4058   tree add_type = (GET_MODE (add) == VOIDmode
4059                    ? type : type_for_mode (GET_MODE (add), unsignedp));
4060   tree result =  fold (build (PLUS_EXPR, type,
4061                               fold (build (MULT_EXPR, type,
4062                                            make_tree (type, x),
4063                                            make_tree (type, mult))),
4064                               make_tree (add_type, add)));
4065
4066   return expand_expr (result, target, VOIDmode, 0);
4067 }
4068 \f
4069 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4070    and returning TARGET.
4071
4072    If TARGET is 0, a pseudo-register or constant is returned.  */
4073
4074 rtx
4075 expand_and (op0, op1, target)
4076      rtx op0, op1, target;
4077 {
4078   enum machine_mode mode = VOIDmode;
4079   rtx tem;
4080
4081   if (GET_MODE (op0) != VOIDmode)
4082     mode = GET_MODE (op0);
4083   else if (GET_MODE (op1) != VOIDmode)
4084     mode = GET_MODE (op1);
4085
4086   if (mode != VOIDmode)
4087     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4088   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4089     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4090   else
4091     abort ();
4092
4093   if (target == 0)
4094     target = tem;
4095   else if (tem != target)
4096     emit_move_insn (target, tem);
4097   return target;
4098 }
4099 \f
4100 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4101    and storing in TARGET.  Normally return TARGET.
4102    Return 0 if that cannot be done.
4103
4104    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4105    it is VOIDmode, they cannot both be CONST_INT.
4106
4107    UNSIGNEDP is for the case where we have to widen the operands
4108    to perform the operation.  It says to use zero-extension.
4109
4110    NORMALIZEP is 1 if we should convert the result to be either zero
4111    or one.  Normalize is -1 if we should convert the result to be
4112    either zero or -1.  If NORMALIZEP is zero, the result will be left
4113    "raw" out of the scc insn.  */
4114
4115 rtx
4116 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4117      rtx target;
4118      enum rtx_code code;
4119      rtx op0, op1;
4120      enum machine_mode mode;
4121      int unsignedp;
4122      int normalizep;
4123 {
4124   rtx subtarget;
4125   enum insn_code icode;
4126   enum machine_mode compare_mode;
4127   enum machine_mode target_mode = GET_MODE (target);
4128   rtx tem;
4129   rtx last = get_last_insn ();
4130   rtx pattern, comparison;
4131
4132   if (unsignedp)
4133     code = unsigned_condition (code);
4134
4135   /* If one operand is constant, make it the second one.  Only do this
4136      if the other operand is not constant as well.  */
4137
4138   if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4139       || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4140     {
4141       tem = op0;
4142       op0 = op1;
4143       op1 = tem;
4144       code = swap_condition (code);
4145     }
4146
4147   if (mode == VOIDmode)
4148     mode = GET_MODE (op0);
4149
4150   /* For some comparisons with 1 and -1, we can convert this to
4151      comparisons with zero.  This will often produce more opportunities for
4152      store-flag insns.  */
4153
4154   switch (code)
4155     {
4156     case LT:
4157       if (op1 == const1_rtx)
4158         op1 = const0_rtx, code = LE;
4159       break;
4160     case LE:
4161       if (op1 == constm1_rtx)
4162         op1 = const0_rtx, code = LT;
4163       break;
4164     case GE:
4165       if (op1 == const1_rtx)
4166         op1 = const0_rtx, code = GT;
4167       break;
4168     case GT:
4169       if (op1 == constm1_rtx)
4170         op1 = const0_rtx, code = GE;
4171       break;
4172     case GEU:
4173       if (op1 == const1_rtx)
4174         op1 = const0_rtx, code = NE;
4175       break;
4176     case LTU:
4177       if (op1 == const1_rtx)
4178         op1 = const0_rtx, code = EQ;
4179       break;
4180     default:
4181       break;
4182     }
4183
4184   /* From now on, we won't change CODE, so set ICODE now.  */
4185   icode = setcc_gen_code[(int) code];
4186
4187   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4188      complement of A (for GE) and shifting the sign bit to the low bit.  */
4189   if (op1 == const0_rtx && (code == LT || code == GE)
4190       && GET_MODE_CLASS (mode) == MODE_INT
4191       && (normalizep || STORE_FLAG_VALUE == 1
4192           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4193               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4194                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4195     {
4196       subtarget = target;
4197
4198       /* If the result is to be wider than OP0, it is best to convert it
4199          first.  If it is to be narrower, it is *incorrect* to convert it
4200          first.  */
4201       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4202         {
4203           op0 = protect_from_queue (op0, 0);
4204           op0 = convert_modes (target_mode, mode, op0, 0);
4205           mode = target_mode;
4206         }
4207
4208       if (target_mode != mode)
4209         subtarget = 0;
4210
4211       if (code == GE)
4212         op0 = expand_unop (mode, one_cmpl_optab, op0,
4213                            ((STORE_FLAG_VALUE == 1 || normalizep)
4214                             ? 0 : subtarget), 0);
4215
4216       if (STORE_FLAG_VALUE == 1 || normalizep)
4217         /* If we are supposed to produce a 0/1 value, we want to do
4218            a logical shift from the sign bit to the low-order bit; for
4219            a -1/0 value, we do an arithmetic shift.  */
4220         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4221                             size_int (GET_MODE_BITSIZE (mode) - 1),
4222                             subtarget, normalizep != -1);
4223
4224       if (mode != target_mode)
4225         op0 = convert_modes (target_mode, mode, op0, 0);
4226
4227       return op0;
4228     }
4229
4230   if (icode != CODE_FOR_nothing)
4231     {
4232       insn_operand_predicate_fn pred;
4233
4234       /* We think we may be able to do this with a scc insn.  Emit the
4235          comparison and then the scc insn.
4236
4237          compare_from_rtx may call emit_queue, which would be deleted below
4238          if the scc insn fails.  So call it ourselves before setting LAST.
4239          Likewise for do_pending_stack_adjust.  */
4240
4241       emit_queue ();
4242       do_pending_stack_adjust ();
4243       last = get_last_insn ();
4244
4245       comparison
4246         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4247       if (GET_CODE (comparison) == CONST_INT)
4248         return (comparison == const0_rtx ? const0_rtx
4249                 : normalizep == 1 ? const1_rtx
4250                 : normalizep == -1 ? constm1_rtx
4251                 : const_true_rtx);
4252
4253       /* If the code of COMPARISON doesn't match CODE, something is
4254          wrong; we can no longer be sure that we have the operation.
4255          We could handle this case, but it should not happen.  */
4256
4257       if (GET_CODE (comparison) != code)
4258         abort ();
4259
4260       /* Get a reference to the target in the proper mode for this insn.  */
4261       compare_mode = insn_data[(int) icode].operand[0].mode;
4262       subtarget = target;
4263       pred = insn_data[(int) icode].operand[0].predicate;
4264       if (preserve_subexpressions_p ()
4265           || ! (*pred) (subtarget, compare_mode))
4266         subtarget = gen_reg_rtx (compare_mode);
4267
4268       pattern = GEN_FCN (icode) (subtarget);
4269       if (pattern)
4270         {
4271           emit_insn (pattern);
4272
4273           /* If we are converting to a wider mode, first convert to
4274              TARGET_MODE, then normalize.  This produces better combining
4275              opportunities on machines that have a SIGN_EXTRACT when we are
4276              testing a single bit.  This mostly benefits the 68k.
4277
4278              If STORE_FLAG_VALUE does not have the sign bit set when
4279              interpreted in COMPARE_MODE, we can do this conversion as
4280              unsigned, which is usually more efficient.  */
4281           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4282             {
4283               convert_move (target, subtarget,
4284                             (GET_MODE_BITSIZE (compare_mode)
4285                              <= HOST_BITS_PER_WIDE_INT)
4286                             && 0 == (STORE_FLAG_VALUE
4287                                      & ((HOST_WIDE_INT) 1
4288                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4289               op0 = target;
4290               compare_mode = target_mode;
4291             }
4292           else
4293             op0 = subtarget;
4294
4295           /* If we want to keep subexpressions around, don't reuse our
4296              last target.  */
4297
4298           if (preserve_subexpressions_p ())
4299             subtarget = 0;
4300
4301           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4302              we don't have to do anything.  */
4303           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4304             ;
4305           /* STORE_FLAG_VALUE might be the most negative number, so write
4306              the comparison this way to avoid a compiler-time warning.  */
4307           else if (- normalizep == STORE_FLAG_VALUE)
4308             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4309
4310           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4311              makes it hard to use a value of just the sign bit due to
4312              ANSI integer constant typing rules.  */
4313           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4314                    && (STORE_FLAG_VALUE
4315                        & ((HOST_WIDE_INT) 1
4316                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4317             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4318                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4319                                 subtarget, normalizep == 1);
4320           else if (STORE_FLAG_VALUE & 1)
4321             {
4322               op0 = expand_and (op0, const1_rtx, subtarget);
4323               if (normalizep == -1)
4324                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4325             }
4326           else
4327             abort ();
4328
4329           /* If we were converting to a smaller mode, do the
4330              conversion now.  */
4331           if (target_mode != compare_mode)
4332             {
4333               convert_move (target, op0, 0);
4334               return target;
4335             }
4336           else
4337             return op0;
4338         }
4339     }
4340
4341   delete_insns_since (last);
4342
4343   /* If expensive optimizations, use different pseudo registers for each
4344      insn, instead of reusing the same pseudo.  This leads to better CSE,
4345      but slows down the compiler, since there are more pseudos */
4346   subtarget = (!flag_expensive_optimizations
4347                && (target_mode == mode)) ? target : NULL_RTX;
4348
4349   /* If we reached here, we can't do this with a scc insn.  However, there
4350      are some comparisons that can be done directly.  For example, if
4351      this is an equality comparison of integers, we can try to exclusive-or
4352      (or subtract) the two operands and use a recursive call to try the
4353      comparison with zero.  Don't do any of these cases if branches are
4354      very cheap.  */
4355
4356   if (BRANCH_COST > 0
4357       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4358       && op1 != const0_rtx)
4359     {
4360       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4361                           OPTAB_WIDEN);
4362
4363       if (tem == 0)
4364         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4365                             OPTAB_WIDEN);
4366       if (tem != 0)
4367         tem = emit_store_flag (target, code, tem, const0_rtx,
4368                                mode, unsignedp, normalizep);
4369       if (tem == 0)
4370         delete_insns_since (last);
4371       return tem;
4372     }
4373
4374   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4375      the constant zero.  Reject all other comparisons at this point.  Only
4376      do LE and GT if branches are expensive since they are expensive on
4377      2-operand machines.  */
4378
4379   if (BRANCH_COST == 0
4380       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4381       || (code != EQ && code != NE
4382           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4383     return 0;
4384
4385   /* See what we need to return.  We can only return a 1, -1, or the
4386      sign bit.  */
4387
4388   if (normalizep == 0)
4389     {
4390       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4391         normalizep = STORE_FLAG_VALUE;
4392
4393       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4394                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4395                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4396         ;
4397       else
4398         return 0;
4399     }
4400
4401   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4402      do the necessary operation below.  */
4403
4404   tem = 0;
4405
4406   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4407      the sign bit set.  */
4408
4409   if (code == LE)
4410     {
4411       /* This is destructive, so SUBTARGET can't be OP0.  */
4412       if (rtx_equal_p (subtarget, op0))
4413         subtarget = 0;
4414
4415       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4416                           OPTAB_WIDEN);
4417       if (tem)
4418         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4419                             OPTAB_WIDEN);
4420     }
4421
4422   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4423      number of bits in the mode of OP0, minus one.  */
4424
4425   if (code == GT)
4426     {
4427       if (rtx_equal_p (subtarget, op0))
4428         subtarget = 0;
4429
4430       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4431                           size_int (GET_MODE_BITSIZE (mode) - 1),
4432                           subtarget, 0);
4433       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4434                           OPTAB_WIDEN);
4435     }
4436
4437   if (code == EQ || code == NE)
4438     {
4439       /* For EQ or NE, one way to do the comparison is to apply an operation
4440          that converts the operand into a positive number if it is non-zero
4441          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4442          for NE we negate.  This puts the result in the sign bit.  Then we
4443          normalize with a shift, if needed.
4444
4445          Two operations that can do the above actions are ABS and FFS, so try
4446          them.  If that doesn't work, and MODE is smaller than a full word,
4447          we can use zero-extension to the wider mode (an unsigned conversion)
4448          as the operation.  */
4449
4450       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4451         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4452       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4453         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4454       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4455         {
4456           op0 = protect_from_queue (op0, 0);
4457           tem = convert_modes (word_mode, mode, op0, 1);
4458           mode = word_mode;
4459         }
4460
4461       if (tem != 0)
4462         {
4463           if (code == EQ)
4464             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4465                                 0, OPTAB_WIDEN);
4466           else
4467             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4468         }
4469
4470       /* If we couldn't do it that way, for NE we can "or" the two's complement
4471          of the value with itself.  For EQ, we take the one's complement of
4472          that "or", which is an extra insn, so we only handle EQ if branches
4473          are expensive.  */
4474
4475       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4476         {
4477           if (rtx_equal_p (subtarget, op0))
4478             subtarget = 0;
4479
4480           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4481           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4482                               OPTAB_WIDEN);
4483
4484           if (tem && code == EQ)
4485             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4486         }
4487     }
4488
4489   if (tem && normalizep)
4490     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4491                         size_int (GET_MODE_BITSIZE (mode) - 1),
4492                         subtarget, normalizep == 1);
4493
4494   if (tem)
4495     {
4496       if (GET_MODE (tem) != target_mode)
4497         {
4498           convert_move (target, tem, 0);
4499           tem = target;
4500         }
4501       else if (!subtarget)
4502         {
4503           emit_move_insn (target, tem);
4504           tem = target;
4505         }
4506     }
4507   else
4508     delete_insns_since (last);
4509
4510   return tem;
4511 }
4512
4513 /* Like emit_store_flag, but always succeeds.  */
4514
4515 rtx
4516 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4517      rtx target;
4518      enum rtx_code code;
4519      rtx op0, op1;
4520      enum machine_mode mode;
4521      int unsignedp;
4522      int normalizep;
4523 {
4524   rtx tem, label;
4525
4526   /* First see if emit_store_flag can do the job.  */
4527   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4528   if (tem != 0)
4529     return tem;
4530
4531   if (normalizep == 0)
4532     normalizep = 1;
4533
4534   /* If this failed, we have to do this with set/compare/jump/set code.  */
4535
4536   if (GET_CODE (target) != REG
4537       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4538     target = gen_reg_rtx (GET_MODE (target));
4539
4540   emit_move_insn (target, const1_rtx);
4541   label = gen_label_rtx ();
4542   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4543                            NULL_RTX, label);
4544
4545   emit_move_insn (target, const0_rtx);
4546   emit_label (label);
4547
4548   return target;
4549 }
4550 \f
4551 /* Perform possibly multi-word comparison and conditional jump to LABEL
4552    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4553
4554    The algorithm is based on the code in expr.c:do_jump.
4555
4556    Note that this does not perform a general comparison.  Only variants
4557    generated within expmed.c are correctly handled, others abort (but could
4558    be handled if needed).  */
4559
4560 static void
4561 do_cmp_and_jump (arg1, arg2, op, mode, label)
4562      rtx arg1, arg2, label;
4563      enum rtx_code op;
4564      enum machine_mode mode;
4565 {
4566   /* If this mode is an integer too wide to compare properly,
4567      compare word by word.  Rely on cse to optimize constant cases.  */
4568
4569   if (GET_MODE_CLASS (mode) == MODE_INT
4570       && ! can_compare_p (op, mode, ccp_jump))
4571     {
4572       rtx label2 = gen_label_rtx ();
4573
4574       switch (op)
4575         {
4576         case LTU:
4577           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4578           break;
4579
4580         case LEU:
4581           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4582           break;
4583
4584         case LT:
4585           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4586           break;
4587
4588         case GT:
4589           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4590           break;
4591
4592         case GE:
4593           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4594           break;
4595
4596           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4597              that's the only equality operations we do */
4598         case EQ:
4599           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4600             abort();
4601           do_jump_by_parts_equality_rtx (arg1, label2, label);
4602           break;
4603
4604         case NE:
4605           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4606             abort();
4607           do_jump_by_parts_equality_rtx (arg1, label, label2);
4608           break;
4609
4610         default:
4611           abort();
4612         }
4613
4614       emit_label (label2);
4615     }
4616   else
4617     {
4618       emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4619     }
4620 }