gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000 Free Software Foundation, Inc.
   5
   6 This file is part of GNU CC.
   7
   8 GNU CC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU CC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU CC; see the file COPYING.  If not, write to
  20 the Free Software Foundation, 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-flags.h"
  32 #include "insn-codes.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "real.h"
  36 #include "recog.h"
  37
  38 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  39                                                  unsigned HOST_WIDE_INT,
  40                                                  unsigned HOST_WIDE_INT, rtx,
  41                                                  unsigned int));
  42 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  43                                                  unsigned HOST_WIDE_INT, rtx,
  44                                                  unsigned int));
  45 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  46                                                  unsigned HOST_WIDE_INT,
  47                                                  unsigned HOST_WIDE_INT,
  48                                                  unsigned HOST_WIDE_INT,
  49                                                  rtx, int, unsigned int));
  50 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  51                                                  int, int));
  52 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  53                                                  int, int));
  54 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  55                                                  unsigned HOST_WIDE_INT, int,
  56                                                  unsigned int));
  57 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  58                                                  enum machine_mode, rtx));
  59
  60 /* Non-zero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static int sdiv_pow2_cheap, smod_pow2_cheap;
  66
  67 #ifndef SLOW_UNALIGNED_ACCESS
  68 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  69 #endif
  70
  71 /* For compilers that support multiple targets with different word sizes,
  72    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  73    is the H8/300(H) compiler.  */
  74
  75 #ifndef MAX_BITS_PER_WORD
  76 #define MAX_BITS_PER_WORD BITS_PER_WORD
  77 #endif
  78
  79 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  80    shift count and some by mode.  */
  81 static int add_cost, negate_cost, zero_cost;
  82 static int shift_cost[MAX_BITS_PER_WORD];
  83 static int shiftadd_cost[MAX_BITS_PER_WORD];
  84 static int shiftsub_cost[MAX_BITS_PER_WORD];
  85 static int mul_cost[NUM_MACHINE_MODES];
  86 static int div_cost[NUM_MACHINE_MODES];
  87 static int mul_widen_cost[NUM_MACHINE_MODES];
  88 static int mul_highpart_cost[NUM_MACHINE_MODES];
  89
  90 void
  91 init_expmed ()
  92 {
  93   char *free_point;
  94   /* This is "some random pseudo register" for purposes of calling recog
  95      to see what insns exist.  */
  96   rtx reg = gen_rtx_REG (word_mode, 10000);
  97   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  98   int dummy;
  99   int m;
 100   enum machine_mode mode, wider_mode;
 101
 102   start_sequence ();
 103
 104   /* Since we are on the permanent obstack, we must be sure we save this
 105      spot AFTER we call start_sequence, since it will reuse the rtl it
 106      makes.  */
 107   free_point = (char *) oballoc (0);
 108
 109   reg = gen_rtx_REG (word_mode, 10000);
 110
 111   zero_cost = rtx_cost (const0_rtx, 0);
 112   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 113
 114   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 115                                        gen_rtx_ASHIFT (word_mode, reg,
 116                                                        const0_rtx)));
 117
 118   shiftadd_insn
 119     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                               gen_rtx_PLUS (word_mode,
 121                                             gen_rtx_MULT (word_mode,
 122                                                           reg, const0_rtx),
 123                                             reg)));
 124
 125   shiftsub_insn
 126     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 127                               gen_rtx_MINUS (word_mode,
 128                                              gen_rtx_MULT (word_mode,
 129                                                            reg, const0_rtx),
 130                                              reg)));
 131
 132   init_recog ();
 133
 134   shift_cost[0] = 0;
 135   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 136
 137   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 138     {
 139       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 140
 141       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 142       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 143         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 144
 145       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 146         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 147       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 148         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 149
 150       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 151         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 152       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 153         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 154     }
 155
 156   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 157
 158   sdiv_pow2_cheap
 159     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 160        <= 2 * add_cost);
 161   smod_pow2_cheap
 162     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 163        <= 2 * add_cost);
 164
 165   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 166        mode != VOIDmode;
 167        mode = GET_MODE_WIDER_MODE (mode))
 168     {
 169       reg = gen_rtx_REG (mode, 10000);
 170       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 171       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 172       wider_mode = GET_MODE_WIDER_MODE (mode);
 173       if (wider_mode != VOIDmode)
 174         {
 175           mul_widen_cost[(int) wider_mode]
 176             = rtx_cost (gen_rtx_MULT (wider_mode,
 177                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 178                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 179                         SET);
 180           mul_highpart_cost[(int) mode]
 181             = rtx_cost (gen_rtx_TRUNCATE
 182                         (mode,
 183                          gen_rtx_LSHIFTRT (wider_mode,
 184                                            gen_rtx_MULT (wider_mode,
 185                                                          gen_rtx_ZERO_EXTEND
 186                                                          (wider_mode, reg),
 187                                                          gen_rtx_ZERO_EXTEND
 188                                                          (wider_mode, reg)),
 189                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 190                         SET);
 191         }
 192     }
 193
 194   /* Free the objects we just allocated.  */
 195   end_sequence ();
 196   obfree (free_point);
 197 }
 198
 199 /* Return an rtx representing minus the value of X.
 200    MODE is the intended mode of the result,
 201    useful if X is a CONST_INT.  */
 202
 203 rtx
 204 negate_rtx (mode, x)
 205      enum machine_mode mode;
 206      rtx x;
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215 \f
 216 /* Generate code to store value from rtx VALUE
 217    into a bit-field within structure STR_RTX
 218    containing BITSIZE bits starting at bit BITNUM.
 219    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 220    ALIGN is the alignment that STR_RTX is known to have.
 221    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 222
 223 /* ??? Note that there are two different ideas here for how
 224    to determine the size to count bits within, for a register.
 225    One is BITS_PER_WORD, and the other is the size of operand 3
 226    of the insv pattern.
 227
 228    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 229    else, we use the mode of operand 3.  */
 230
 231 rtx
 232 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 233      rtx str_rtx;
 234      unsigned HOST_WIDE_INT bitsize;
 235      unsigned HOST_WIDE_INT bitnum;
 236      enum machine_mode fieldmode;
 237      rtx value;
 238      unsigned int align;
 239      HOST_WIDE_INT total_size;
 240 {
 241   unsigned int unit
 242     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 243   unsigned HOST_WIDE_INT offset = bitnum / unit;
 244   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 245   register rtx op0 = str_rtx;
 246 #ifdef HAVE_insv
 247   unsigned HOST_WIDE_INT insv_bitsize;
 248   enum machine_mode op_mode;
 249
 250   op_mode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 251   if (op_mode == VOIDmode)
 252     op_mode = word_mode;
 253   insv_bitsize = GET_MODE_BITSIZE (op_mode);
 254 #endif
 255
 256   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 257     abort ();
 258
 259   /* Discount the part of the structure before the desired byte.
 260      We need to know how many bytes are safe to reference after it.  */
 261   if (total_size >= 0)
 262     total_size -= (bitpos / BIGGEST_ALIGNMENT
 263                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 264
 265   while (GET_CODE (op0) == SUBREG)
 266     {
 267       /* The following line once was done only if WORDS_BIG_ENDIAN,
 268          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 269          meaningful at a much higher level; when structures are copied
 270          between memory and regs, the higher-numbered regs
 271          always get higher addresses.  */
 272       offset += SUBREG_WORD (op0);
 273       /* We used to adjust BITPOS here, but now we do the whole adjustment
 274          right after the loop.  */
 275       op0 = SUBREG_REG (op0);
 276     }
 277
 278   /* Make sure we are playing with integral modes.  Pun with subregs
 279      if we aren't.  */
 280   {
 281     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 282     if (imode != GET_MODE (op0))
 283       {
 284         if (GET_CODE (op0) == MEM)
 285           op0 = change_address (op0, imode, NULL_RTX);
 286         else if (imode != BLKmode)
 287           op0 = gen_lowpart (imode, op0);
 288         else
 289           abort ();
 290       }
 291   }
 292
 293   /* If OP0 is a register, BITPOS must count within a word.
 294      But as we have it, it counts within whatever size OP0 now has.
 295      On a bigendian machine, these are not the same, so convert.  */
 296   if (BYTES_BIG_ENDIAN
 297       && GET_CODE (op0) != MEM
 298       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 299     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 300
 301   value = protect_from_queue (value, 0);
 302
 303   if (flag_force_mem)
 304     value = force_not_mem (value);
 305
 306   if ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 307        || (GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)
 308            && GET_MODE_SIZE (fieldmode) != 0))
 309       && (GET_CODE (op0) != MEM
 310           || ! SLOW_UNALIGNED_ACCESS (fieldmode, align)
 311           || (offset * BITS_PER_UNIT % bitsize == 0
 312               && align % GET_MODE_BITSIZE (fieldmode) == 0))
 313       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 314       && bitsize == GET_MODE_BITSIZE (fieldmode))
 315     {
 316       /* Storing in a full-word or multi-word field in a register
 317          can be done with just SUBREG.  Also, storing in the entire object
 318          can be done with just SUBREG.  */
 319       if (GET_MODE (op0) != fieldmode)
 320         {
 321           if (GET_CODE (op0) == SUBREG)
 322             {
 323               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 324                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 325                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 326                 op0 = SUBREG_REG (op0);
 327               else
 328                 /* Else we've got some float mode source being extracted into
 329                    a different float mode destination -- this combination of
 330                    subregs results in Severe Tire Damage.  */
 331                 abort ();
 332             }
 333           if (GET_CODE (op0) == REG)
 334             op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
 335           else
 336             op0 = change_address (op0, fieldmode,
 337                                   plus_constant (XEXP (op0, 0), offset));
 338         }
 339       emit_move_insn (op0, value);
 340       return value;
 341     }
 342
 343   /* Storing an lsb-aligned field in a register
 344      can be done with a movestrict instruction.  */
 345
 346   if (GET_CODE (op0) != MEM
 347       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 348       && bitsize == GET_MODE_BITSIZE (fieldmode)
 349       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 350           != CODE_FOR_nothing))
 351     {
 352       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 353
 354       /* Get appropriate low part of the value being stored.  */
 355       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 356         value = gen_lowpart (fieldmode, value);
 357       else if (!(GET_CODE (value) == SYMBOL_REF
 358                  || GET_CODE (value) == LABEL_REF
 359                  || GET_CODE (value) == CONST))
 360         value = convert_to_mode (fieldmode, value, 0);
 361
 362       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 363         value = copy_to_mode_reg (fieldmode, value);
 364
 365       if (GET_CODE (op0) == SUBREG)
 366         {
 367           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 368               || GET_MODE_CLASS (fieldmode) == MODE_INT
 369               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 370             op0 = SUBREG_REG (op0);
 371           else
 372             /* Else we've got some float mode source being extracted into
 373                a different float mode destination -- this combination of
 374                subregs results in Severe Tire Damage.  */
 375             abort ();
 376         }
 377
 378       emit_insn (GEN_FCN (icode)
 379                  (gen_rtx_SUBREG (fieldmode, op0, offset), value));
 380
 381       return value;
 382     }
 383
 384   /* Handle fields bigger than a word.  */
 385
 386   if (bitsize > BITS_PER_WORD)
 387     {
 388       /* Here we transfer the words of the field
 389          in the order least significant first.
 390          This is because the most significant word is the one which may
 391          be less than full.
 392          However, only do that if the value is not BLKmode.  */
 393
 394       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 395       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 396       unsigned int i;
 397
 398       /* This is the mode we must force value to, so that there will be enough
 399          subwords to extract.  Note that fieldmode will often (always?) be
 400          VOIDmode, because that is what store_field uses to indicate that this
 401          is a bit field, but passing VOIDmode to operand_subword_force will
 402          result in an abort.  */
 403       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
 404
 405       for (i = 0; i < nwords; i++)
 406         {
 407           /* If I is 0, use the low-order word in both field and target;
 408              if I is 1, use the next to lowest word; and so on.  */
 409           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 410           unsigned int bit_offset = (backwards
 411                             ? MAX ((int) bitsize - ((int) i + 1)
 412                                    * BITS_PER_WORD,
 413                                    0)
 414                             : (int) i * BITS_PER_WORD);
 415
 416           store_bit_field (op0, MIN (BITS_PER_WORD,
 417                                      bitsize - i * BITS_PER_WORD),
 418                            bitnum + bit_offset, word_mode,
 419                            operand_subword_force (value, wordnum,
 420                                                   (GET_MODE (value) == VOIDmode
 421                                                    ? fieldmode
 422                                                    : GET_MODE (value))),
 423                            align, total_size);
 424         }
 425       return value;
 426     }
 427
 428   /* From here on we can assume that the field to be stored in is
 429      a full-word (whatever type that is), since it is shorter than a word.  */
 430
 431   /* OFFSET is the number of words or bytes (UNIT says which)
 432      from STR_RTX to the first word or byte containing part of the field.  */
 433
 434   if (GET_CODE (op0) != MEM)
 435     {
 436       if (offset != 0
 437           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 438         {
 439           if (GET_CODE (op0) != REG)
 440             {
 441               /* Since this is a destination (lvalue), we can't copy it to a
 442                  pseudo.  We can trivially remove a SUBREG that does not
 443                  change the size of the operand.  Such a SUBREG may have been
 444                  added above.  Otherwise, abort.  */
 445               if (GET_CODE (op0) == SUBREG
 446                   && (GET_MODE_SIZE (GET_MODE (op0))
 447                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 448                 op0 = SUBREG_REG (op0);
 449               else
 450                 abort ();
 451             }
 452           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 453                                 op0, offset);
 454         }
 455       offset = 0;
 456     }
 457   else
 458     {
 459       op0 = protect_from_queue (op0, 1);
 460     }
 461
 462   /* If VALUE is a floating-point mode, access it as an integer of the
 463      corresponding size.  This can occur on a machine with 64 bit registers
 464      that uses SFmode for float.  This can also occur for unaligned float
 465      structure fields.  */
 466   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 467     {
 468       if (GET_CODE (value) != REG)
 469         value = copy_to_reg (value);
 470       value = gen_rtx_SUBREG (word_mode, value, 0);
 471     }
 472
 473   /* Now OFFSET is nonzero only if OP0 is memory
 474      and is therefore always measured in bytes.  */
 475
 476 #ifdef HAVE_insv
 477   if (HAVE_insv
 478       && GET_MODE (value) != BLKmode
 479       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 480       /* Ensure insv's size is wide enough for this field.  */
 481       && (insv_bitsize >= bitsize)
 482       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 483             && (bitsize + bitpos > insv_bitsize)))
 484     {
 485       int xbitpos = bitpos;
 486       rtx value1;
 487       rtx xop0 = op0;
 488       rtx last = get_last_insn ();
 489       rtx pat;
 490       enum machine_mode maxmode;
 491       int save_volatile_ok = volatile_ok;
 492
 493       maxmode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 494       if (maxmode == VOIDmode)
 495         maxmode = word_mode;
 496
 497       volatile_ok = 1;
 498
 499       /* If this machine's insv can only insert into a register, copy OP0
 500          into a register and save it back later.  */
 501       /* This used to check flag_force_mem, but that was a serious
 502          de-optimization now that flag_force_mem is enabled by -O2.  */
 503       if (GET_CODE (op0) == MEM
 504           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 505                 (op0, VOIDmode)))
 506         {
 507           rtx tempreg;
 508           enum machine_mode bestmode;
 509
 510           /* Get the mode to use for inserting into this field.  If OP0 is
 511              BLKmode, get the smallest mode consistent with the alignment. If
 512              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 513              mode. Otherwise, use the smallest mode containing the field.  */
 514
 515           if (GET_MODE (op0) == BLKmode
 516               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 517             bestmode
 518               = get_best_mode (bitsize, bitnum, align, maxmode,
 519                                MEM_VOLATILE_P (op0));
 520           else
 521             bestmode = GET_MODE (op0);
 522
 523           if (bestmode == VOIDmode
 524               || (SLOW_UNALIGNED_ACCESS (bestmode, align)
 525                   && GET_MODE_BITSIZE (bestmode) > align))
 526             goto insv_loses;
 527
 528           /* Adjust address to point to the containing unit of that mode.  */
 529           unit = GET_MODE_BITSIZE (bestmode);
 530           /* Compute offset as multiple of this unit, counting in bytes.  */
 531           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 532           bitpos = bitnum % unit;
 533           op0 = change_address (op0, bestmode,
 534                                 plus_constant (XEXP (op0, 0), offset));
 535
 536           /* Fetch that unit, store the bitfield in it, then store
 537              the unit.  */
 538           tempreg = copy_to_reg (op0);
 539           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 540                            align, total_size);
 541           emit_move_insn (op0, tempreg);
 542           return value;
 543         }
 544       volatile_ok = save_volatile_ok;
 545
 546       /* Add OFFSET into OP0's address.  */
 547       if (GET_CODE (xop0) == MEM)
 548         xop0 = change_address (xop0, byte_mode,
 549                                plus_constant (XEXP (xop0, 0), offset));
 550
 551       /* If xop0 is a register, we need it in MAXMODE
 552          to make it acceptable to the format of insv.  */
 553       if (GET_CODE (xop0) == SUBREG)
 554         /* We can't just change the mode, because this might clobber op0,
 555            and we will need the original value of op0 if insv fails.  */
 556         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
 557       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 558         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 559
 560       /* On big-endian machines, we count bits from the most significant.
 561          If the bit field insn does not, we must invert.  */
 562
 563       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 564         xbitpos = unit - bitsize - xbitpos;
 565
 566       /* We have been counting XBITPOS within UNIT.
 567          Count instead within the size of the register.  */
 568       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 569         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 570
 571       unit = GET_MODE_BITSIZE (maxmode);
 572
 573       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 574       value1 = value;
 575       if (GET_MODE (value) != maxmode)
 576         {
 577           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 578             {
 579               /* Optimization: Don't bother really extending VALUE
 580                  if it has all the bits we will actually use.  However,
 581                  if we must narrow it, be sure we do it correctly.  */
 582
 583               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 584                 {
 585                   /* Avoid making subreg of a subreg, or of a mem.  */
 586                   if (GET_CODE (value1) != REG)
 587                     value1 = copy_to_reg (value1);
 588                   value1 = gen_rtx_SUBREG (maxmode, value1, 0);
 589                 }
 590               else
 591                 value1 = gen_lowpart (maxmode, value1);
 592             }
 593           else if (!CONSTANT_P (value))
 594             /* Parse phase is supposed to make VALUE's data type
 595                match that of the component reference, which is a type
 596                at least as wide as the field; so VALUE should have
 597                a mode that corresponds to that type.  */
 598             abort ();
 599         }
 600
 601       /* If this machine's insv insists on a register,
 602          get VALUE1 into a register.  */
 603       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 604              (value1, maxmode)))
 605         value1 = force_reg (maxmode, value1);
 606
 607       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 608       if (pat)
 609         emit_insn (pat);
 610       else
 611         {
 612           delete_insns_since (last);
 613           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 614         }
 615     }
 616   else
 617     insv_loses:
 618 #endif
 619     /* Insv is not available; store using shifts and boolean ops.  */
 620     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 621   return value;
 622 }
 623 \f
 624 /* Use shifts and boolean operations to store VALUE
 625    into a bit field of width BITSIZE
 626    in a memory location specified by OP0 except offset by OFFSET bytes.
 627      (OFFSET must be 0 if OP0 is a register.)
 628    The field starts at position BITPOS within the byte.
 629     (If OP0 is a register, it may be a full word or a narrower mode,
 630      but BITPOS still counts within a full word,
 631      which is significant on bigendian machines.)
 632    STRUCT_ALIGN is the alignment the structure is known to have.
 633
 634    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 635
 636 static void
 637 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 638      register rtx op0;
 639      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 640      register rtx value;
 641      unsigned int struct_align;
 642 {
 643   register enum machine_mode mode;
 644   unsigned int total_bits = BITS_PER_WORD;
 645   rtx subtarget, temp;
 646   int all_zero = 0;
 647   int all_one = 0;
 648
 649   if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
 650     struct_align = BIGGEST_ALIGNMENT;
 651
 652   /* There is a case not handled here:
 653      a structure with a known alignment of just a halfword
 654      and a field split across two aligned halfwords within the structure.
 655      Or likewise a structure with a known alignment of just a byte
 656      and a field split across two bytes.
 657      Such cases are not supposed to be able to occur.  */
 658
 659   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 660     {
 661       if (offset != 0)
 662         abort ();
 663       /* Special treatment for a bit field split across two registers.  */
 664       if (bitsize + bitpos > BITS_PER_WORD)
 665         {
 666           store_split_bit_field (op0, bitsize, bitpos,
 667                                  value, BITS_PER_WORD);
 668           return;
 669         }
 670     }
 671   else
 672     {
 673       /* Get the proper mode to use for this field.  We want a mode that
 674          includes the entire field.  If such a mode would be larger than
 675          a word, we won't be doing the extraction the normal way.  */
 676
 677       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 678                             struct_align, word_mode,
 679                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 680
 681       if (mode == VOIDmode)
 682         {
 683           /* The only way this should occur is if the field spans word
 684              boundaries.  */
 685           store_split_bit_field (op0,
 686                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 687                                  value, struct_align);
 688           return;
 689         }
 690
 691       total_bits = GET_MODE_BITSIZE (mode);
 692
 693       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 694          be in the range 0 to total_bits-1, and put any excess bytes in
 695          OFFSET.  */
 696       if (bitpos >= total_bits)
 697         {
 698           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 699           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 700                      * BITS_PER_UNIT);
 701         }
 702
 703       /* Get ref to an aligned byte, halfword, or word containing the field.
 704          Adjust BITPOS to be position within a word,
 705          and OFFSET to be the offset of that word.
 706          Then alter OP0 to refer to that word.  */
 707       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 708       offset -= (offset % (total_bits / BITS_PER_UNIT));
 709       op0 = change_address (op0, mode,
 710                             plus_constant (XEXP (op0, 0), offset));
 711     }
 712
 713   mode = GET_MODE (op0);
 714
 715   /* Now MODE is either some integral mode for a MEM as OP0,
 716      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 717      The bit field is contained entirely within OP0.
 718      BITPOS is the starting bit number within OP0.
 719      (OP0's mode may actually be narrower than MODE.)  */
 720
 721   if (BYTES_BIG_ENDIAN)
 722       /* BITPOS is the distance between our msb
 723          and that of the containing datum.
 724          Convert it to the distance from the lsb.  */
 725       bitpos = total_bits - bitsize - bitpos;
 726
 727   /* Now BITPOS is always the distance between our lsb
 728      and that of OP0.  */
 729
 730   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 731      we must first convert its mode to MODE.  */
 732
 733   if (GET_CODE (value) == CONST_INT)
 734     {
 735       register HOST_WIDE_INT v = INTVAL (value);
 736
 737       if (bitsize < HOST_BITS_PER_WIDE_INT)
 738         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 739
 740       if (v == 0)
 741         all_zero = 1;
 742       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 743                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 744                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 745         all_one = 1;
 746
 747       value = lshift_value (mode, value, bitpos, bitsize);
 748     }
 749   else
 750     {
 751       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 752                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 753
 754       if (GET_MODE (value) != mode)
 755         {
 756           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 757               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 758             value = gen_lowpart (mode, value);
 759           else
 760             value = convert_to_mode (mode, value, 1);
 761         }
 762
 763       if (must_and)
 764         value = expand_binop (mode, and_optab, value,
 765                               mask_rtx (mode, 0, bitsize, 0),
 766                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 767       if (bitpos > 0)
 768         value = expand_shift (LSHIFT_EXPR, mode, value,
 769                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 770     }
 771
 772   /* Now clear the chosen bits in OP0,
 773      except that if VALUE is -1 we need not bother.  */
 774
 775   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 776
 777   if (! all_one)
 778     {
 779       temp = expand_binop (mode, and_optab, op0,
 780                            mask_rtx (mode, bitpos, bitsize, 1),
 781                            subtarget, 1, OPTAB_LIB_WIDEN);
 782       subtarget = temp;
 783     }
 784   else
 785     temp = op0;
 786
 787   /* Now logical-or VALUE into OP0, unless it is zero.  */
 788
 789   if (! all_zero)
 790     temp = expand_binop (mode, ior_optab, temp, value,
 791                          subtarget, 1, OPTAB_LIB_WIDEN);
 792   if (op0 != temp)
 793     emit_move_insn (op0, temp);
 794 }
 795 \f
 796 /* Store a bit field that is split across multiple accessible memory objects.
 797
 798    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 799    BITSIZE is the field width; BITPOS the position of its first bit
 800    (within the word).
 801    VALUE is the value to store.
 802    ALIGN is the known alignment of OP0.
 803    This is also the size of the memory objects to be used.
 804
 805    This does not yet handle fields wider than BITS_PER_WORD.  */
 806
 807 static void
 808 store_split_bit_field (op0, bitsize, bitpos, value, align)
 809      rtx op0;
 810      unsigned HOST_WIDE_INT bitsize, bitpos;
 811      rtx value;
 812      unsigned int align;
 813 {
 814   unsigned int unit;
 815   unsigned int bitsdone = 0;
 816
 817   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 818      much at a time.  */
 819   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 820     unit = BITS_PER_WORD;
 821   else
 822     unit = MIN (align, BITS_PER_WORD);
 823
 824   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 825      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 826      that VALUE might be a floating-point constant.  */
 827   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 828     {
 829       rtx word = gen_lowpart_common (word_mode, value);
 830
 831       if (word && (value != word))
 832         value = word;
 833       else
 834         value = gen_lowpart_common (word_mode,
 835                                     force_reg (GET_MODE (value) != VOIDmode
 836                                                ? GET_MODE (value)
 837                                                : word_mode, value));
 838     }
 839   else if (GET_CODE (value) == ADDRESSOF)
 840     value = copy_to_reg (value);
 841
 842   while (bitsdone < bitsize)
 843     {
 844       unsigned HOST_WIDE_INT thissize;
 845       rtx part, word;
 846       unsigned HOST_WIDE_INT thispos;
 847       unsigned HOST_WIDE_INT offset;
 848
 849       offset = (bitpos + bitsdone) / unit;
 850       thispos = (bitpos + bitsdone) % unit;
 851
 852       /* THISSIZE must not overrun a word boundary.  Otherwise,
 853          store_fixed_bit_field will call us again, and we will mutually
 854          recurse forever.  */
 855       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 856       thissize = MIN (thissize, unit - thispos);
 857
 858       if (BYTES_BIG_ENDIAN)
 859         {
 860           int total_bits;
 861
 862           /* We must do an endian conversion exactly the same way as it is
 863              done in extract_bit_field, so that the two calls to
 864              extract_fixed_bit_field will have comparable arguments.  */
 865           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 866             total_bits = BITS_PER_WORD;
 867           else
 868             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 869
 870           /* Fetch successively less significant portions.  */
 871           if (GET_CODE (value) == CONST_INT)
 872             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 873                              >> (bitsize - bitsdone - thissize))
 874                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 875           else
 876             /* The args are chosen so that the last part includes the
 877                lsb.  Give extract_bit_field the value it needs (with
 878                endianness compensation) to fetch the piece we want.
 879
 880                ??? We have no idea what the alignment of VALUE is, so
 881                we have to use a guess.  */
 882             part
 883               = extract_fixed_bit_field
 884                 (word_mode, value, 0, thissize,
 885                  total_bits - bitsize + bitsdone, NULL_RTX, 1,
 886                  GET_MODE (value) == VOIDmode
 887                  ? UNITS_PER_WORD
 888                  : (GET_MODE (value) == BLKmode
 889                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 890         }
 891       else
 892         {
 893           /* Fetch successively more significant portions.  */
 894           if (GET_CODE (value) == CONST_INT)
 895             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 896                              >> bitsdone)
 897                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 898           else
 899             part
 900               = extract_fixed_bit_field
 901                 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
 902                  GET_MODE (value) == VOIDmode
 903                  ? UNITS_PER_WORD
 904                  : (GET_MODE (value) == BLKmode
 905                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 906         }
 907
 908       /* If OP0 is a register, then handle OFFSET here.
 909
 910          When handling multiword bitfields, extract_bit_field may pass
 911          down a word_mode SUBREG of a larger REG for a bitfield that actually
 912          crosses a word boundary.  Thus, for a SUBREG, we must find
 913          the current word starting from the base register.  */
 914       if (GET_CODE (op0) == SUBREG)
 915         {
 916           word = operand_subword_force (SUBREG_REG (op0),
 917                                         SUBREG_WORD (op0) + offset,
 918                                         GET_MODE (SUBREG_REG (op0)));
 919           offset = 0;
 920         }
 921       else if (GET_CODE (op0) == REG)
 922         {
 923           word = operand_subword_force (op0, offset, GET_MODE (op0));
 924           offset = 0;
 925         }
 926       else
 927         word = op0;
 928
 929       /* OFFSET is in UNITs, and UNIT is in bits.
 930          store_fixed_bit_field wants offset in bytes.  */
 931       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 932                              thissize, thispos, part, align);
 933       bitsdone += thissize;
 934     }
 935 }
 936 \f
 937 /* Generate code to extract a byte-field from STR_RTX
 938    containing BITSIZE bits, starting at BITNUM,
 939    and put it in TARGET if possible (if TARGET is nonzero).
 940    Regardless of TARGET, we return the rtx for where the value is placed.
 941    It may be a QUEUED.
 942
 943    STR_RTX is the structure containing the byte (a REG or MEM).
 944    UNSIGNEDP is nonzero if this is an unsigned bit field.
 945    MODE is the natural mode of the field value once extracted.
 946    TMODE is the mode the caller would like the value to have;
 947    but the value may be returned with type MODE instead.
 948
 949    ALIGN is the alignment that STR_RTX is known to have.
 950    TOTAL_SIZE is the size in bytes of the containing structure,
 951    or -1 if varying.
 952
 953    If a TARGET is specified and we can store in it at no extra cost,
 954    we do so, and return TARGET.
 955    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 956    if they are equally easy.  */
 957
 958 rtx
 959 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 960                    target, mode, tmode, align, total_size)
 961      rtx str_rtx;
 962      unsigned HOST_WIDE_INT bitsize;
 963      unsigned HOST_WIDE_INT bitnum;
 964      int unsignedp;
 965      rtx target;
 966      enum machine_mode mode, tmode;
 967      unsigned int align;
 968      HOST_WIDE_INT total_size;
 969 {
 970   unsigned int unit
 971     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 972   unsigned HOST_WIDE_INT offset = bitnum / unit;
 973   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 974   register rtx op0 = str_rtx;
 975   rtx spec_target = target;
 976   rtx spec_target_subreg = 0;
 977   enum machine_mode int_mode;
 978 #ifdef HAVE_extv
 979   unsigned HOST_WIDE_INT extv_bitsize;
 980   enum machine_mode extv_mode;
 981 #endif
 982 #ifdef HAVE_extzv
 983   unsigned HOST_WIDE_INT extzv_bitsize;
 984   enum machine_mode extzv_mode;
 985 #endif
 986
 987 #ifdef HAVE_extv
 988   extv_mode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
 989   if (extv_mode == VOIDmode)
 990     extv_mode = word_mode;
 991   extv_bitsize = GET_MODE_BITSIZE (extv_mode);
 992 #endif
 993
 994 #ifdef HAVE_extzv
 995   extzv_mode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
 996   if (extzv_mode == VOIDmode)
 997     extzv_mode = word_mode;
 998   extzv_bitsize = GET_MODE_BITSIZE (extzv_mode);
 999 #endif
1000
1001   /* Discount the part of the structure before the desired byte.
1002      We need to know how many bytes are safe to reference after it.  */
1003   if (total_size >= 0)
1004     total_size -= (bitpos / BIGGEST_ALIGNMENT
1005                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1006
1007   if (tmode == VOIDmode)
1008     tmode = mode;
1009   while (GET_CODE (op0) == SUBREG)
1010     {
1011       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
1012       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
1013
1014       offset += SUBREG_WORD (op0);
1015
1016       inner_size = MIN (inner_size, BITS_PER_WORD);
1017
1018       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
1019         {
1020           bitpos += inner_size - outer_size;
1021           if (bitpos > unit)
1022             {
1023               offset += (bitpos / unit);
1024               bitpos %= unit;
1025             }
1026         }
1027
1028       op0 = SUBREG_REG (op0);
1029     }
1030
1031   /* Make sure we are playing with integral modes.  Pun with subregs
1032      if we aren't.  */
1033   {
1034     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1035     if (imode != GET_MODE (op0))
1036       {
1037         if (GET_CODE (op0) == MEM)
1038           op0 = change_address (op0, imode, NULL_RTX);
1039         else if (imode != BLKmode)
1040           op0 = gen_lowpart (imode, op0);
1041         else
1042           abort ();
1043       }
1044   }
1045
1046   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1047      If that's wrong, the solution is to test for it and set TARGET to 0
1048      if needed.  */
1049
1050   /* If OP0 is a register, BITPOS must count within a word.
1051      But as we have it, it counts within whatever size OP0 now has.
1052      On a bigendian machine, these are not the same, so convert.  */
1053   if (BYTES_BIG_ENDIAN
1054       && GET_CODE (op0) != MEM
1055       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1056     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1057
1058   /* Extracting a full-word or multi-word value
1059      from a structure in a register or aligned memory.
1060      This can be done with just SUBREG.
1061      So too extracting a subword value in
1062      the least significant part of the register.  */
1063
1064   if (((GET_CODE (op0) != MEM
1065         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1066                                   GET_MODE_BITSIZE (GET_MODE (op0))))
1067        || (GET_CODE (op0) == MEM
1068            && (! SLOW_UNALIGNED_ACCESS (mode, align)
1069                || (offset * BITS_PER_UNIT % bitsize == 0
1070                    && align % bitsize == 0))))
1071       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1072            && bitpos % BITS_PER_WORD == 0)
1073           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1074               /* ??? The big endian test here is wrong.  This is correct
1075                  if the value is in a register, and if mode_for_size is not
1076                  the same mode as op0.  This causes us to get unnecessarily
1077                  inefficient code from the Thumb port when -mbig-endian.  */
1078               && (BYTES_BIG_ENDIAN
1079                   ? bitpos + bitsize == BITS_PER_WORD
1080                   : bitpos == 0))))
1081     {
1082       enum machine_mode mode1
1083         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
1084
1085       if (mode1 != GET_MODE (op0))
1086         {
1087           if (GET_CODE (op0) == SUBREG)
1088             {
1089               if (GET_MODE (SUBREG_REG (op0)) == mode1
1090                   || GET_MODE_CLASS (mode1) == MODE_INT
1091                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1092                 op0 = SUBREG_REG (op0);
1093               else
1094                 /* Else we've got some float mode source being extracted into
1095                    a different float mode destination -- this combination of
1096                    subregs results in Severe Tire Damage.  */
1097                 abort ();
1098             }
1099           if (GET_CODE (op0) == REG)
1100             op0 = gen_rtx_SUBREG (mode1, op0, offset);
1101           else
1102             op0 = change_address (op0, mode1,
1103                                   plus_constant (XEXP (op0, 0), offset));
1104         }
1105       if (mode1 != mode)
1106         return convert_to_mode (tmode, op0, unsignedp);
1107       return op0;
1108     }
1109
1110   /* Handle fields bigger than a word.  */
1111
1112   if (bitsize > BITS_PER_WORD)
1113     {
1114       /* Here we transfer the words of the field
1115          in the order least significant first.
1116          This is because the most significant word is the one which may
1117          be less than full.  */
1118
1119       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1120       unsigned int i;
1121
1122       if (target == 0 || GET_CODE (target) != REG)
1123         target = gen_reg_rtx (mode);
1124
1125       /* Indicate for flow that the entire target reg is being set.  */
1126       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1127
1128       for (i = 0; i < nwords; i++)
1129         {
1130           /* If I is 0, use the low-order word in both field and target;
1131              if I is 1, use the next to lowest word; and so on.  */
1132           /* Word number in TARGET to use.  */
1133           unsigned int wordnum
1134             = (WORDS_BIG_ENDIAN
1135                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1136                : i);
1137           /* Offset from start of field in OP0.  */
1138           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1139                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1140                                                 * (int) BITS_PER_WORD))
1141                                      : (int) i * BITS_PER_WORD);
1142           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1143           rtx result_part
1144             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1145                                            bitsize - i * BITS_PER_WORD),
1146                                  bitnum + bit_offset, 1, target_part, mode,
1147                                  word_mode, align, total_size);
1148
1149           if (target_part == 0)
1150             abort ();
1151
1152           if (result_part != target_part)
1153             emit_move_insn (target_part, result_part);
1154         }
1155
1156       if (unsignedp)
1157         {
1158           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1159              need to be zero'd out.  */
1160           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1161             {
1162               unsigned int i, total_words;
1163
1164               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1165               for (i = nwords; i < total_words; i++)
1166                 {
1167                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1168                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1169                   emit_move_insn (target_part, const0_rtx);
1170                 }
1171             }
1172           return target;
1173         }
1174
1175       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1176       target = expand_shift (LSHIFT_EXPR, mode, target,
1177                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1178                              NULL_RTX, 0);
1179       return expand_shift (RSHIFT_EXPR, mode, target,
1180                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1181                            NULL_RTX, 0);
1182     }
1183
1184   /* From here on we know the desired field is smaller than a word.  */
1185
1186   /* Check if there is a correspondingly-sized integer field, so we can
1187      safely extract it as one size of integer, if necessary; then
1188      truncate or extend to the size that is wanted; then use SUBREGs or
1189      convert_to_mode to get one of the modes we really wanted.  */
1190
1191   int_mode = int_mode_for_mode (tmode);
1192   if (int_mode == BLKmode)
1193     int_mode = int_mode_for_mode (mode);
1194   if (int_mode == BLKmode)
1195     abort();    /* Should probably push op0 out to memory and then
1196                    do a load.  */
1197
1198   /* OFFSET is the number of words or bytes (UNIT says which)
1199      from STR_RTX to the first word or byte containing part of the field.  */
1200
1201   if (GET_CODE (op0) != MEM)
1202     {
1203       if (offset != 0
1204           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1205         {
1206           if (GET_CODE (op0) != REG)
1207             op0 = copy_to_reg (op0);
1208           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1209                                 op0, offset);
1210         }
1211       offset = 0;
1212     }
1213   else
1214     {
1215       op0 = protect_from_queue (str_rtx, 1);
1216     }
1217
1218   /* Now OFFSET is nonzero only for memory operands.  */
1219
1220   if (unsignedp)
1221     {
1222 #ifdef HAVE_extzv
1223       if (HAVE_extzv
1224           && (extzv_bitsize >= bitsize)
1225           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1226                 && (bitsize + bitpos > extzv_bitsize)))
1227         {
1228           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1229           rtx bitsize_rtx, bitpos_rtx;
1230           rtx last = get_last_insn ();
1231           rtx xop0 = op0;
1232           rtx xtarget = target;
1233           rtx xspec_target = spec_target;
1234           rtx xspec_target_subreg = spec_target_subreg;
1235           rtx pat;
1236           enum machine_mode maxmode;
1237
1238           maxmode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
1239           if (maxmode == VOIDmode)
1240             maxmode = word_mode;
1241
1242           if (GET_CODE (xop0) == MEM)
1243             {
1244               int save_volatile_ok = volatile_ok;
1245               volatile_ok = 1;
1246
1247               /* Is the memory operand acceptable?  */
1248               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1249                      (xop0, GET_MODE (xop0))))
1250                 {
1251                   /* No, load into a reg and extract from there.  */
1252                   enum machine_mode bestmode;
1253
1254                   /* Get the mode to use for inserting into this field.  If
1255                      OP0 is BLKmode, get the smallest mode consistent with the
1256                      alignment. If OP0 is a non-BLKmode object that is no
1257                      wider than MAXMODE, use its mode. Otherwise, use the
1258                      smallest mode containing the field.  */
1259
1260                   if (GET_MODE (xop0) == BLKmode
1261                       || (GET_MODE_SIZE (GET_MODE (op0))
1262                           > GET_MODE_SIZE (maxmode)))
1263                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1264                                               MEM_VOLATILE_P (xop0));
1265                   else
1266                     bestmode = GET_MODE (xop0);
1267
1268                   if (bestmode == VOIDmode
1269                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1270                           && GET_MODE_BITSIZE (bestmode) > align))
1271                     goto extzv_loses;
1272
1273                   /* Compute offset as multiple of this unit,
1274                      counting in bytes.  */
1275                   unit = GET_MODE_BITSIZE (bestmode);
1276                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1277                   xbitpos = bitnum % unit;
1278                   xop0 = change_address (xop0, bestmode,
1279                                          plus_constant (XEXP (xop0, 0),
1280                                                         xoffset));
1281                   /* Fetch it to a register in that size.  */
1282                   xop0 = force_reg (bestmode, xop0);
1283
1284                   /* XBITPOS counts within UNIT, which is what is expected.  */
1285                 }
1286               else
1287                 /* Get ref to first byte containing part of the field.  */
1288                 xop0 = change_address (xop0, byte_mode,
1289                                        plus_constant (XEXP (xop0, 0), xoffset));
1290
1291               volatile_ok = save_volatile_ok;
1292             }
1293
1294           /* If op0 is a register, we need it in MAXMODE (which is usually
1295              SImode). to make it acceptable to the format of extzv.  */
1296           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1297             goto extzv_loses;
1298           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1299             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1300
1301           /* On big-endian machines, we count bits from the most significant.
1302              If the bit field insn does not, we must invert.  */
1303           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1304             xbitpos = unit - bitsize - xbitpos;
1305
1306           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1307           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1308             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1309
1310           unit = GET_MODE_BITSIZE (maxmode);
1311
1312           if (xtarget == 0
1313               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1314             xtarget = xspec_target = gen_reg_rtx (tmode);
1315
1316           if (GET_MODE (xtarget) != maxmode)
1317             {
1318               if (GET_CODE (xtarget) == REG)
1319                 {
1320                   int wider = (GET_MODE_SIZE (maxmode)
1321                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1322                   xtarget = gen_lowpart (maxmode, xtarget);
1323                   if (wider)
1324                     xspec_target_subreg = xtarget;
1325                 }
1326               else
1327                 xtarget = gen_reg_rtx (maxmode);
1328             }
1329
1330           /* If this machine's extzv insists on a register target,
1331              make sure we have one.  */
1332           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1333                  (xtarget, maxmode)))
1334             xtarget = gen_reg_rtx (maxmode);
1335
1336           bitsize_rtx = GEN_INT (bitsize);
1337           bitpos_rtx = GEN_INT (xbitpos);
1338
1339           pat = gen_extzv (protect_from_queue (xtarget, 1),
1340                            xop0, bitsize_rtx, bitpos_rtx);
1341           if (pat)
1342             {
1343               emit_insn (pat);
1344               target = xtarget;
1345               spec_target = xspec_target;
1346               spec_target_subreg = xspec_target_subreg;
1347             }
1348           else
1349             {
1350               delete_insns_since (last);
1351               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1352                                                 bitpos, target, 1, align);
1353             }
1354         }
1355       else
1356         extzv_loses:
1357 #endif
1358       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1359                                         bitpos, target, 1, align);
1360     }
1361   else
1362     {
1363 #ifdef HAVE_extv
1364       if (HAVE_extv
1365           && (extv_bitsize >= bitsize)
1366           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1367                 && (bitsize + bitpos > extv_bitsize)))
1368         {
1369           int xbitpos = bitpos, xoffset = offset;
1370           rtx bitsize_rtx, bitpos_rtx;
1371           rtx last = get_last_insn ();
1372           rtx xop0 = op0, xtarget = target;
1373           rtx xspec_target = spec_target;
1374           rtx xspec_target_subreg = spec_target_subreg;
1375           rtx pat;
1376           enum machine_mode maxmode;
1377
1378           maxmode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
1379           if (maxmode == VOIDmode)
1380             maxmode = word_mode;
1381
1382           if (GET_CODE (xop0) == MEM)
1383             {
1384               /* Is the memory operand acceptable?  */
1385               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1386                      (xop0, GET_MODE (xop0))))
1387                 {
1388                   /* No, load into a reg and extract from there.  */
1389                   enum machine_mode bestmode;
1390
1391                   /* Get the mode to use for inserting into this field.  If
1392                      OP0 is BLKmode, get the smallest mode consistent with the
1393                      alignment. If OP0 is a non-BLKmode object that is no
1394                      wider than MAXMODE, use its mode. Otherwise, use the
1395                      smallest mode containing the field.  */
1396
1397                   if (GET_MODE (xop0) == BLKmode
1398                       || (GET_MODE_SIZE (GET_MODE (op0))
1399                           > GET_MODE_SIZE (maxmode)))
1400                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1401                                               MEM_VOLATILE_P (xop0));
1402                   else
1403                     bestmode = GET_MODE (xop0);
1404
1405                   if (bestmode == VOIDmode
1406                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1407                           && GET_MODE_BITSIZE (bestmode) > align))
1408                     goto extv_loses;
1409
1410                   /* Compute offset as multiple of this unit,
1411                      counting in bytes.  */
1412                   unit = GET_MODE_BITSIZE (bestmode);
1413                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1414                   xbitpos = bitnum % unit;
1415                   xop0 = change_address (xop0, bestmode,
1416                                          plus_constant (XEXP (xop0, 0),
1417                                                         xoffset));
1418                   /* Fetch it to a register in that size.  */
1419                   xop0 = force_reg (bestmode, xop0);
1420
1421                   /* XBITPOS counts within UNIT, which is what is expected.  */
1422                 }
1423               else
1424                 /* Get ref to first byte containing part of the field.  */
1425                 xop0 = change_address (xop0, byte_mode,
1426                                        plus_constant (XEXP (xop0, 0), xoffset));
1427             }
1428
1429           /* If op0 is a register, we need it in MAXMODE (which is usually
1430              SImode) to make it acceptable to the format of extv.  */
1431           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1432             goto extv_loses;
1433           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1434             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1435
1436           /* On big-endian machines, we count bits from the most significant.
1437              If the bit field insn does not, we must invert.  */
1438           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1439             xbitpos = unit - bitsize - xbitpos;
1440
1441           /* XBITPOS counts within a size of UNIT.
1442              Adjust to count within a size of MAXMODE.  */
1443           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1444             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1445
1446           unit = GET_MODE_BITSIZE (maxmode);
1447
1448           if (xtarget == 0
1449               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1450             xtarget = xspec_target = gen_reg_rtx (tmode);
1451
1452           if (GET_MODE (xtarget) != maxmode)
1453             {
1454               if (GET_CODE (xtarget) == REG)
1455                 {
1456                   int wider = (GET_MODE_SIZE (maxmode)
1457                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1458                   xtarget = gen_lowpart (maxmode, xtarget);
1459                   if (wider)
1460                     xspec_target_subreg = xtarget;
1461                 }
1462               else
1463                 xtarget = gen_reg_rtx (maxmode);
1464             }
1465
1466           /* If this machine's extv insists on a register target,
1467              make sure we have one.  */
1468           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1469                  (xtarget, maxmode)))
1470             xtarget = gen_reg_rtx (maxmode);
1471
1472           bitsize_rtx = GEN_INT (bitsize);
1473           bitpos_rtx = GEN_INT (xbitpos);
1474
1475           pat = gen_extv (protect_from_queue (xtarget, 1),
1476                           xop0, bitsize_rtx, bitpos_rtx);
1477           if (pat)
1478             {
1479               emit_insn (pat);
1480               target = xtarget;
1481               spec_target = xspec_target;
1482               spec_target_subreg = xspec_target_subreg;
1483             }
1484           else
1485             {
1486               delete_insns_since (last);
1487               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1488                                                 bitpos, target, 0, align);
1489             }
1490         }
1491       else
1492         extv_loses:
1493 #endif
1494       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1495                                         bitpos, target, 0, align);
1496     }
1497   if (target == spec_target)
1498     return target;
1499   if (target == spec_target_subreg)
1500     return spec_target;
1501   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1502     {
1503       /* If the target mode is floating-point, first convert to the
1504          integer mode of that size and then access it as a floating-point
1505          value via a SUBREG.  */
1506       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1507         {
1508           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1509                                                    MODE_INT, 0),
1510                                     target, unsignedp);
1511           if (GET_CODE (target) != REG)
1512             target = copy_to_reg (target);
1513           return gen_rtx_SUBREG (tmode, target, 0);
1514         }
1515       else
1516         return convert_to_mode (tmode, target, unsignedp);
1517     }
1518   return target;
1519 }
1520 \f
1521 /* Extract a bit field using shifts and boolean operations
1522    Returns an rtx to represent the value.
1523    OP0 addresses a register (word) or memory (byte).
1524    BITPOS says which bit within the word or byte the bit field starts in.
1525    OFFSET says how many bytes farther the bit field starts;
1526     it is 0 if OP0 is a register.
1527    BITSIZE says how many bits long the bit field is.
1528     (If OP0 is a register, it may be narrower than a full word,
1529      but BITPOS still counts within a full word,
1530      which is significant on bigendian machines.)
1531
1532    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1533    If TARGET is nonzero, attempts to store the value there
1534    and return TARGET, but this is not guaranteed.
1535    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1536
1537    ALIGN is the alignment that STR_RTX is known to have.  */
1538
1539 static rtx
1540 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1541                          target, unsignedp, align)
1542      enum machine_mode tmode;
1543      register rtx op0, target;
1544      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1545      int unsignedp;
1546      unsigned int align;
1547 {
1548   unsigned int total_bits = BITS_PER_WORD;
1549   enum machine_mode mode;
1550
1551   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1552     {
1553       /* Special treatment for a bit field split across two registers.  */
1554       if (bitsize + bitpos > BITS_PER_WORD)
1555         return extract_split_bit_field (op0, bitsize, bitpos,
1556                                         unsignedp, align);
1557     }
1558   else
1559     {
1560       /* Get the proper mode to use for this field.  We want a mode that
1561          includes the entire field.  If such a mode would be larger than
1562          a word, we won't be doing the extraction the normal way.  */
1563
1564       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
1565                             word_mode,
1566                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1567
1568       if (mode == VOIDmode)
1569         /* The only way this should occur is if the field spans word
1570            boundaries.  */
1571         return extract_split_bit_field (op0, bitsize,
1572                                         bitpos + offset * BITS_PER_UNIT,
1573                                         unsignedp, align);
1574
1575       total_bits = GET_MODE_BITSIZE (mode);
1576
1577       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1578          be in the range 0 to total_bits-1, and put any excess bytes in
1579          OFFSET.  */
1580       if (bitpos >= total_bits)
1581         {
1582           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1583           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1584                      * BITS_PER_UNIT);
1585         }
1586
1587       /* Get ref to an aligned byte, halfword, or word containing the field.
1588          Adjust BITPOS to be position within a word,
1589          and OFFSET to be the offset of that word.
1590          Then alter OP0 to refer to that word.  */
1591       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1592       offset -= (offset % (total_bits / BITS_PER_UNIT));
1593       op0 = change_address (op0, mode,
1594                             plus_constant (XEXP (op0, 0), offset));
1595     }
1596
1597   mode = GET_MODE (op0);
1598
1599   if (BYTES_BIG_ENDIAN)
1600     {
1601       /* BITPOS is the distance between our msb and that of OP0.
1602          Convert it to the distance from the lsb.  */
1603
1604       bitpos = total_bits - bitsize - bitpos;
1605     }
1606
1607   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1608      We have reduced the big-endian case to the little-endian case.  */
1609
1610   if (unsignedp)
1611     {
1612       if (bitpos)
1613         {
1614           /* If the field does not already start at the lsb,
1615              shift it so it does.  */
1616           tree amount = build_int_2 (bitpos, 0);
1617           /* Maybe propagate the target for the shift.  */
1618           /* But not if we will return it--could confuse integrate.c.  */
1619           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1620                            && !REG_FUNCTION_VALUE_P (target)
1621                            ? target : 0);
1622           if (tmode != mode) subtarget = 0;
1623           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1624         }
1625       /* Convert the value to the desired mode.  */
1626       if (mode != tmode)
1627         op0 = convert_to_mode (tmode, op0, 1);
1628
1629       /* Unless the msb of the field used to be the msb when we shifted,
1630          mask out the upper bits.  */
1631
1632       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1633 #if 0
1634 #ifdef SLOW_ZERO_EXTEND
1635           /* Always generate an `and' if
1636              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1637              will combine fruitfully with the zero-extend.  */
1638           || tmode != mode
1639 #endif
1640 #endif
1641           )
1642         return expand_binop (GET_MODE (op0), and_optab, op0,
1643                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1644                              target, 1, OPTAB_LIB_WIDEN);
1645       return op0;
1646     }
1647
1648   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1649      then arithmetic-shift its lsb to the lsb of the word.  */
1650   op0 = force_reg (mode, op0);
1651   if (mode != tmode)
1652     target = 0;
1653
1654   /* Find the narrowest integer mode that contains the field.  */
1655
1656   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1657        mode = GET_MODE_WIDER_MODE (mode))
1658     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1659       {
1660         op0 = convert_to_mode (mode, op0, 0);
1661         break;
1662       }
1663
1664   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1665     {
1666       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1667       /* Maybe propagate the target for the shift.  */
1668       /* But not if we will return the result--could confuse integrate.c.  */
1669       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1670                        && ! REG_FUNCTION_VALUE_P (target)
1671                        ? target : 0);
1672       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1673     }
1674
1675   return expand_shift (RSHIFT_EXPR, mode, op0,
1676                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1677                        target, 0);
1678 }
1679 \f
1680 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1681    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1682    complement of that if COMPLEMENT.  The mask is truncated if
1683    necessary to the width of mode MODE.  The mask is zero-extended if
1684    BITSIZE+BITPOS is too small for MODE.  */
1685
1686 static rtx
1687 mask_rtx (mode, bitpos, bitsize, complement)
1688      enum machine_mode mode;
1689      int bitpos, bitsize, complement;
1690 {
1691   HOST_WIDE_INT masklow, maskhigh;
1692
1693   if (bitpos < HOST_BITS_PER_WIDE_INT)
1694     masklow = (HOST_WIDE_INT) -1 << bitpos;
1695   else
1696     masklow = 0;
1697
1698   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1699     masklow &= ((unsigned HOST_WIDE_INT) -1
1700                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1701
1702   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1703     maskhigh = -1;
1704   else
1705     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1706
1707   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1708     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1709                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1710   else
1711     maskhigh = 0;
1712
1713   if (complement)
1714     {
1715       maskhigh = ~maskhigh;
1716       masklow = ~masklow;
1717     }
1718
1719   return immed_double_const (masklow, maskhigh, mode);
1720 }
1721
1722 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1723    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1724
1725 static rtx
1726 lshift_value (mode, value, bitpos, bitsize)
1727      enum machine_mode mode;
1728      rtx value;
1729      int bitpos, bitsize;
1730 {
1731   unsigned HOST_WIDE_INT v = INTVAL (value);
1732   HOST_WIDE_INT low, high;
1733
1734   if (bitsize < HOST_BITS_PER_WIDE_INT)
1735     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1736
1737   if (bitpos < HOST_BITS_PER_WIDE_INT)
1738     {
1739       low = v << bitpos;
1740       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1741     }
1742   else
1743     {
1744       low = 0;
1745       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1746     }
1747
1748   return immed_double_const (low, high, mode);
1749 }
1750 \f
1751 /* Extract a bit field that is split across two words
1752    and return an RTX for the result.
1753
1754    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1755    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1756    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1757
1758    ALIGN is the known alignment of OP0.  This is also the size of the
1759    memory objects to be used.  */
1760
1761 static rtx
1762 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1763      rtx op0;
1764      unsigned HOST_WIDE_INT bitsize, bitpos;
1765      int unsignedp;
1766      unsigned int align;
1767 {
1768   unsigned int unit;
1769   unsigned int bitsdone = 0;
1770   rtx result = NULL_RTX;
1771   int first = 1;
1772
1773   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1774      much at a time.  */
1775   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1776     unit = BITS_PER_WORD;
1777   else
1778     unit = MIN (align, BITS_PER_WORD);
1779
1780   while (bitsdone < bitsize)
1781     {
1782       unsigned HOST_WIDE_INT thissize;
1783       rtx part, word;
1784       unsigned HOST_WIDE_INT thispos;
1785       unsigned HOST_WIDE_INT offset;
1786
1787       offset = (bitpos + bitsdone) / unit;
1788       thispos = (bitpos + bitsdone) % unit;
1789
1790       /* THISSIZE must not overrun a word boundary.  Otherwise,
1791          extract_fixed_bit_field will call us again, and we will mutually
1792          recurse forever.  */
1793       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1794       thissize = MIN (thissize, unit - thispos);
1795
1796       /* If OP0 is a register, then handle OFFSET here.
1797
1798          When handling multiword bitfields, extract_bit_field may pass
1799          down a word_mode SUBREG of a larger REG for a bitfield that actually
1800          crosses a word boundary.  Thus, for a SUBREG, we must find
1801          the current word starting from the base register.  */
1802       if (GET_CODE (op0) == SUBREG)
1803         {
1804           word = operand_subword_force (SUBREG_REG (op0),
1805                                         SUBREG_WORD (op0) + offset,
1806                                         GET_MODE (SUBREG_REG (op0)));
1807           offset = 0;
1808         }
1809       else if (GET_CODE (op0) == REG)
1810         {
1811           word = operand_subword_force (op0, offset, GET_MODE (op0));
1812           offset = 0;
1813         }
1814       else
1815         word = op0;
1816
1817       /* Extract the parts in bit-counting order,
1818          whose meaning is determined by BYTES_PER_UNIT.
1819          OFFSET is in UNITs, and UNIT is in bits.
1820          extract_fixed_bit_field wants offset in bytes.  */
1821       part = extract_fixed_bit_field (word_mode, word,
1822                                       offset * unit / BITS_PER_UNIT,
1823                                       thissize, thispos, 0, 1, align);
1824       bitsdone += thissize;
1825
1826       /* Shift this part into place for the result.  */
1827       if (BYTES_BIG_ENDIAN)
1828         {
1829           if (bitsize != bitsdone)
1830             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1831                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1832         }
1833       else
1834         {
1835           if (bitsdone != thissize)
1836             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1837                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1838         }
1839
1840       if (first)
1841         result = part;
1842       else
1843         /* Combine the parts with bitwise or.  This works
1844            because we extracted each part as an unsigned bit field.  */
1845         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1846                                OPTAB_LIB_WIDEN);
1847
1848       first = 0;
1849     }
1850
1851   /* Unsigned bit field: we are done.  */
1852   if (unsignedp)
1853     return result;
1854   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1855   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1856                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1857                          NULL_RTX, 0);
1858   return expand_shift (RSHIFT_EXPR, word_mode, result,
1859                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1860 }
1861 \f
1862 /* Add INC into TARGET.  */
1863
1864 void
1865 expand_inc (target, inc)
1866      rtx target, inc;
1867 {
1868   rtx value = expand_binop (GET_MODE (target), add_optab,
1869                             target, inc,
1870                             target, 0, OPTAB_LIB_WIDEN);
1871   if (value != target)
1872     emit_move_insn (target, value);
1873 }
1874
1875 /* Subtract DEC from TARGET.  */
1876
1877 void
1878 expand_dec (target, dec)
1879      rtx target, dec;
1880 {
1881   rtx value = expand_binop (GET_MODE (target), sub_optab,
1882                             target, dec,
1883                             target, 0, OPTAB_LIB_WIDEN);
1884   if (value != target)
1885     emit_move_insn (target, value);
1886 }
1887 \f
1888 /* Output a shift instruction for expression code CODE,
1889    with SHIFTED being the rtx for the value to shift,
1890    and AMOUNT the tree for the amount to shift by.
1891    Store the result in the rtx TARGET, if that is convenient.
1892    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1893    Return the rtx for where the value is.  */
1894
1895 rtx
1896 expand_shift (code, mode, shifted, amount, target, unsignedp)
1897      enum tree_code code;
1898      register enum machine_mode mode;
1899      rtx shifted;
1900      tree amount;
1901      register rtx target;
1902      int unsignedp;
1903 {
1904   register rtx op1, temp = 0;
1905   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1906   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1907   int try;
1908
1909   /* Previously detected shift-counts computed by NEGATE_EXPR
1910      and shifted in the other direction; but that does not work
1911      on all machines.  */
1912
1913   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1914
1915 #ifdef SHIFT_COUNT_TRUNCATED
1916   if (SHIFT_COUNT_TRUNCATED)
1917     {
1918       if (GET_CODE (op1) == CONST_INT
1919           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1920               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1921         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1922                        % GET_MODE_BITSIZE (mode));
1923       else if (GET_CODE (op1) == SUBREG
1924                && SUBREG_WORD (op1) == 0)
1925         op1 = SUBREG_REG (op1);
1926     }
1927 #endif
1928
1929   if (op1 == const0_rtx)
1930     return shifted;
1931
1932   for (try = 0; temp == 0 && try < 3; try++)
1933     {
1934       enum optab_methods methods;
1935
1936       if (try == 0)
1937         methods = OPTAB_DIRECT;
1938       else if (try == 1)
1939         methods = OPTAB_WIDEN;
1940       else
1941         methods = OPTAB_LIB_WIDEN;
1942
1943       if (rotate)
1944         {
1945           /* Widening does not work for rotation.  */
1946           if (methods == OPTAB_WIDEN)
1947             continue;
1948           else if (methods == OPTAB_LIB_WIDEN)
1949             {
1950               /* If we have been unable to open-code this by a rotation,
1951                  do it as the IOR of two shifts.  I.e., to rotate A
1952                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1953                  where C is the bitsize of A.
1954
1955                  It is theoretically possible that the target machine might
1956                  not be able to perform either shift and hence we would
1957                  be making two libcalls rather than just the one for the
1958                  shift (similarly if IOR could not be done).  We will allow
1959                  this extremely unlikely lossage to avoid complicating the
1960                  code below.  */
1961
1962               rtx subtarget = target == shifted ? 0 : target;
1963               rtx temp1;
1964               tree type = TREE_TYPE (amount);
1965               tree new_amount = make_tree (type, op1);
1966               tree other_amount
1967                 = fold (build (MINUS_EXPR, type,
1968                                convert (type,
1969                                         build_int_2 (GET_MODE_BITSIZE (mode),
1970                                                      0)),
1971                                amount));
1972
1973               shifted = force_reg (mode, shifted);
1974
1975               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1976                                    mode, shifted, new_amount, subtarget, 1);
1977               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1978                                     mode, shifted, other_amount, 0, 1);
1979               return expand_binop (mode, ior_optab, temp, temp1, target,
1980                                    unsignedp, methods);
1981             }
1982
1983           temp = expand_binop (mode,
1984                                left ? rotl_optab : rotr_optab,
1985                                shifted, op1, target, unsignedp, methods);
1986
1987           /* If we don't have the rotate, but we are rotating by a constant
1988              that is in range, try a rotate in the opposite direction.  */
1989
1990           if (temp == 0 && GET_CODE (op1) == CONST_INT
1991               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1992             temp = expand_binop (mode,
1993                                  left ? rotr_optab : rotl_optab,
1994                                  shifted,
1995                                  GEN_INT (GET_MODE_BITSIZE (mode)
1996                                           - INTVAL (op1)),
1997                                  target, unsignedp, methods);
1998         }
1999       else if (unsignedp)
2000         temp = expand_binop (mode,
2001                              left ? ashl_optab : lshr_optab,
2002                              shifted, op1, target, unsignedp, methods);
2003
2004       /* Do arithmetic shifts.
2005          Also, if we are going to widen the operand, we can just as well
2006          use an arithmetic right-shift instead of a logical one.  */
2007       if (temp == 0 && ! rotate
2008           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2009         {
2010           enum optab_methods methods1 = methods;
2011
2012           /* If trying to widen a log shift to an arithmetic shift,
2013              don't accept an arithmetic shift of the same size.  */
2014           if (unsignedp)
2015             methods1 = OPTAB_MUST_WIDEN;
2016
2017           /* Arithmetic shift */
2018
2019           temp = expand_binop (mode,
2020                                left ? ashl_optab : ashr_optab,
2021                                shifted, op1, target, unsignedp, methods1);
2022         }
2023
2024       /* We used to try extzv here for logical right shifts, but that was
2025          only useful for one machine, the VAX, and caused poor code
2026          generation there for lshrdi3, so the code was deleted and a
2027          define_expand for lshrsi3 was added to vax.md.  */
2028     }
2029
2030   if (temp == 0)
2031     abort ();
2032   return temp;
2033 }
2034 \f
2035 enum alg_code { alg_zero, alg_m, alg_shift,
2036                   alg_add_t_m2, alg_sub_t_m2,
2037                   alg_add_factor, alg_sub_factor,
2038                   alg_add_t2_m, alg_sub_t2_m,
2039                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2040
2041 /* This structure records a sequence of operations.
2042    `ops' is the number of operations recorded.
2043    `cost' is their total cost.
2044    The operations are stored in `op' and the corresponding
2045    logarithms of the integer coefficients in `log'.
2046
2047    These are the operations:
2048    alg_zero             total := 0;
2049    alg_m                total := multiplicand;
2050    alg_shift            total := total * coeff
2051    alg_add_t_m2         total := total + multiplicand * coeff;
2052    alg_sub_t_m2         total := total - multiplicand * coeff;
2053    alg_add_factor       total := total * coeff + total;
2054    alg_sub_factor       total := total * coeff - total;
2055    alg_add_t2_m         total := total * coeff + multiplicand;
2056    alg_sub_t2_m         total := total * coeff - multiplicand;
2057
2058    The first operand must be either alg_zero or alg_m.  */
2059
2060 struct algorithm
2061 {
2062   short cost;
2063   short ops;
2064   /* The size of the OP and LOG fields are not directly related to the
2065      word size, but the worst-case algorithms will be if we have few
2066      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2067      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2068      in total wordsize operations.  */
2069   enum alg_code op[MAX_BITS_PER_WORD];
2070   char log[MAX_BITS_PER_WORD];
2071 };
2072
2073 static void synth_mult                  PARAMS ((struct algorithm *,
2074                                                  unsigned HOST_WIDE_INT,
2075                                                  int));
2076 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2077                                                          int, int,
2078                                                          unsigned HOST_WIDE_INT *,
2079                                                          int *, int *));
2080 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2081                                                          int));
2082 /* Compute and return the best algorithm for multiplying by T.
2083    The algorithm must cost less than cost_limit
2084    If retval.cost >= COST_LIMIT, no algorithm was found and all
2085    other field of the returned struct are undefined.  */
2086
2087 static void
2088 synth_mult (alg_out, t, cost_limit)
2089      struct algorithm *alg_out;
2090      unsigned HOST_WIDE_INT t;
2091      int cost_limit;
2092 {
2093   int m;
2094   struct algorithm *alg_in, *best_alg;
2095   int cost;
2096   unsigned HOST_WIDE_INT q;
2097
2098   /* Indicate that no algorithm is yet found.  If no algorithm
2099      is found, this value will be returned and indicate failure.  */
2100   alg_out->cost = cost_limit;
2101
2102   if (cost_limit <= 0)
2103     return;
2104
2105   /* t == 1 can be done in zero cost.  */
2106   if (t == 1)
2107     {
2108       alg_out->ops = 1;
2109       alg_out->cost = 0;
2110       alg_out->op[0] = alg_m;
2111       return;
2112     }
2113
2114   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2115      fail now.  */
2116   if (t == 0)
2117     {
2118       if (zero_cost >= cost_limit)
2119         return;
2120       else
2121         {
2122           alg_out->ops = 1;
2123           alg_out->cost = zero_cost;
2124           alg_out->op[0] = alg_zero;
2125           return;
2126         }
2127     }
2128
2129   /* We'll be needing a couple extra algorithm structures now.  */
2130
2131   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2132   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2133
2134   /* If we have a group of zero bits at the low-order part of T, try
2135      multiplying by the remaining bits and then doing a shift.  */
2136
2137   if ((t & 1) == 0)
2138     {
2139       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2140       q = t >> m;
2141       cost = shift_cost[m];
2142       synth_mult (alg_in, q, cost_limit - cost);
2143
2144       cost += alg_in->cost;
2145       if (cost < cost_limit)
2146         {
2147           struct algorithm *x;
2148           x = alg_in, alg_in = best_alg, best_alg = x;
2149           best_alg->log[best_alg->ops] = m;
2150           best_alg->op[best_alg->ops] = alg_shift;
2151           cost_limit = cost;
2152         }
2153     }
2154
2155   /* If we have an odd number, add or subtract one.  */
2156   if ((t & 1) != 0)
2157     {
2158       unsigned HOST_WIDE_INT w;
2159
2160       for (w = 1; (w & t) != 0; w <<= 1)
2161         ;
2162       /* If T was -1, then W will be zero after the loop.  This is another
2163          case where T ends with ...111.  Handling this with (T + 1) and
2164          subtract 1 produces slightly better code and results in algorithm
2165          selection much faster than treating it like the ...0111 case
2166          below.  */
2167       if (w == 0
2168           || (w > 2
2169               /* Reject the case where t is 3.
2170                  Thus we prefer addition in that case.  */
2171               && t != 3))
2172         {
2173           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2174
2175           cost = add_cost;
2176           synth_mult (alg_in, t + 1, cost_limit - cost);
2177
2178           cost += alg_in->cost;
2179           if (cost < cost_limit)
2180             {
2181               struct algorithm *x;
2182               x = alg_in, alg_in = best_alg, best_alg = x;
2183               best_alg->log[best_alg->ops] = 0;
2184               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2185               cost_limit = cost;
2186             }
2187         }
2188       else
2189         {
2190           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2191
2192           cost = add_cost;
2193           synth_mult (alg_in, t - 1, cost_limit - cost);
2194
2195           cost += alg_in->cost;
2196           if (cost < cost_limit)
2197             {
2198               struct algorithm *x;
2199               x = alg_in, alg_in = best_alg, best_alg = x;
2200               best_alg->log[best_alg->ops] = 0;
2201               best_alg->op[best_alg->ops] = alg_add_t_m2;
2202               cost_limit = cost;
2203             }
2204         }
2205     }
2206
2207   /* Look for factors of t of the form
2208      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2209      If we find such a factor, we can multiply by t using an algorithm that
2210      multiplies by q, shift the result by m and add/subtract it to itself.
2211
2212      We search for large factors first and loop down, even if large factors
2213      are less probable than small; if we find a large factor we will find a
2214      good sequence quickly, and therefore be able to prune (by decreasing
2215      COST_LIMIT) the search.  */
2216
2217   for (m = floor_log2 (t - 1); m >= 2; m--)
2218     {
2219       unsigned HOST_WIDE_INT d;
2220
2221       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2222       if (t % d == 0 && t > d)
2223         {
2224           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2225           synth_mult (alg_in, t / d, cost_limit - cost);
2226
2227           cost += alg_in->cost;
2228           if (cost < cost_limit)
2229             {
2230               struct algorithm *x;
2231               x = alg_in, alg_in = best_alg, best_alg = x;
2232               best_alg->log[best_alg->ops] = m;
2233               best_alg->op[best_alg->ops] = alg_add_factor;
2234               cost_limit = cost;
2235             }
2236           /* Other factors will have been taken care of in the recursion.  */
2237           break;
2238         }
2239
2240       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2241       if (t % d == 0 && t > d)
2242         {
2243           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2244           synth_mult (alg_in, t / d, cost_limit - cost);
2245
2246           cost += alg_in->cost;
2247           if (cost < cost_limit)
2248             {
2249               struct algorithm *x;
2250               x = alg_in, alg_in = best_alg, best_alg = x;
2251               best_alg->log[best_alg->ops] = m;
2252               best_alg->op[best_alg->ops] = alg_sub_factor;
2253               cost_limit = cost;
2254             }
2255           break;
2256         }
2257     }
2258
2259   /* Try shift-and-add (load effective address) instructions,
2260      i.e. do a*3, a*5, a*9.  */
2261   if ((t & 1) != 0)
2262     {
2263       q = t - 1;
2264       q = q & -q;
2265       m = exact_log2 (q);
2266       if (m >= 0)
2267         {
2268           cost = shiftadd_cost[m];
2269           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2270
2271           cost += alg_in->cost;
2272           if (cost < cost_limit)
2273             {
2274               struct algorithm *x;
2275               x = alg_in, alg_in = best_alg, best_alg = x;
2276               best_alg->log[best_alg->ops] = m;
2277               best_alg->op[best_alg->ops] = alg_add_t2_m;
2278               cost_limit = cost;
2279             }
2280         }
2281
2282       q = t + 1;
2283       q = q & -q;
2284       m = exact_log2 (q);
2285       if (m >= 0)
2286         {
2287           cost = shiftsub_cost[m];
2288           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2289
2290           cost += alg_in->cost;
2291           if (cost < cost_limit)
2292             {
2293               struct algorithm *x;
2294               x = alg_in, alg_in = best_alg, best_alg = x;
2295               best_alg->log[best_alg->ops] = m;
2296               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2297               cost_limit = cost;
2298             }
2299         }
2300     }
2301
2302   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2303      we have not found any algorithm.  */
2304   if (cost_limit == alg_out->cost)
2305     return;
2306
2307   /* If we are getting a too long sequence for `struct algorithm'
2308      to record, make this search fail.  */
2309   if (best_alg->ops == MAX_BITS_PER_WORD)
2310     return;
2311
2312   /* Copy the algorithm from temporary space to the space at alg_out.
2313      We avoid using structure assignment because the majority of
2314      best_alg is normally undefined, and this is a critical function.  */
2315   alg_out->ops = best_alg->ops + 1;
2316   alg_out->cost = cost_limit;
2317   bcopy ((char *) best_alg->op, (char *) alg_out->op,
2318          alg_out->ops * sizeof *alg_out->op);
2319   bcopy ((char *) best_alg->log, (char *) alg_out->log,
2320          alg_out->ops * sizeof *alg_out->log);
2321 }
2322 \f
2323 /* Perform a multiplication and return an rtx for the result.
2324    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2325    TARGET is a suggestion for where to store the result (an rtx).
2326
2327    We check specially for a constant integer as OP1.
2328    If you want this check for OP0 as well, then before calling
2329    you should swap the two operands if OP0 would be constant.  */
2330
2331 rtx
2332 expand_mult (mode, op0, op1, target, unsignedp)
2333      enum machine_mode mode;
2334      register rtx op0, op1, target;
2335      int unsignedp;
2336 {
2337   rtx const_op1 = op1;
2338
2339   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2340      less than or equal in size to `unsigned int' this doesn't matter.
2341      If the mode is larger than `unsigned int', then synth_mult works only
2342      if the constant value exactly fits in an `unsigned int' without any
2343      truncation.  This means that multiplying by negative values does
2344      not work; results are off by 2^32 on a 32 bit machine.  */
2345
2346   /* If we are multiplying in DImode, it may still be a win
2347      to try to work with shifts and adds.  */
2348   if (GET_CODE (op1) == CONST_DOUBLE
2349       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2350       && HOST_BITS_PER_INT >= BITS_PER_WORD
2351       && CONST_DOUBLE_HIGH (op1) == 0)
2352     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2353   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2354            && GET_CODE (op1) == CONST_INT
2355            && INTVAL (op1) < 0)
2356     const_op1 = 0;
2357
2358   /* We used to test optimize here, on the grounds that it's better to
2359      produce a smaller program when -O is not used.
2360      But this causes such a terrible slowdown sometimes
2361      that it seems better to use synth_mult always.  */
2362
2363   if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2364     {
2365       struct algorithm alg;
2366       struct algorithm alg2;
2367       HOST_WIDE_INT val = INTVAL (op1);
2368       HOST_WIDE_INT val_so_far;
2369       rtx insn;
2370       int mult_cost;
2371       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2372
2373       /* Try to do the computation three ways: multiply by the negative of OP1
2374          and then negate, do the multiplication directly, or do multiplication
2375          by OP1 - 1.  */
2376
2377       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2378       mult_cost = MIN (12 * add_cost, mult_cost);
2379
2380       synth_mult (&alg, val, mult_cost);
2381
2382       /* This works only if the inverted value actually fits in an
2383          `unsigned int' */
2384       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2385         {
2386           synth_mult (&alg2, - val,
2387                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2388           if (alg2.cost + negate_cost < alg.cost)
2389             alg = alg2, variant = negate_variant;
2390         }
2391
2392       /* This proves very useful for division-by-constant.  */
2393       synth_mult (&alg2, val - 1,
2394                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2395       if (alg2.cost + add_cost < alg.cost)
2396         alg = alg2, variant = add_variant;
2397
2398       if (alg.cost < mult_cost)
2399         {
2400           /* We found something cheaper than a multiply insn.  */
2401           int opno;
2402           rtx accum, tem;
2403
2404           op0 = protect_from_queue (op0, 0);
2405
2406           /* Avoid referencing memory over and over.
2407              For speed, but also for correctness when mem is volatile.  */
2408           if (GET_CODE (op0) == MEM)
2409             op0 = force_reg (mode, op0);
2410
2411           /* ACCUM starts out either as OP0 or as a zero, depending on
2412              the first operation.  */
2413
2414           if (alg.op[0] == alg_zero)
2415             {
2416               accum = copy_to_mode_reg (mode, const0_rtx);
2417               val_so_far = 0;
2418             }
2419           else if (alg.op[0] == alg_m)
2420             {
2421               accum = copy_to_mode_reg (mode, op0);
2422               val_so_far = 1;
2423             }
2424           else
2425             abort ();
2426
2427           for (opno = 1; opno < alg.ops; opno++)
2428             {
2429               int log = alg.log[opno];
2430               int preserve = preserve_subexpressions_p ();
2431               rtx shift_subtarget = preserve ? 0 : accum;
2432               rtx add_target
2433                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2434                    && ! preserve)
2435                   ? target : 0;
2436               rtx accum_target = preserve ? 0 : accum;
2437
2438               switch (alg.op[opno])
2439                 {
2440                 case alg_shift:
2441                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2442                                         build_int_2 (log, 0), NULL_RTX, 0);
2443                   val_so_far <<= log;
2444                   break;
2445
2446                 case alg_add_t_m2:
2447                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2448                                       build_int_2 (log, 0), NULL_RTX, 0);
2449                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2450                                          add_target
2451                                          ? add_target : accum_target);
2452                   val_so_far += (HOST_WIDE_INT) 1 << log;
2453                   break;
2454
2455                 case alg_sub_t_m2:
2456                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2457                                       build_int_2 (log, 0), NULL_RTX, 0);
2458                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2459                                          add_target
2460                                          ? add_target : accum_target);
2461                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2462                   break;
2463
2464                 case alg_add_t2_m:
2465                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2466                                         build_int_2 (log, 0), shift_subtarget,
2467                                         0);
2468                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2469                                          add_target
2470                                          ? add_target : accum_target);
2471                   val_so_far = (val_so_far << log) + 1;
2472                   break;
2473
2474                 case alg_sub_t2_m:
2475                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2476                                         build_int_2 (log, 0), shift_subtarget,
2477                                         0);
2478                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2479                                          add_target
2480                                          ? add_target : accum_target);
2481                   val_so_far = (val_so_far << log) - 1;
2482                   break;
2483
2484                 case alg_add_factor:
2485                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2486                                       build_int_2 (log, 0), NULL_RTX, 0);
2487                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2488                                          add_target
2489                                          ? add_target : accum_target);
2490                   val_so_far += val_so_far << log;
2491                   break;
2492
2493                 case alg_sub_factor:
2494                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2495                                       build_int_2 (log, 0), NULL_RTX, 0);
2496                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2497                                          (add_target ? add_target
2498                                           : preserve ? 0 : tem));
2499                   val_so_far = (val_so_far << log) - val_so_far;
2500                   break;
2501
2502                 default:
2503                   abort ();
2504                 }
2505
2506               /* Write a REG_EQUAL note on the last insn so that we can cse
2507                  multiplication sequences.  */
2508
2509               insn = get_last_insn ();
2510               set_unique_reg_note (insn,
2511                                    REG_EQUAL,
2512                                    gen_rtx_MULT (mode, op0,
2513                                                  GEN_INT (val_so_far)));
2514             }
2515
2516           if (variant == negate_variant)
2517             {
2518               val_so_far = - val_so_far;
2519               accum = expand_unop (mode, neg_optab, accum, target, 0);
2520             }
2521           else if (variant == add_variant)
2522             {
2523               val_so_far = val_so_far + 1;
2524               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2525             }
2526
2527           if (val != val_so_far)
2528             abort ();
2529
2530           return accum;
2531         }
2532     }
2533
2534   /* This used to use umul_optab if unsigned, but for non-widening multiply
2535      there is no difference between signed and unsigned.  */
2536   op0 = expand_binop (mode, smul_optab,
2537                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2538   if (op0 == 0)
2539     abort ();
2540   return op0;
2541 }
2542 \f
2543 /* Return the smallest n such that 2**n >= X.  */
2544
2545 int
2546 ceil_log2 (x)
2547      unsigned HOST_WIDE_INT x;
2548 {
2549   return floor_log2 (x - 1) + 1;
2550 }
2551
2552 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2553    replace division by D, and put the least significant N bits of the result
2554    in *MULTIPLIER_PTR and return the most significant bit.
2555
2556    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2557    needed precision is in PRECISION (should be <= N).
2558
2559    PRECISION should be as small as possible so this function can choose
2560    multiplier more freely.
2561
2562    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2563    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2564
2565    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2566    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2567
2568 static
2569 unsigned HOST_WIDE_INT
2570 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2571      unsigned HOST_WIDE_INT d;
2572      int n;
2573      int precision;
2574      unsigned HOST_WIDE_INT *multiplier_ptr;
2575      int *post_shift_ptr;
2576      int *lgup_ptr;
2577 {
2578   unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2579   unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2580   int lgup, post_shift;
2581   int pow, pow2;
2582   unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2583
2584   /* lgup = ceil(log2(divisor)); */
2585   lgup = ceil_log2 (d);
2586
2587   if (lgup > n)
2588     abort ();
2589
2590   pow = n + lgup;
2591   pow2 = n + lgup - precision;
2592
2593   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2594     {
2595       /* We could handle this with some effort, but this case is much better
2596          handled directly with a scc insn, so rely on caller using that.  */
2597       abort ();
2598     }
2599
2600   /* mlow = 2^(N + lgup)/d */
2601  if (pow >= HOST_BITS_PER_WIDE_INT)
2602     {
2603       nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2604       nl = 0;
2605     }
2606   else
2607     {
2608       nh = 0;
2609       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2610     }
2611   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2612                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2613
2614   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2615   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2616     nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2617   else
2618     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2619   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2620                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2621
2622   if (mhigh_hi && nh - d >= d)
2623     abort ();
2624   if (mhigh_hi > 1 || mlow_hi > 1)
2625     abort ();
2626   /* assert that mlow < mhigh.  */
2627   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2628     abort();
2629
2630   /* If precision == N, then mlow, mhigh exceed 2^N
2631      (but they do not exceed 2^(N+1)).  */
2632
2633   /* Reduce to lowest terms */
2634   for (post_shift = lgup; post_shift > 0; post_shift--)
2635     {
2636       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2637       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2638       if (ml_lo >= mh_lo)
2639         break;
2640
2641       mlow_hi = 0;
2642       mlow_lo = ml_lo;
2643       mhigh_hi = 0;
2644       mhigh_lo = mh_lo;
2645     }
2646
2647   *post_shift_ptr = post_shift;
2648   *lgup_ptr = lgup;
2649   if (n < HOST_BITS_PER_WIDE_INT)
2650     {
2651       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2652       *multiplier_ptr = mhigh_lo & mask;
2653       return mhigh_lo >= mask;
2654     }
2655   else
2656     {
2657       *multiplier_ptr = mhigh_lo;
2658       return mhigh_hi;
2659     }
2660 }
2661
2662 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2663    congruent to 1 (mod 2**N).  */
2664
2665 static unsigned HOST_WIDE_INT
2666 invert_mod2n (x, n)
2667      unsigned HOST_WIDE_INT x;
2668      int n;
2669 {
2670   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2671
2672   /* The algorithm notes that the choice y = x satisfies
2673      x*y == 1 mod 2^3, since x is assumed odd.
2674      Each iteration doubles the number of bits of significance in y.  */
2675
2676   unsigned HOST_WIDE_INT mask;
2677   unsigned HOST_WIDE_INT y = x;
2678   int nbit = 3;
2679
2680   mask = (n == HOST_BITS_PER_WIDE_INT
2681           ? ~(unsigned HOST_WIDE_INT) 0
2682           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2683
2684   while (nbit < n)
2685     {
2686       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2687       nbit *= 2;
2688     }
2689   return y;
2690 }
2691
2692 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2693    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2694    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2695    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2696    become signed.
2697
2698    The result is put in TARGET if that is convenient.
2699
2700    MODE is the mode of operation.  */
2701
2702 rtx
2703 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2704      enum machine_mode mode;
2705      register rtx adj_operand, op0, op1, target;
2706      int unsignedp;
2707 {
2708   rtx tem;
2709   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2710
2711   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2712                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2713                       NULL_RTX, 0);
2714   tem = expand_and (tem, op1, NULL_RTX);
2715   adj_operand
2716     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2717                      adj_operand);
2718
2719   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2720                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2721                       NULL_RTX, 0);
2722   tem = expand_and (tem, op0, NULL_RTX);
2723   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2724                           target);
2725
2726   return target;
2727 }
2728
2729 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2730    in TARGET if that is convenient, and return where the result is.  If the
2731    operation can not be performed, 0 is returned.
2732
2733    MODE is the mode of operation and result.
2734
2735    UNSIGNEDP nonzero means unsigned multiply.
2736
2737    MAX_COST is the total allowed cost for the expanded RTL.  */
2738
2739 rtx
2740 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2741      enum machine_mode mode;
2742      register rtx op0, target;
2743      unsigned HOST_WIDE_INT cnst1;
2744      int unsignedp;
2745      int max_cost;
2746 {
2747   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2748   optab mul_highpart_optab;
2749   optab moptab;
2750   rtx tem;
2751   int size = GET_MODE_BITSIZE (mode);
2752   rtx op1, wide_op1;
2753
2754   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2755   if (size > HOST_BITS_PER_WIDE_INT)
2756     abort ();
2757
2758   op1 = GEN_INT (cnst1);
2759
2760   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2761     wide_op1 = op1;
2762   else
2763     wide_op1
2764       = immed_double_const (cnst1,
2765                             (unsignedp
2766                              ? (HOST_WIDE_INT) 0
2767                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2768                             wider_mode);
2769
2770   /* expand_mult handles constant multiplication of word_mode
2771      or narrower.  It does a poor job for large modes.  */
2772   if (size < BITS_PER_WORD
2773       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2774     {
2775       /* We have to do this, since expand_binop doesn't do conversion for
2776          multiply.  Maybe change expand_binop to handle widening multiply?  */
2777       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2778
2779       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2780       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2781                           build_int_2 (size, 0), NULL_RTX, 1);
2782       return convert_modes (mode, wider_mode, tem, unsignedp);
2783     }
2784
2785   if (target == 0)
2786     target = gen_reg_rtx (mode);
2787
2788   /* Firstly, try using a multiplication insn that only generates the needed
2789      high part of the product, and in the sign flavor of unsignedp.  */
2790   if (mul_highpart_cost[(int) mode] < max_cost)
2791     {
2792       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2793       target = expand_binop (mode, mul_highpart_optab,
2794                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2795       if (target)
2796         return target;
2797     }
2798
2799   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2800      Need to adjust the result after the multiplication.  */
2801   if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2802     {
2803       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2804       target = expand_binop (mode, mul_highpart_optab,
2805                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2806       if (target)
2807         /* We used the wrong signedness.  Adjust the result.  */
2808         return expand_mult_highpart_adjust (mode, target, op0,
2809                                             op1, target, unsignedp);
2810     }
2811
2812   /* Try widening multiplication.  */
2813   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2814   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2815       && mul_widen_cost[(int) wider_mode] < max_cost)
2816     {
2817       op1 = force_reg (mode, op1);
2818       goto try;
2819     }
2820
2821   /* Try widening the mode and perform a non-widening multiplication.  */
2822   moptab = smul_optab;
2823   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2824       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2825     {
2826       op1 = wide_op1;
2827       goto try;
2828     }
2829
2830   /* Try widening multiplication of opposite signedness, and adjust.  */
2831   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2832   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2833       && (mul_widen_cost[(int) wider_mode]
2834           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2835     {
2836       rtx regop1 = force_reg (mode, op1);
2837       tem = expand_binop (wider_mode, moptab, op0, regop1,
2838                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2839       if (tem != 0)
2840         {
2841           /* Extract the high half of the just generated product.  */
2842           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2843                               build_int_2 (size, 0), NULL_RTX, 1);
2844           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2845           /* We used the wrong signedness.  Adjust the result.  */
2846           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2847                                               target, unsignedp);
2848         }
2849     }
2850
2851   return 0;
2852
2853  try:
2854   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2855   tem = expand_binop (wider_mode, moptab, op0, op1,
2856                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2857   if (tem == 0)
2858     return 0;
2859
2860   /* Extract the high half of the just generated product.  */
2861   if (mode == word_mode)
2862     {
2863       return gen_highpart (mode, tem);
2864     }
2865   else
2866     {
2867       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2868                           build_int_2 (size, 0), NULL_RTX, 1);
2869       return convert_modes (mode, wider_mode, tem, unsignedp);
2870     }
2871 }
2872 \f
2873 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2874    if that is convenient, and returning where the result is.
2875    You may request either the quotient or the remainder as the result;
2876    specify REM_FLAG nonzero to get the remainder.
2877
2878    CODE is the expression code for which kind of division this is;
2879    it controls how rounding is done.  MODE is the machine mode to use.
2880    UNSIGNEDP nonzero means do unsigned division.  */
2881
2882 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2883    and then correct it by or'ing in missing high bits
2884    if result of ANDI is nonzero.
2885    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2886    This could optimize to a bfexts instruction.
2887    But C doesn't use these operations, so their optimizations are
2888    left for later.  */
2889 /* ??? For modulo, we don't actually need the highpart of the first product,
2890    the low part will do nicely.  And for small divisors, the second multiply
2891    can also be a low-part only multiply or even be completely left out.
2892    E.g. to calculate the remainder of a division by 3 with a 32 bit
2893    multiply, multiply with 0x55555556 and extract the upper two bits;
2894    the result is exact for inputs up to 0x1fffffff.
2895    The input range can be reduced by using cross-sum rules.
2896    For odd divisors >= 3, the following table gives right shift counts
2897    so that if an number is shifted by an integer multiple of the given
2898    amount, the remainder stays the same:
2899    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2900    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2901    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2902    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2903    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2904
2905    Cross-sum rules for even numbers can be derived by leaving as many bits
2906    to the right alone as the divisor has zeros to the right.
2907    E.g. if x is an unsigned 32 bit number:
2908    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2909    */
2910
2911 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2912
2913 rtx
2914 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2915      int rem_flag;
2916      enum tree_code code;
2917      enum machine_mode mode;
2918      register rtx op0, op1, target;
2919      int unsignedp;
2920 {
2921   enum machine_mode compute_mode;
2922   register rtx tquotient;
2923   rtx quotient = 0, remainder = 0;
2924   rtx last;
2925   int size;
2926   rtx insn, set;
2927   optab optab1, optab2;
2928   int op1_is_constant, op1_is_pow2;
2929   int max_cost, extra_cost;
2930   static HOST_WIDE_INT last_div_const = 0;
2931
2932   op1_is_constant = GET_CODE (op1) == CONST_INT;
2933   op1_is_pow2 = (op1_is_constant
2934                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2935                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2936
2937   /*
2938      This is the structure of expand_divmod:
2939
2940      First comes code to fix up the operands so we can perform the operations
2941      correctly and efficiently.
2942
2943      Second comes a switch statement with code specific for each rounding mode.
2944      For some special operands this code emits all RTL for the desired
2945      operation, for other cases, it generates only a quotient and stores it in
2946      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2947      to indicate that it has not done anything.
2948
2949      Last comes code that finishes the operation.  If QUOTIENT is set and
2950      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2951      QUOTIENT is not set, it is computed using trunc rounding.
2952
2953      We try to generate special code for division and remainder when OP1 is a
2954      constant.  If |OP1| = 2**n we can use shifts and some other fast
2955      operations.  For other values of OP1, we compute a carefully selected
2956      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2957      by m.
2958
2959      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2960      half of the product.  Different strategies for generating the product are
2961      implemented in expand_mult_highpart.
2962
2963      If what we actually want is the remainder, we generate that by another
2964      by-constant multiplication and a subtraction.  */
2965
2966   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2967      code below will malfunction if we are, so check here and handle
2968      the special case if so.  */
2969   if (op1 == const1_rtx)
2970     return rem_flag ? const0_rtx : op0;
2971
2972   if (target
2973       /* Don't use the function value register as a target
2974          since we have to read it as well as write it,
2975          and function-inlining gets confused by this.  */
2976       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2977           /* Don't clobber an operand while doing a multi-step calculation.  */
2978           || ((rem_flag || op1_is_constant)
2979               && (reg_mentioned_p (target, op0)
2980                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2981           || reg_mentioned_p (target, op1)
2982           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2983     target = 0;
2984
2985   /* Get the mode in which to perform this computation.  Normally it will
2986      be MODE, but sometimes we can't do the desired operation in MODE.
2987      If so, pick a wider mode in which we can do the operation.  Convert
2988      to that mode at the start to avoid repeated conversions.
2989
2990      First see what operations we need.  These depend on the expression
2991      we are evaluating.  (We assume that divxx3 insns exist under the
2992      same conditions that modxx3 insns and that these insns don't normally
2993      fail.  If these assumptions are not correct, we may generate less
2994      efficient code in some cases.)
2995
2996      Then see if we find a mode in which we can open-code that operation
2997      (either a division, modulus, or shift).  Finally, check for the smallest
2998      mode for which we can do the operation with a library call.  */
2999
3000   /* We might want to refine this now that we have division-by-constant
3001      optimization.  Since expand_mult_highpart tries so many variants, it is
3002      not straightforward to generalize this.  Maybe we should make an array
3003      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3004
3005   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
3006             : (unsignedp ? udiv_optab : sdiv_optab));
3007   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3008
3009   for (compute_mode = mode; compute_mode != VOIDmode;
3010        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3011     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3012         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3013       break;
3014
3015   if (compute_mode == VOIDmode)
3016     for (compute_mode = mode; compute_mode != VOIDmode;
3017          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3018       if (optab1->handlers[(int) compute_mode].libfunc
3019           || optab2->handlers[(int) compute_mode].libfunc)
3020         break;
3021
3022   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3023      in expand_binop.  */
3024   if (compute_mode == VOIDmode)
3025     compute_mode = mode;
3026
3027   if (target && GET_MODE (target) == compute_mode)
3028     tquotient = target;
3029   else
3030     tquotient = gen_reg_rtx (compute_mode);
3031
3032   size = GET_MODE_BITSIZE (compute_mode);
3033 #if 0
3034   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3035      (mode), and thereby get better code when OP1 is a constant.  Do that
3036      later.  It will require going over all usages of SIZE below.  */
3037   size = GET_MODE_BITSIZE (mode);
3038 #endif
3039
3040   /* Only deduct something for a REM if the last divide done was
3041      for a different constant.   Then set the constant of the last
3042      divide.  */
3043   max_cost = div_cost[(int) compute_mode]
3044     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3045                       && INTVAL (op1) == last_div_const)
3046        ? mul_cost[(int) compute_mode] + add_cost : 0);
3047
3048   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3049
3050   /* Now convert to the best mode to use.  */
3051   if (compute_mode != mode)
3052     {
3053       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3054       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3055
3056       /* convert_modes may have placed op1 into a register, so we
3057          must recompute the following.  */
3058       op1_is_constant = GET_CODE (op1) == CONST_INT;
3059       op1_is_pow2 = (op1_is_constant
3060                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3061                           || (! unsignedp
3062                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3063     }
3064
3065   /* If one of the operands is a volatile MEM, copy it into a register.  */
3066
3067   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3068     op0 = force_reg (compute_mode, op0);
3069   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3070     op1 = force_reg (compute_mode, op1);
3071
3072   /* If we need the remainder or if OP1 is constant, we need to
3073      put OP0 in a register in case it has any queued subexpressions.  */
3074   if (rem_flag || op1_is_constant)
3075     op0 = force_reg (compute_mode, op0);
3076
3077   last = get_last_insn ();
3078
3079   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3080   if (unsignedp)
3081     {
3082       if (code == FLOOR_DIV_EXPR)
3083         code = TRUNC_DIV_EXPR;
3084       if (code == FLOOR_MOD_EXPR)
3085         code = TRUNC_MOD_EXPR;
3086       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3087         code = TRUNC_DIV_EXPR;
3088     }
3089
3090   if (op1 != const0_rtx)
3091     switch (code)
3092       {
3093       case TRUNC_MOD_EXPR:
3094       case TRUNC_DIV_EXPR:
3095         if (op1_is_constant)
3096           {
3097             if (unsignedp)
3098               {
3099                 unsigned HOST_WIDE_INT mh, ml;
3100                 int pre_shift, post_shift;
3101                 int dummy;
3102                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3103
3104                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3105                   {
3106                     pre_shift = floor_log2 (d);
3107                     if (rem_flag)
3108                       {
3109                         remainder
3110                           = expand_binop (compute_mode, and_optab, op0,
3111                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3112                                           remainder, 1,
3113                                           OPTAB_LIB_WIDEN);
3114                         if (remainder)
3115                           return gen_lowpart (mode, remainder);
3116                       }
3117                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3118                                              build_int_2 (pre_shift, 0),
3119                                              tquotient, 1);
3120                   }
3121                 else if (size <= HOST_BITS_PER_WIDE_INT)
3122                   {
3123                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3124                       {
3125                         /* Most significant bit of divisor is set; emit an scc
3126                            insn.  */
3127                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3128                                                     compute_mode, 1, 1);
3129                         if (quotient == 0)
3130                           goto fail1;
3131                       }
3132                     else
3133                       {
3134                         /* Find a suitable multiplier and right shift count
3135                            instead of multiplying with D.  */
3136
3137                         mh = choose_multiplier (d, size, size,
3138                                                 &ml, &post_shift, &dummy);
3139
3140                         /* If the suggested multiplier is more than SIZE bits,
3141                            we can do better for even divisors, using an
3142                            initial right shift.  */
3143                         if (mh != 0 && (d & 1) == 0)
3144                           {
3145                             pre_shift = floor_log2 (d & -d);
3146                             mh = choose_multiplier (d >> pre_shift, size,
3147                                                     size - pre_shift,
3148                                                     &ml, &post_shift, &dummy);
3149                             if (mh)
3150                               abort ();
3151                           }
3152                         else
3153                           pre_shift = 0;
3154
3155                         if (mh != 0)
3156                           {
3157                             rtx t1, t2, t3, t4;
3158
3159                             extra_cost = (shift_cost[post_shift - 1]
3160                                           + shift_cost[1] + 2 * add_cost);
3161                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3162                                                        NULL_RTX, 1,
3163                                                        max_cost - extra_cost);
3164                             if (t1 == 0)
3165                               goto fail1;
3166                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3167                                                                op0, t1),
3168                                                 NULL_RTX);
3169                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3170                                                build_int_2 (1, 0), NULL_RTX,1);
3171                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3172                                                               t1, t3),
3173                                                 NULL_RTX);
3174                             quotient
3175                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3176                                               build_int_2 (post_shift - 1, 0),
3177                                               tquotient, 1);
3178                           }
3179                         else
3180                           {
3181                             rtx t1, t2;
3182
3183                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3184                                                build_int_2 (pre_shift, 0),
3185                                                NULL_RTX, 1);
3186                             extra_cost = (shift_cost[pre_shift]
3187                                           + shift_cost[post_shift]);
3188                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3189                                                        NULL_RTX, 1,
3190                                                        max_cost - extra_cost);
3191                             if (t2 == 0)
3192                               goto fail1;
3193                             quotient
3194                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3195                                               build_int_2 (post_shift, 0),
3196                                               tquotient, 1);
3197                           }
3198                       }
3199                   }
3200                 else            /* Too wide mode to use tricky code */
3201                   break;
3202
3203                 insn = get_last_insn ();
3204                 if (insn != last
3205                     && (set = single_set (insn)) != 0
3206                     && SET_DEST (set) == quotient)
3207                   set_unique_reg_note (insn,
3208                                        REG_EQUAL,
3209                                        gen_rtx_UDIV (compute_mode, op0, op1));
3210               }
3211             else                /* TRUNC_DIV, signed */
3212               {
3213                 unsigned HOST_WIDE_INT ml;
3214                 int lgup, post_shift;
3215                 HOST_WIDE_INT d = INTVAL (op1);
3216                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3217
3218                 /* n rem d = n rem -d */
3219                 if (rem_flag && d < 0)
3220                   {
3221                     d = abs_d;
3222                     op1 = GEN_INT (abs_d);
3223                   }
3224
3225                 if (d == 1)
3226                   quotient = op0;
3227                 else if (d == -1)
3228                   quotient = expand_unop (compute_mode, neg_optab, op0,
3229                                           tquotient, 0);
3230                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3231                   {
3232                     /* This case is not handled correctly below.  */
3233                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3234                                                 compute_mode, 1, 1);
3235                     if (quotient == 0)
3236                       goto fail1;
3237                   }
3238                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3239                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3240                   ;
3241                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3242                   {
3243                     lgup = floor_log2 (abs_d);
3244                     if (abs_d != 2 && BRANCH_COST < 3)
3245                       {
3246                         rtx label = gen_label_rtx ();
3247                         rtx t1;
3248
3249                         t1 = copy_to_mode_reg (compute_mode, op0);
3250                         do_cmp_and_jump (t1, const0_rtx, GE,
3251                                          compute_mode, label);
3252                         expand_inc (t1, GEN_INT (abs_d - 1));
3253                         emit_label (label);
3254                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3255                                                  build_int_2 (lgup, 0),
3256                                                  tquotient, 0);
3257                       }
3258                     else
3259                       {
3260                         rtx t1, t2, t3;
3261                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3262                                            build_int_2 (size - 1, 0),
3263                                            NULL_RTX, 0);
3264                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3265                                            build_int_2 (size - lgup, 0),
3266                                            NULL_RTX, 1);
3267                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3268                                                           op0, t2),
3269                                             NULL_RTX);
3270                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3271                                                  build_int_2 (lgup, 0),
3272                                                  tquotient, 0);
3273                       }
3274
3275                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3276                        the quotient.  */
3277                     if (d < 0)
3278                       {
3279                         insn = get_last_insn ();
3280                         if (insn != last
3281                             && (set = single_set (insn)) != 0
3282                             && SET_DEST (set) == quotient
3283                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3284                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3285                           set_unique_reg_note (insn,
3286                                                REG_EQUAL,
3287                                                gen_rtx_DIV (compute_mode,
3288                                                             op0,
3289                                                             GEN_INT (abs_d)));
3290
3291                         quotient = expand_unop (compute_mode, neg_optab,
3292                                                 quotient, quotient, 0);
3293                       }
3294                   }
3295                 else if (size <= HOST_BITS_PER_WIDE_INT)
3296                   {
3297                     choose_multiplier (abs_d, size, size - 1,
3298                                        &ml, &post_shift, &lgup);
3299                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3300                       {
3301                         rtx t1, t2, t3;
3302
3303                         extra_cost = (shift_cost[post_shift]
3304                                       + shift_cost[size - 1] + add_cost);
3305                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3306                                                    NULL_RTX, 0,
3307                                                    max_cost - extra_cost);
3308                         if (t1 == 0)
3309                           goto fail1;
3310                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3311                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3312                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3313                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3314                         if (d < 0)
3315                           quotient
3316                             = force_operand (gen_rtx_MINUS (compute_mode,
3317                                                             t3, t2),
3318                                              tquotient);
3319                         else
3320                           quotient
3321                             = force_operand (gen_rtx_MINUS (compute_mode,
3322                                                             t2, t3),
3323                                              tquotient);
3324                       }
3325                     else
3326                       {
3327                         rtx t1, t2, t3, t4;
3328
3329                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3330                         extra_cost = (shift_cost[post_shift]
3331                                       + shift_cost[size - 1] + 2 * add_cost);
3332                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3333                                                    NULL_RTX, 0,
3334                                                    max_cost - extra_cost);
3335                         if (t1 == 0)
3336                           goto fail1;
3337                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3338                                                           t1, op0),
3339                                             NULL_RTX);
3340                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3341                                            build_int_2 (post_shift, 0),
3342                                            NULL_RTX, 0);
3343                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3344                                            build_int_2 (size - 1, 0),
3345                                            NULL_RTX, 0);
3346                         if (d < 0)
3347                           quotient
3348                             = force_operand (gen_rtx_MINUS (compute_mode,
3349                                                             t4, t3),
3350                                              tquotient);
3351                         else
3352                           quotient
3353                             = force_operand (gen_rtx_MINUS (compute_mode,
3354                                                             t3, t4),
3355                                              tquotient);
3356                       }
3357                   }
3358                 else            /* Too wide mode to use tricky code */
3359                   break;
3360
3361                 insn = get_last_insn ();
3362                 if (insn != last
3363                     && (set = single_set (insn)) != 0
3364                     && SET_DEST (set) == quotient)
3365                   set_unique_reg_note (insn,
3366                                        REG_EQUAL,
3367                                        gen_rtx_DIV (compute_mode, op0, op1));
3368               }
3369             break;
3370           }
3371       fail1:
3372         delete_insns_since (last);
3373         break;
3374
3375       case FLOOR_DIV_EXPR:
3376       case FLOOR_MOD_EXPR:
3377       /* We will come here only for signed operations.  */
3378         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3379           {
3380             unsigned HOST_WIDE_INT mh, ml;
3381             int pre_shift, lgup, post_shift;
3382             HOST_WIDE_INT d = INTVAL (op1);
3383
3384             if (d > 0)
3385               {
3386                 /* We could just as easily deal with negative constants here,
3387                    but it does not seem worth the trouble for GCC 2.6.  */
3388                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3389                   {
3390                     pre_shift = floor_log2 (d);
3391                     if (rem_flag)
3392                       {
3393                         remainder = expand_binop (compute_mode, and_optab, op0,
3394                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3395                                                   remainder, 0, OPTAB_LIB_WIDEN);
3396                         if (remainder)
3397                           return gen_lowpart (mode, remainder);
3398                       }
3399                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3400                                              build_int_2 (pre_shift, 0),
3401                                              tquotient, 0);
3402                   }
3403                 else
3404                   {
3405                     rtx t1, t2, t3, t4;
3406
3407                     mh = choose_multiplier (d, size, size - 1,
3408                                             &ml, &post_shift, &lgup);
3409                     if (mh)
3410                       abort ();
3411
3412                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3413                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
3414                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3415                                        NULL_RTX, 0, OPTAB_WIDEN);
3416                     extra_cost = (shift_cost[post_shift]
3417                                   + shift_cost[size - 1] + 2 * add_cost);
3418                     t3 = expand_mult_highpart (compute_mode, t2, ml,
3419                                                NULL_RTX, 1,
3420                                                max_cost - extra_cost);
3421                     if (t3 != 0)
3422                       {
3423                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3424                                            build_int_2 (post_shift, 0),
3425                                            NULL_RTX, 1);
3426                         quotient = expand_binop (compute_mode, xor_optab,
3427                                                  t4, t1, tquotient, 0,
3428                                                  OPTAB_WIDEN);
3429                       }
3430                   }
3431               }
3432             else
3433               {
3434                 rtx nsign, t1, t2, t3, t4;
3435                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3436                                                   op0, constm1_rtx), NULL_RTX);
3437                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3438                                    0, OPTAB_WIDEN);
3439                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3440                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3441                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3442                                     NULL_RTX);
3443                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3444                                     NULL_RTX, 0);
3445                 if (t4)
3446                   {
3447                     rtx t5;
3448                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3449                                       NULL_RTX, 0);
3450                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3451                                                             t4, t5),
3452                                               tquotient);
3453                   }
3454               }
3455           }
3456
3457         if (quotient != 0)
3458           break;
3459         delete_insns_since (last);
3460
3461         /* Try using an instruction that produces both the quotient and
3462            remainder, using truncation.  We can easily compensate the quotient
3463            or remainder to get floor rounding, once we have the remainder.
3464            Notice that we compute also the final remainder value here,
3465            and return the result right away.  */
3466         if (target == 0 || GET_MODE (target) != compute_mode)
3467           target = gen_reg_rtx (compute_mode);
3468
3469         if (rem_flag)
3470           {
3471             remainder
3472               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3473             quotient = gen_reg_rtx (compute_mode);
3474           }
3475         else
3476           {
3477             quotient
3478               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3479             remainder = gen_reg_rtx (compute_mode);
3480           }
3481
3482         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3483                                  quotient, remainder, 0))
3484           {
3485             /* This could be computed with a branch-less sequence.
3486                Save that for later.  */
3487             rtx tem;
3488             rtx label = gen_label_rtx ();
3489             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3490             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3491                                 NULL_RTX, 0, OPTAB_WIDEN);
3492             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3493             expand_dec (quotient, const1_rtx);
3494             expand_inc (remainder, op1);
3495             emit_label (label);
3496             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3497           }
3498
3499         /* No luck with division elimination or divmod.  Have to do it
3500            by conditionally adjusting op0 *and* the result.  */
3501         {
3502           rtx label1, label2, label3, label4, label5;
3503           rtx adjusted_op0;
3504           rtx tem;
3505
3506           quotient = gen_reg_rtx (compute_mode);
3507           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3508           label1 = gen_label_rtx ();
3509           label2 = gen_label_rtx ();
3510           label3 = gen_label_rtx ();
3511           label4 = gen_label_rtx ();
3512           label5 = gen_label_rtx ();
3513           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3514           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3515           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3516                               quotient, 0, OPTAB_LIB_WIDEN);
3517           if (tem != quotient)
3518             emit_move_insn (quotient, tem);
3519           emit_jump_insn (gen_jump (label5));
3520           emit_barrier ();
3521           emit_label (label1);
3522           expand_inc (adjusted_op0, const1_rtx);
3523           emit_jump_insn (gen_jump (label4));
3524           emit_barrier ();
3525           emit_label (label2);
3526           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3527           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3528                               quotient, 0, OPTAB_LIB_WIDEN);
3529           if (tem != quotient)
3530             emit_move_insn (quotient, tem);
3531           emit_jump_insn (gen_jump (label5));
3532           emit_barrier ();
3533           emit_label (label3);
3534           expand_dec (adjusted_op0, const1_rtx);
3535           emit_label (label4);
3536           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3537                               quotient, 0, OPTAB_LIB_WIDEN);
3538           if (tem != quotient)
3539             emit_move_insn (quotient, tem);
3540           expand_dec (quotient, const1_rtx);
3541           emit_label (label5);
3542         }
3543         break;
3544
3545       case CEIL_DIV_EXPR:
3546       case CEIL_MOD_EXPR:
3547         if (unsignedp)
3548           {
3549             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3550               {
3551                 rtx t1, t2, t3;
3552                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3553                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3554                                    build_int_2 (floor_log2 (d), 0),
3555                                    tquotient, 1);
3556                 t2 = expand_binop (compute_mode, and_optab, op0,
3557                                    GEN_INT (d - 1),
3558                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3559                 t3 = gen_reg_rtx (compute_mode);
3560                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3561                                       compute_mode, 1, 1);
3562                 if (t3 == 0)
3563                   {
3564                     rtx lab;
3565                     lab = gen_label_rtx ();
3566                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3567                     expand_inc (t1, const1_rtx);
3568                     emit_label (lab);
3569                     quotient = t1;
3570                   }
3571                 else
3572                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3573                                                           t1, t3),
3574                                             tquotient);
3575                 break;
3576               }
3577
3578             /* Try using an instruction that produces both the quotient and
3579                remainder, using truncation.  We can easily compensate the
3580                quotient or remainder to get ceiling rounding, once we have the
3581                remainder.  Notice that we compute also the final remainder
3582                value here, and return the result right away.  */
3583             if (target == 0 || GET_MODE (target) != compute_mode)
3584               target = gen_reg_rtx (compute_mode);
3585
3586             if (rem_flag)
3587               {
3588                 remainder = (GET_CODE (target) == REG
3589                              ? target : gen_reg_rtx (compute_mode));
3590                 quotient = gen_reg_rtx (compute_mode);
3591               }
3592             else
3593               {
3594                 quotient = (GET_CODE (target) == REG
3595                             ? target : gen_reg_rtx (compute_mode));
3596                 remainder = gen_reg_rtx (compute_mode);
3597               }
3598
3599             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3600                                      remainder, 1))
3601               {
3602                 /* This could be computed with a branch-less sequence.
3603                    Save that for later.  */
3604                 rtx label = gen_label_rtx ();
3605                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3606                                  compute_mode, label);
3607                 expand_inc (quotient, const1_rtx);
3608                 expand_dec (remainder, op1);
3609                 emit_label (label);
3610                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3611               }
3612
3613             /* No luck with division elimination or divmod.  Have to do it
3614                by conditionally adjusting op0 *and* the result.  */
3615             {
3616               rtx label1, label2;
3617               rtx adjusted_op0, tem;
3618
3619               quotient = gen_reg_rtx (compute_mode);
3620               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3621               label1 = gen_label_rtx ();
3622               label2 = gen_label_rtx ();
3623               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3624                                compute_mode, label1);
3625               emit_move_insn  (quotient, const0_rtx);
3626               emit_jump_insn (gen_jump (label2));
3627               emit_barrier ();
3628               emit_label (label1);
3629               expand_dec (adjusted_op0, const1_rtx);
3630               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3631                                   quotient, 1, OPTAB_LIB_WIDEN);
3632               if (tem != quotient)
3633                 emit_move_insn (quotient, tem);
3634               expand_inc (quotient, const1_rtx);
3635               emit_label (label2);
3636             }
3637           }
3638         else /* signed */
3639           {
3640             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3641                 && INTVAL (op1) >= 0)
3642               {
3643                 /* This is extremely similar to the code for the unsigned case
3644                    above.  For 2.7 we should merge these variants, but for
3645                    2.6.1 I don't want to touch the code for unsigned since that
3646                    get used in C.  The signed case will only be used by other
3647                    languages (Ada).  */
3648
3649                 rtx t1, t2, t3;
3650                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3651                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3652                                    build_int_2 (floor_log2 (d), 0),
3653                                    tquotient, 0);
3654                 t2 = expand_binop (compute_mode, and_optab, op0,
3655                                    GEN_INT (d - 1),
3656                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657                 t3 = gen_reg_rtx (compute_mode);
3658                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3659                                       compute_mode, 1, 1);
3660                 if (t3 == 0)
3661                   {
3662                     rtx lab;
3663                     lab = gen_label_rtx ();
3664                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3665                     expand_inc (t1, const1_rtx);
3666                     emit_label (lab);
3667                     quotient = t1;
3668                   }
3669                 else
3670                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3671                                                           t1, t3),
3672                                             tquotient);
3673                 break;
3674               }
3675
3676             /* Try using an instruction that produces both the quotient and
3677                remainder, using truncation.  We can easily compensate the
3678                quotient or remainder to get ceiling rounding, once we have the
3679                remainder.  Notice that we compute also the final remainder
3680                value here, and return the result right away.  */
3681             if (target == 0 || GET_MODE (target) != compute_mode)
3682               target = gen_reg_rtx (compute_mode);
3683             if (rem_flag)
3684               {
3685                 remainder= (GET_CODE (target) == REG
3686                             ? target : gen_reg_rtx (compute_mode));
3687                 quotient = gen_reg_rtx (compute_mode);
3688               }
3689             else
3690               {
3691                 quotient = (GET_CODE (target) == REG
3692                             ? target : gen_reg_rtx (compute_mode));
3693                 remainder = gen_reg_rtx (compute_mode);
3694               }
3695
3696             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3697                                      remainder, 0))
3698               {
3699                 /* This could be computed with a branch-less sequence.
3700                    Save that for later.  */
3701                 rtx tem;
3702                 rtx label = gen_label_rtx ();
3703                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3704                                  compute_mode, label);
3705                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3706                                     NULL_RTX, 0, OPTAB_WIDEN);
3707                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3708                 expand_inc (quotient, const1_rtx);
3709                 expand_dec (remainder, op1);
3710                 emit_label (label);
3711                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3712               }
3713
3714             /* No luck with division elimination or divmod.  Have to do it
3715                by conditionally adjusting op0 *and* the result.  */
3716             {
3717               rtx label1, label2, label3, label4, label5;
3718               rtx adjusted_op0;
3719               rtx tem;
3720
3721               quotient = gen_reg_rtx (compute_mode);
3722               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3723               label1 = gen_label_rtx ();
3724               label2 = gen_label_rtx ();
3725               label3 = gen_label_rtx ();
3726               label4 = gen_label_rtx ();
3727               label5 = gen_label_rtx ();
3728               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3729               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3730                                compute_mode, label1);
3731               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3732                                   quotient, 0, OPTAB_LIB_WIDEN);
3733               if (tem != quotient)
3734                 emit_move_insn (quotient, tem);
3735               emit_jump_insn (gen_jump (label5));
3736               emit_barrier ();
3737               emit_label (label1);
3738               expand_dec (adjusted_op0, const1_rtx);
3739               emit_jump_insn (gen_jump (label4));
3740               emit_barrier ();
3741               emit_label (label2);
3742               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3743                                compute_mode, label3);
3744               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3745                                   quotient, 0, OPTAB_LIB_WIDEN);
3746               if (tem != quotient)
3747                 emit_move_insn (quotient, tem);
3748               emit_jump_insn (gen_jump (label5));
3749               emit_barrier ();
3750               emit_label (label3);
3751               expand_inc (adjusted_op0, const1_rtx);
3752               emit_label (label4);
3753               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3754                                   quotient, 0, OPTAB_LIB_WIDEN);
3755               if (tem != quotient)
3756                 emit_move_insn (quotient, tem);
3757               expand_inc (quotient, const1_rtx);
3758               emit_label (label5);
3759             }
3760           }
3761         break;
3762
3763       case EXACT_DIV_EXPR:
3764         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3765           {
3766             HOST_WIDE_INT d = INTVAL (op1);
3767             unsigned HOST_WIDE_INT ml;
3768             int post_shift;
3769             rtx t1;
3770
3771             post_shift = floor_log2 (d & -d);
3772             ml = invert_mod2n (d >> post_shift, size);
3773             t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3774                               unsignedp);
3775             quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3776                                      build_int_2 (post_shift, 0),
3777                                      NULL_RTX, unsignedp);
3778
3779             insn = get_last_insn ();
3780             set_unique_reg_note (insn,
3781                                  REG_EQUAL,
3782                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3783                                                  compute_mode,
3784                                                  op0, op1));
3785           }
3786         break;
3787
3788       case ROUND_DIV_EXPR:
3789       case ROUND_MOD_EXPR:
3790         if (unsignedp)
3791           {
3792             rtx tem;
3793             rtx label;
3794             label = gen_label_rtx ();
3795             quotient = gen_reg_rtx (compute_mode);
3796             remainder = gen_reg_rtx (compute_mode);
3797             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3798               {
3799                 rtx tem;
3800                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3801                                          quotient, 1, OPTAB_LIB_WIDEN);
3802                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3803                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3804                                           remainder, 1, OPTAB_LIB_WIDEN);
3805               }
3806             tem = plus_constant (op1, -1);
3807             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3808                                 build_int_2 (1, 0), NULL_RTX, 1);
3809             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3810             expand_inc (quotient, const1_rtx);
3811             expand_dec (remainder, op1);
3812             emit_label (label);
3813           }
3814         else
3815           {
3816             rtx abs_rem, abs_op1, tem, mask;
3817             rtx label;
3818             label = gen_label_rtx ();
3819             quotient = gen_reg_rtx (compute_mode);
3820             remainder = gen_reg_rtx (compute_mode);
3821             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3822               {
3823                 rtx tem;
3824                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3825                                          quotient, 0, OPTAB_LIB_WIDEN);
3826                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3827                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3828                                           remainder, 0, OPTAB_LIB_WIDEN);
3829               }
3830             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0);
3831             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0);
3832             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3833                                 build_int_2 (1, 0), NULL_RTX, 1);
3834             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3835             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3836                                 NULL_RTX, 0, OPTAB_WIDEN);
3837             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3838                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3839             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3840                                 NULL_RTX, 0, OPTAB_WIDEN);
3841             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3842                                 NULL_RTX, 0, OPTAB_WIDEN);
3843             expand_inc (quotient, tem);
3844             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3845                                 NULL_RTX, 0, OPTAB_WIDEN);
3846             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3847                                 NULL_RTX, 0, OPTAB_WIDEN);
3848             expand_dec (remainder, tem);
3849             emit_label (label);
3850           }
3851         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3852
3853       default:
3854         abort ();
3855       }
3856
3857   if (quotient == 0)
3858     {
3859       if (target && GET_MODE (target) != compute_mode)
3860         target = 0;
3861
3862       if (rem_flag)
3863         {
3864           /* Try to produce the remainder without producing the quotient.
3865              If we seem to have a divmod patten that does not require widening,
3866              don't try windening here.  We should really have an WIDEN argument
3867              to expand_twoval_binop, since what we'd really like to do here is
3868              1) try a mod insn in compute_mode
3869              2) try a divmod insn in compute_mode
3870              3) try a div insn in compute_mode and multiply-subtract to get
3871                 remainder
3872              4) try the same things with widening allowed.  */
3873           remainder
3874             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3875                                  op0, op1, target,
3876                                  unsignedp,
3877                                  ((optab2->handlers[(int) compute_mode].insn_code
3878                                    != CODE_FOR_nothing)
3879                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3880           if (remainder == 0)
3881             {
3882               /* No luck there.  Can we do remainder and divide at once
3883                  without a library call?  */
3884               remainder = gen_reg_rtx (compute_mode);
3885               if (! expand_twoval_binop ((unsignedp
3886                                           ? udivmod_optab
3887                                           : sdivmod_optab),
3888                                          op0, op1,
3889                                          NULL_RTX, remainder, unsignedp))
3890                 remainder = 0;
3891             }
3892
3893           if (remainder)
3894             return gen_lowpart (mode, remainder);
3895         }
3896
3897       /* Produce the quotient.  Try a quotient insn, but not a library call.
3898          If we have a divmod in this mode, use it in preference to widening
3899          the div (for this test we assume it will not fail). Note that optab2
3900          is set to the one of the two optabs that the call below will use.  */
3901       quotient
3902         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3903                              op0, op1, rem_flag ? NULL_RTX : target,
3904                              unsignedp,
3905                              ((optab2->handlers[(int) compute_mode].insn_code
3906                                != CODE_FOR_nothing)
3907                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3908
3909       if (quotient == 0)
3910         {
3911           /* No luck there.  Try a quotient-and-remainder insn,
3912              keeping the quotient alone.  */
3913           quotient = gen_reg_rtx (compute_mode);
3914           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3915                                      op0, op1,
3916                                      quotient, NULL_RTX, unsignedp))
3917             {
3918               quotient = 0;
3919               if (! rem_flag)
3920                 /* Still no luck.  If we are not computing the remainder,
3921                    use a library call for the quotient.  */
3922                 quotient = sign_expand_binop (compute_mode,
3923                                               udiv_optab, sdiv_optab,
3924                                               op0, op1, target,
3925                                               unsignedp, OPTAB_LIB_WIDEN);
3926             }
3927         }
3928     }
3929
3930   if (rem_flag)
3931     {
3932       if (target && GET_MODE (target) != compute_mode)
3933         target = 0;
3934
3935       if (quotient == 0)
3936         /* No divide instruction either.  Use library for remainder.  */
3937         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3938                                        op0, op1, target,
3939                                        unsignedp, OPTAB_LIB_WIDEN);
3940       else
3941         {
3942           /* We divided.  Now finish doing X - Y * (X / Y).  */
3943           remainder = expand_mult (compute_mode, quotient, op1,
3944                                    NULL_RTX, unsignedp);
3945           remainder = expand_binop (compute_mode, sub_optab, op0,
3946                                     remainder, target, unsignedp,
3947                                     OPTAB_LIB_WIDEN);
3948         }
3949     }
3950
3951   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3952 }
3953 \f
3954 /* Return a tree node with data type TYPE, describing the value of X.
3955    Usually this is an RTL_EXPR, if there is no obvious better choice.
3956    X may be an expression, however we only support those expressions
3957    generated by loop.c.   */
3958
3959 tree
3960 make_tree (type, x)
3961      tree type;
3962      rtx x;
3963 {
3964   tree t;
3965
3966   switch (GET_CODE (x))
3967     {
3968     case CONST_INT:
3969       t = build_int_2 (INTVAL (x),
3970                        (TREE_UNSIGNED (type)
3971                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3972                        || INTVAL (x) >= 0 ? 0 : -1);
3973       TREE_TYPE (t) = type;
3974       return t;
3975
3976     case CONST_DOUBLE:
3977       if (GET_MODE (x) == VOIDmode)
3978         {
3979           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3980           TREE_TYPE (t) = type;
3981         }
3982       else
3983         {
3984           REAL_VALUE_TYPE d;
3985
3986           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3987           t = build_real (type, d);
3988         }
3989
3990       return t;
3991
3992     case PLUS:
3993       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3994                           make_tree (type, XEXP (x, 1))));
3995
3996     case MINUS:
3997       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3998                           make_tree (type, XEXP (x, 1))));
3999
4000     case NEG:
4001       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4002
4003     case MULT:
4004       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4005                           make_tree (type, XEXP (x, 1))));
4006
4007     case ASHIFT:
4008       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4009                           make_tree (type, XEXP (x, 1))));
4010
4011     case LSHIFTRT:
4012       return fold (convert (type,
4013                             build (RSHIFT_EXPR, unsigned_type (type),
4014                                    make_tree (unsigned_type (type),
4015                                               XEXP (x, 0)),
4016                                    make_tree (type, XEXP (x, 1)))));
4017
4018     case ASHIFTRT:
4019       return fold (convert (type,
4020                             build (RSHIFT_EXPR, signed_type (type),
4021                                    make_tree (signed_type (type), XEXP (x, 0)),
4022                                    make_tree (type, XEXP (x, 1)))));
4023
4024     case DIV:
4025       if (TREE_CODE (type) != REAL_TYPE)
4026         t = signed_type (type);
4027       else
4028         t = type;
4029
4030       return fold (convert (type,
4031                             build (TRUNC_DIV_EXPR, t,
4032                                    make_tree (t, XEXP (x, 0)),
4033                                    make_tree (t, XEXP (x, 1)))));
4034     case UDIV:
4035       t = unsigned_type (type);
4036       return fold (convert (type,
4037                             build (TRUNC_DIV_EXPR, t,
4038                                    make_tree (t, XEXP (x, 0)),
4039                                    make_tree (t, XEXP (x, 1)))));
4040    default:
4041       t = make_node (RTL_EXPR);
4042       TREE_TYPE (t) = type;
4043       RTL_EXPR_RTL (t) = x;
4044       /* There are no insns to be output
4045          when this rtl_expr is used.  */
4046       RTL_EXPR_SEQUENCE (t) = 0;
4047       return t;
4048     }
4049 }
4050
4051 /* Return an rtx representing the value of X * MULT + ADD.
4052    TARGET is a suggestion for where to store the result (an rtx).
4053    MODE is the machine mode for the computation.
4054    X and MULT must have mode MODE.  ADD may have a different mode.
4055    So can X (defaults to same as MODE).
4056    UNSIGNEDP is non-zero to do unsigned multiplication.
4057    This may emit insns.  */
4058
4059 rtx
4060 expand_mult_add (x, target, mult, add, mode, unsignedp)
4061      rtx x, target, mult, add;
4062      enum machine_mode mode;
4063      int unsignedp;
4064 {
4065   tree type = type_for_mode (mode, unsignedp);
4066   tree add_type = (GET_MODE (add) == VOIDmode
4067                    ? type : type_for_mode (GET_MODE (add), unsignedp));
4068   tree result =  fold (build (PLUS_EXPR, type,
4069                               fold (build (MULT_EXPR, type,
4070                                            make_tree (type, x),
4071                                            make_tree (type, mult))),
4072                               make_tree (add_type, add)));
4073
4074   return expand_expr (result, target, VOIDmode, 0);
4075 }
4076 \f
4077 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4078    and returning TARGET.
4079
4080    If TARGET is 0, a pseudo-register or constant is returned.  */
4081
4082 rtx
4083 expand_and (op0, op1, target)
4084      rtx op0, op1, target;
4085 {
4086   enum machine_mode mode = VOIDmode;
4087   rtx tem;
4088
4089   if (GET_MODE (op0) != VOIDmode)
4090     mode = GET_MODE (op0);
4091   else if (GET_MODE (op1) != VOIDmode)
4092     mode = GET_MODE (op1);
4093
4094   if (mode != VOIDmode)
4095     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4096   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4097     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4098   else
4099     abort ();
4100
4101   if (target == 0)
4102     target = tem;
4103   else if (tem != target)
4104     emit_move_insn (target, tem);
4105   return target;
4106 }
4107 \f
4108 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4109    and storing in TARGET.  Normally return TARGET.
4110    Return 0 if that cannot be done.
4111
4112    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4113    it is VOIDmode, they cannot both be CONST_INT.
4114
4115    UNSIGNEDP is for the case where we have to widen the operands
4116    to perform the operation.  It says to use zero-extension.
4117
4118    NORMALIZEP is 1 if we should convert the result to be either zero
4119    or one.  Normalize is -1 if we should convert the result to be
4120    either zero or -1.  If NORMALIZEP is zero, the result will be left
4121    "raw" out of the scc insn.  */
4122
4123 rtx
4124 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4125      rtx target;
4126      enum rtx_code code;
4127      rtx op0, op1;
4128      enum machine_mode mode;
4129      int unsignedp;
4130      int normalizep;
4131 {
4132   rtx subtarget;
4133   enum insn_code icode;
4134   enum machine_mode compare_mode;
4135   enum machine_mode target_mode = GET_MODE (target);
4136   rtx tem;
4137   rtx last = get_last_insn ();
4138   rtx pattern, comparison;
4139
4140   if (unsignedp)
4141     code = unsigned_condition (code);
4142
4143   /* If one operand is constant, make it the second one.  Only do this
4144      if the other operand is not constant as well.  */
4145
4146   if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4147       || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4148     {
4149       tem = op0;
4150       op0 = op1;
4151       op1 = tem;
4152       code = swap_condition (code);
4153     }
4154
4155   if (mode == VOIDmode)
4156     mode = GET_MODE (op0);
4157
4158   /* For some comparisons with 1 and -1, we can convert this to
4159      comparisons with zero.  This will often produce more opportunities for
4160      store-flag insns.  */
4161
4162   switch (code)
4163     {
4164     case LT:
4165       if (op1 == const1_rtx)
4166         op1 = const0_rtx, code = LE;
4167       break;
4168     case LE:
4169       if (op1 == constm1_rtx)
4170         op1 = const0_rtx, code = LT;
4171       break;
4172     case GE:
4173       if (op1 == const1_rtx)
4174         op1 = const0_rtx, code = GT;
4175       break;
4176     case GT:
4177       if (op1 == constm1_rtx)
4178         op1 = const0_rtx, code = GE;
4179       break;
4180     case GEU:
4181       if (op1 == const1_rtx)
4182         op1 = const0_rtx, code = NE;
4183       break;
4184     case LTU:
4185       if (op1 == const1_rtx)
4186         op1 = const0_rtx, code = EQ;
4187       break;
4188     default:
4189       break;
4190     }
4191
4192   /* From now on, we won't change CODE, so set ICODE now.  */
4193   icode = setcc_gen_code[(int) code];
4194
4195   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4196      complement of A (for GE) and shifting the sign bit to the low bit.  */
4197   if (op1 == const0_rtx && (code == LT || code == GE)
4198       && GET_MODE_CLASS (mode) == MODE_INT
4199       && (normalizep || STORE_FLAG_VALUE == 1
4200           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4201               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4202                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4203     {
4204       subtarget = target;
4205
4206       /* If the result is to be wider than OP0, it is best to convert it
4207          first.  If it is to be narrower, it is *incorrect* to convert it
4208          first.  */
4209       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4210         {
4211           op0 = protect_from_queue (op0, 0);
4212           op0 = convert_modes (target_mode, mode, op0, 0);
4213           mode = target_mode;
4214         }
4215
4216       if (target_mode != mode)
4217         subtarget = 0;
4218
4219       if (code == GE)
4220         op0 = expand_unop (mode, one_cmpl_optab, op0,
4221                            ((STORE_FLAG_VALUE == 1 || normalizep)
4222                             ? 0 : subtarget), 0);
4223
4224       if (STORE_FLAG_VALUE == 1 || normalizep)
4225         /* If we are supposed to produce a 0/1 value, we want to do
4226            a logical shift from the sign bit to the low-order bit; for
4227            a -1/0 value, we do an arithmetic shift.  */
4228         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4229                             size_int (GET_MODE_BITSIZE (mode) - 1),
4230                             subtarget, normalizep != -1);
4231
4232       if (mode != target_mode)
4233         op0 = convert_modes (target_mode, mode, op0, 0);
4234
4235       return op0;
4236     }
4237
4238   if (icode != CODE_FOR_nothing)
4239     {
4240       insn_operand_predicate_fn pred;
4241
4242       /* We think we may be able to do this with a scc insn.  Emit the
4243          comparison and then the scc insn.
4244
4245          compare_from_rtx may call emit_queue, which would be deleted below
4246          if the scc insn fails.  So call it ourselves before setting LAST.
4247          Likewise for do_pending_stack_adjust.  */
4248
4249       emit_queue ();
4250       do_pending_stack_adjust ();
4251       last = get_last_insn ();
4252
4253       comparison
4254         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4255       if (GET_CODE (comparison) == CONST_INT)
4256         return (comparison == const0_rtx ? const0_rtx
4257                 : normalizep == 1 ? const1_rtx
4258                 : normalizep == -1 ? constm1_rtx
4259                 : const_true_rtx);
4260
4261       /* If the code of COMPARISON doesn't match CODE, something is
4262          wrong; we can no longer be sure that we have the operation.
4263          We could handle this case, but it should not happen.  */
4264
4265       if (GET_CODE (comparison) != code)
4266         abort ();
4267
4268       /* Get a reference to the target in the proper mode for this insn.  */
4269       compare_mode = insn_data[(int) icode].operand[0].mode;
4270       subtarget = target;
4271       pred = insn_data[(int) icode].operand[0].predicate;
4272       if (preserve_subexpressions_p ()
4273           || ! (*pred) (subtarget, compare_mode))
4274         subtarget = gen_reg_rtx (compare_mode);
4275
4276       pattern = GEN_FCN (icode) (subtarget);
4277       if (pattern)
4278         {
4279           emit_insn (pattern);
4280
4281           /* If we are converting to a wider mode, first convert to
4282              TARGET_MODE, then normalize.  This produces better combining
4283              opportunities on machines that have a SIGN_EXTRACT when we are
4284              testing a single bit.  This mostly benefits the 68k.
4285
4286              If STORE_FLAG_VALUE does not have the sign bit set when
4287              interpreted in COMPARE_MODE, we can do this conversion as
4288              unsigned, which is usually more efficient.  */
4289           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4290             {
4291               convert_move (target, subtarget,
4292                             (GET_MODE_BITSIZE (compare_mode)
4293                              <= HOST_BITS_PER_WIDE_INT)
4294                             && 0 == (STORE_FLAG_VALUE
4295                                      & ((HOST_WIDE_INT) 1
4296                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4297               op0 = target;
4298               compare_mode = target_mode;
4299             }
4300           else
4301             op0 = subtarget;
4302
4303           /* If we want to keep subexpressions around, don't reuse our
4304              last target.  */
4305
4306           if (preserve_subexpressions_p ())
4307             subtarget = 0;
4308
4309           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4310              we don't have to do anything.  */
4311           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4312             ;
4313           /* STORE_FLAG_VALUE might be the most negative number, so write
4314              the comparison this way to avoid a compiler-time warning.  */
4315           else if (- normalizep == STORE_FLAG_VALUE)
4316             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4317
4318           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4319              makes it hard to use a value of just the sign bit due to
4320              ANSI integer constant typing rules.  */
4321           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4322                    && (STORE_FLAG_VALUE
4323                        & ((HOST_WIDE_INT) 1
4324                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4325             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4326                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4327                                 subtarget, normalizep == 1);
4328           else if (STORE_FLAG_VALUE & 1)
4329             {
4330               op0 = expand_and (op0, const1_rtx, subtarget);
4331               if (normalizep == -1)
4332                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4333             }
4334           else
4335             abort ();
4336
4337           /* If we were converting to a smaller mode, do the
4338              conversion now.  */
4339           if (target_mode != compare_mode)
4340             {
4341               convert_move (target, op0, 0);
4342               return target;
4343             }
4344           else
4345             return op0;
4346         }
4347     }
4348
4349   delete_insns_since (last);
4350
4351   /* If expensive optimizations, use different pseudo registers for each
4352      insn, instead of reusing the same pseudo.  This leads to better CSE,
4353      but slows down the compiler, since there are more pseudos */
4354   subtarget = (!flag_expensive_optimizations
4355                && (target_mode == mode)) ? target : NULL_RTX;
4356
4357   /* If we reached here, we can't do this with a scc insn.  However, there
4358      are some comparisons that can be done directly.  For example, if
4359      this is an equality comparison of integers, we can try to exclusive-or
4360      (or subtract) the two operands and use a recursive call to try the
4361      comparison with zero.  Don't do any of these cases if branches are
4362      very cheap.  */
4363
4364   if (BRANCH_COST > 0
4365       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4366       && op1 != const0_rtx)
4367     {
4368       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4369                           OPTAB_WIDEN);
4370
4371       if (tem == 0)
4372         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4373                             OPTAB_WIDEN);
4374       if (tem != 0)
4375         tem = emit_store_flag (target, code, tem, const0_rtx,
4376                                mode, unsignedp, normalizep);
4377       if (tem == 0)
4378         delete_insns_since (last);
4379       return tem;
4380     }
4381
4382   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4383      the constant zero.  Reject all other comparisons at this point.  Only
4384      do LE and GT if branches are expensive since they are expensive on
4385      2-operand machines.  */
4386
4387   if (BRANCH_COST == 0
4388       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4389       || (code != EQ && code != NE
4390           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4391     return 0;
4392
4393   /* See what we need to return.  We can only return a 1, -1, or the
4394      sign bit.  */
4395
4396   if (normalizep == 0)
4397     {
4398       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4399         normalizep = STORE_FLAG_VALUE;
4400
4401       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4402                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4403                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4404         ;
4405       else
4406         return 0;
4407     }
4408
4409   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4410      do the necessary operation below.  */
4411
4412   tem = 0;
4413
4414   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4415      the sign bit set.  */
4416
4417   if (code == LE)
4418     {
4419       /* This is destructive, so SUBTARGET can't be OP0.  */
4420       if (rtx_equal_p (subtarget, op0))
4421         subtarget = 0;
4422
4423       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4424                           OPTAB_WIDEN);
4425       if (tem)
4426         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4427                             OPTAB_WIDEN);
4428     }
4429
4430   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4431      number of bits in the mode of OP0, minus one.  */
4432
4433   if (code == GT)
4434     {
4435       if (rtx_equal_p (subtarget, op0))
4436         subtarget = 0;
4437
4438       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4439                           size_int (GET_MODE_BITSIZE (mode) - 1),
4440                           subtarget, 0);
4441       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4442                           OPTAB_WIDEN);
4443     }
4444
4445   if (code == EQ || code == NE)
4446     {
4447       /* For EQ or NE, one way to do the comparison is to apply an operation
4448          that converts the operand into a positive number if it is non-zero
4449          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4450          for NE we negate.  This puts the result in the sign bit.  Then we
4451          normalize with a shift, if needed.
4452
4453          Two operations that can do the above actions are ABS and FFS, so try
4454          them.  If that doesn't work, and MODE is smaller than a full word,
4455          we can use zero-extension to the wider mode (an unsigned conversion)
4456          as the operation.  */
4457
4458       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4459         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4460       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4461         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4462       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4463         {
4464           op0 = protect_from_queue (op0, 0);
4465           tem = convert_modes (word_mode, mode, op0, 1);
4466           mode = word_mode;
4467         }
4468
4469       if (tem != 0)
4470         {
4471           if (code == EQ)
4472             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4473                                 0, OPTAB_WIDEN);
4474           else
4475             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4476         }
4477
4478       /* If we couldn't do it that way, for NE we can "or" the two's complement
4479          of the value with itself.  For EQ, we take the one's complement of
4480          that "or", which is an extra insn, so we only handle EQ if branches
4481          are expensive.  */
4482
4483       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4484         {
4485           if (rtx_equal_p (subtarget, op0))
4486             subtarget = 0;
4487
4488           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4489           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4490                               OPTAB_WIDEN);
4491
4492           if (tem && code == EQ)
4493             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4494         }
4495     }
4496
4497   if (tem && normalizep)
4498     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4499                         size_int (GET_MODE_BITSIZE (mode) - 1),
4500                         subtarget, normalizep == 1);
4501
4502   if (tem)
4503     {
4504       if (GET_MODE (tem) != target_mode)
4505         {
4506           convert_move (target, tem, 0);
4507           tem = target;
4508         }
4509       else if (!subtarget)
4510         {
4511           emit_move_insn (target, tem);
4512           tem = target;
4513         }
4514     }
4515   else
4516     delete_insns_since (last);
4517
4518   return tem;
4519 }
4520
4521 /* Like emit_store_flag, but always succeeds.  */
4522
4523 rtx
4524 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4525      rtx target;
4526      enum rtx_code code;
4527      rtx op0, op1;
4528      enum machine_mode mode;
4529      int unsignedp;
4530      int normalizep;
4531 {
4532   rtx tem, label;
4533
4534   /* First see if emit_store_flag can do the job.  */
4535   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4536   if (tem != 0)
4537     return tem;
4538
4539   if (normalizep == 0)
4540     normalizep = 1;
4541
4542   /* If this failed, we have to do this with set/compare/jump/set code.  */
4543
4544   if (GET_CODE (target) != REG
4545       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4546     target = gen_reg_rtx (GET_MODE (target));
4547
4548   emit_move_insn (target, const1_rtx);
4549   label = gen_label_rtx ();
4550   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4551                            NULL_RTX, label);
4552
4553   emit_move_insn (target, const0_rtx);
4554   emit_label (label);
4555
4556   return target;
4557 }
4558 \f
4559 /* Perform possibly multi-word comparison and conditional jump to LABEL
4560    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4561
4562    The algorithm is based on the code in expr.c:do_jump.
4563
4564    Note that this does not perform a general comparison.  Only variants
4565    generated within expmed.c are correctly handled, others abort (but could
4566    be handled if needed).  */
4567
4568 static void
4569 do_cmp_and_jump (arg1, arg2, op, mode, label)
4570      rtx arg1, arg2, label;
4571      enum rtx_code op;
4572      enum machine_mode mode;
4573 {
4574   /* If this mode is an integer too wide to compare properly,
4575      compare word by word.  Rely on cse to optimize constant cases.  */
4576
4577   if (GET_MODE_CLASS (mode) == MODE_INT
4578       && ! can_compare_p (op, mode, ccp_jump))
4579     {
4580       rtx label2 = gen_label_rtx ();
4581
4582       switch (op)
4583         {
4584         case LTU:
4585           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4586           break;
4587
4588         case LEU:
4589           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4590           break;
4591
4592         case LT:
4593           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4594           break;
4595
4596         case GT:
4597           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4598           break;
4599
4600         case GE:
4601           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4602           break;
4603
4604           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4605              that's the only equality operations we do */
4606         case EQ:
4607           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4608             abort();
4609           do_jump_by_parts_equality_rtx (arg1, label2, label);
4610           break;
4611
4612         case NE:
4613           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4614             abort();
4615           do_jump_by_parts_equality_rtx (arg1, label, label2);
4616           break;
4617
4618         default:
4619           abort();
4620         }
4621
4622       emit_label (label2);
4623     }
4624   else
4625     {
4626       emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4627     }
4628 }