gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-config.h"
  32 #include "expr.h"
  33 #include "optabs.h"
  34 #include "real.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37
  38 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  39                                                  unsigned HOST_WIDE_INT,
  40                                                  unsigned HOST_WIDE_INT, rtx));
  41 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  42                                                  unsigned HOST_WIDE_INT, rtx));
  43 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  44                                                  unsigned HOST_WIDE_INT,
  45                                                  unsigned HOST_WIDE_INT,
  46                                                  unsigned HOST_WIDE_INT,
  47                                                  rtx, int));
  48 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  49                                                  int, int));
  50 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  51                                                  int, int));
  52 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  53                                                  unsigned HOST_WIDE_INT, int));
  54 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  55                                                  enum machine_mode, rtx));
  56
  57 /* Nonzero means divides or modulus operations are relatively cheap for
  58    powers of two, so don't use branches; emit the operation instead.
  59    Usually, this will mean that the MD file will emit non-branch
  60    sequences.  */
  61
  62 static int sdiv_pow2_cheap, smod_pow2_cheap;
  63
  64 #ifndef SLOW_UNALIGNED_ACCESS
  65 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  66 #endif
  67
  68 /* For compilers that support multiple targets with different word sizes,
  69    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  70    is the H8/300(H) compiler.  */
  71
  72 #ifndef MAX_BITS_PER_WORD
  73 #define MAX_BITS_PER_WORD BITS_PER_WORD
  74 #endif
  75
  76 /* Reduce conditional compilation elsewhere.  */
  77 #ifndef HAVE_insv
  78 #define HAVE_insv       0
  79 #define CODE_FOR_insv   CODE_FOR_nothing
  80 #define gen_insv(a,b,c,d) NULL_RTX
  81 #endif
  82 #ifndef HAVE_extv
  83 #define HAVE_extv       0
  84 #define CODE_FOR_extv   CODE_FOR_nothing
  85 #define gen_extv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extzv
  88 #define HAVE_extzv      0
  89 #define CODE_FOR_extzv  CODE_FOR_nothing
  90 #define gen_extzv(a,b,c,d) NULL_RTX
  91 #endif
  92
  93 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  94    shift count and some by mode.  */
  95 static int add_cost, negate_cost, zero_cost;
  96 static int shift_cost[MAX_BITS_PER_WORD];
  97 static int shiftadd_cost[MAX_BITS_PER_WORD];
  98 static int shiftsub_cost[MAX_BITS_PER_WORD];
  99 static int mul_cost[NUM_MACHINE_MODES];
 100 static int div_cost[NUM_MACHINE_MODES];
 101 static int mul_widen_cost[NUM_MACHINE_MODES];
 102 static int mul_highpart_cost[NUM_MACHINE_MODES];
 103
 104 void
 105 init_expmed ()
 106 {
 107   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 108   int dummy;
 109   int m;
 110   enum machine_mode mode, wider_mode;
 111
 112   start_sequence ();
 113
 114   /* This is "some random pseudo register" for purposes of calling recog
 115      to see what insns exist.  */
 116   reg = gen_rtx_REG (word_mode, 10000);
 117
 118   zero_cost = rtx_cost (const0_rtx, 0);
 119   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 120
 121   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 122                                        gen_rtx_ASHIFT (word_mode, reg,
 123                                                        const0_rtx)));
 124
 125   shiftadd_insn
 126     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 127                               gen_rtx_PLUS (word_mode,
 128                                             gen_rtx_MULT (word_mode,
 129                                                           reg, const0_rtx),
 130                                             reg)));
 131
 132   shiftsub_insn
 133     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 134                               gen_rtx_MINUS (word_mode,
 135                                              gen_rtx_MULT (word_mode,
 136                                                            reg, const0_rtx),
 137                                              reg)));
 138
 139   init_recog ();
 140
 141   shift_cost[0] = 0;
 142   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 143
 144   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 145     {
 146       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 147       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 148
 149       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 150       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 151         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 152
 153       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 154       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 155         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 156
 157       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 158       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 159         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 160     }
 161
 162   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 163
 164   sdiv_pow2_cheap
 165     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 166        <= 2 * add_cost);
 167   smod_pow2_cheap
 168     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 169        <= 2 * add_cost);
 170
 171   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 172        mode != VOIDmode;
 173        mode = GET_MODE_WIDER_MODE (mode))
 174     {
 175       reg = gen_rtx_REG (mode, 10000);
 176       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 177       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 178       wider_mode = GET_MODE_WIDER_MODE (mode);
 179       if (wider_mode != VOIDmode)
 180         {
 181           mul_widen_cost[(int) wider_mode]
 182             = rtx_cost (gen_rtx_MULT (wider_mode,
 183                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 184                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 185                         SET);
 186           mul_highpart_cost[(int) mode]
 187             = rtx_cost (gen_rtx_TRUNCATE
 188                         (mode,
 189                          gen_rtx_LSHIFTRT (wider_mode,
 190                                            gen_rtx_MULT (wider_mode,
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg),
 193                                                          gen_rtx_ZERO_EXTEND
 194                                                          (wider_mode, reg)),
 195                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 196                         SET);
 197         }
 198     }
 199
 200   end_sequence ();
 201 }
 202
 203 /* Return an rtx representing minus the value of X.
 204    MODE is the intended mode of the result,
 205    useful if X is a CONST_INT.  */
 206
 207 rtx
 208 negate_rtx (mode, x)
 209      enum machine_mode mode;
 210      rtx x;
 211 {
 212   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 213
 214   if (result == 0)
 215     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 216
 217   return result;
 218 }
 219
 220 /* Report on the availability of insv/extv/extzv and the desired mode
 221    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 222    is false; else the mode of the specified operand.  If OPNO is -1,
 223    all the caller cares about is whether the insn is available.  */
 224 enum machine_mode
 225 mode_for_extraction (pattern, opno)
 226      enum extraction_pattern pattern;
 227      int opno;
 228 {
 229   const struct insn_data *data;
 230
 231   switch (pattern)
 232     {
 233     case EP_insv:
 234       if (HAVE_insv)
 235         {
 236           data = &insn_data[CODE_FOR_insv];
 237           break;
 238         }
 239       return MAX_MACHINE_MODE;
 240
 241     case EP_extv:
 242       if (HAVE_extv)
 243         {
 244           data = &insn_data[CODE_FOR_extv];
 245           break;
 246         }
 247       return MAX_MACHINE_MODE;
 248
 249     case EP_extzv:
 250       if (HAVE_extzv)
 251         {
 252           data = &insn_data[CODE_FOR_extzv];
 253           break;
 254         }
 255       return MAX_MACHINE_MODE;
 256
 257     default:
 258       abort ();
 259     }
 260
 261   if (opno == -1)
 262     return VOIDmode;
 263
 264   /* Everyone who uses this function used to follow it with
 265      if (result == VOIDmode) result = word_mode; */
 266   if (data->operand[opno].mode == VOIDmode)
 267     return word_mode;
 268   return data->operand[opno].mode;
 269 }
 270
 271 \f
 272 /* Generate code to store value from rtx VALUE
 273    into a bit-field within structure STR_RTX
 274    containing BITSIZE bits starting at bit BITNUM.
 275    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 276    ALIGN is the alignment that STR_RTX is known to have.
 277    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 278
 279 /* ??? Note that there are two different ideas here for how
 280    to determine the size to count bits within, for a register.
 281    One is BITS_PER_WORD, and the other is the size of operand 3
 282    of the insv pattern.
 283
 284    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 285    else, we use the mode of operand 3.  */
 286
 287 rtx
 288 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size)
 289      rtx str_rtx;
 290      unsigned HOST_WIDE_INT bitsize;
 291      unsigned HOST_WIDE_INT bitnum;
 292      enum machine_mode fieldmode;
 293      rtx value;
 294      HOST_WIDE_INT total_size;
 295 {
 296   unsigned int unit
 297     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 298   unsigned HOST_WIDE_INT offset = bitnum / unit;
 299   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 300   rtx op0 = str_rtx;
 301   int byte_offset;
 302
 303   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 304
 305   /* Discount the part of the structure before the desired byte.
 306      We need to know how many bytes are safe to reference after it.  */
 307   if (total_size >= 0)
 308     total_size -= (bitpos / BIGGEST_ALIGNMENT
 309                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 310
 311   while (GET_CODE (op0) == SUBREG)
 312     {
 313       /* The following line once was done only if WORDS_BIG_ENDIAN,
 314          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 315          meaningful at a much higher level; when structures are copied
 316          between memory and regs, the higher-numbered regs
 317          always get higher addresses.  */
 318       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 319       /* We used to adjust BITPOS here, but now we do the whole adjustment
 320          right after the loop.  */
 321       op0 = SUBREG_REG (op0);
 322     }
 323
 324   value = protect_from_queue (value, 0);
 325
 326   if (flag_force_mem)
 327     {
 328       int old_generating_concat_p = generating_concat_p;
 329       generating_concat_p = 0;
 330       value = force_not_mem (value);
 331       generating_concat_p = old_generating_concat_p;
 332     }
 333
 334   /* If the target is a register, overwriting the entire object, or storing
 335      a full-word or multi-word field can be done with just a SUBREG.
 336
 337      If the target is memory, storing any naturally aligned field can be
 338      done with a simple store.  For targets that support fast unaligned
 339      memory, any naturally sized, unit aligned field can be done directly.  */
 340
 341   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 342                 + (offset * UNITS_PER_WORD);
 343
 344   if (bitpos == 0
 345       && bitsize == GET_MODE_BITSIZE (fieldmode)
 346       && (GET_CODE (op0) != MEM
 347           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 348              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 349              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 350           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 351              || (offset * BITS_PER_UNIT % bitsize == 0
 352                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 353     {
 354       if (GET_MODE (op0) != fieldmode)
 355         {
 356           if (GET_CODE (op0) == SUBREG)
 357             {
 358               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 359                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 360                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 361                 op0 = SUBREG_REG (op0);
 362               else
 363                 /* Else we've got some float mode source being extracted into
 364                    a different float mode destination -- this combination of
 365                    subregs results in Severe Tire Damage.  */
 366                 abort ();
 367             }
 368           if (GET_CODE (op0) == REG)
 369             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 370           else
 371             op0 = adjust_address (op0, fieldmode, offset);
 372         }
 373       emit_move_insn (op0, value);
 374       return value;
 375     }
 376
 377   /* Make sure we are playing with integral modes.  Pun with subregs
 378      if we aren't.  This must come after the entire register case above,
 379      since that case is valid for any mode.  The following cases are only
 380      valid for integral modes.  */
 381   {
 382     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 383     if (imode != GET_MODE (op0))
 384       {
 385         if (GET_CODE (op0) == MEM)
 386           op0 = adjust_address (op0, imode, 0);
 387         else if (imode != BLKmode)
 388           op0 = gen_lowpart (imode, op0);
 389         else
 390           abort ();
 391       }
 392   }
 393
 394   /* We may be accessing data outside the field, which means
 395      we can alias adjacent data.  */
 396   if (GET_CODE (op0) == MEM)
 397     {
 398       op0 = shallow_copy_rtx (op0);
 399       set_mem_alias_set (op0, 0);
 400       set_mem_expr (op0, 0);
 401     }
 402
 403   /* If OP0 is a register, BITPOS must count within a word.
 404      But as we have it, it counts within whatever size OP0 now has.
 405      On a bigendian machine, these are not the same, so convert.  */
 406   if (BYTES_BIG_ENDIAN
 407       && GET_CODE (op0) != MEM
 408       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 409     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 410
 411   /* Storing an lsb-aligned field in a register
 412      can be done with a movestrict instruction.  */
 413
 414   if (GET_CODE (op0) != MEM
 415       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 416       && bitsize == GET_MODE_BITSIZE (fieldmode)
 417       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 418           != CODE_FOR_nothing))
 419     {
 420       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 421
 422       /* Get appropriate low part of the value being stored.  */
 423       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 424         value = gen_lowpart (fieldmode, value);
 425       else if (!(GET_CODE (value) == SYMBOL_REF
 426                  || GET_CODE (value) == LABEL_REF
 427                  || GET_CODE (value) == CONST))
 428         value = convert_to_mode (fieldmode, value, 0);
 429
 430       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 431         value = copy_to_mode_reg (fieldmode, value);
 432
 433       if (GET_CODE (op0) == SUBREG)
 434         {
 435           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 436               || GET_MODE_CLASS (fieldmode) == MODE_INT
 437               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 438             op0 = SUBREG_REG (op0);
 439           else
 440             /* Else we've got some float mode source being extracted into
 441                a different float mode destination -- this combination of
 442                subregs results in Severe Tire Damage.  */
 443             abort ();
 444         }
 445
 446       emit_insn (GEN_FCN (icode)
 447                  (gen_rtx_SUBREG (fieldmode, op0,
 448                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 449                                   + (offset * UNITS_PER_WORD)),
 450                                   value));
 451
 452       return value;
 453     }
 454
 455   /* Handle fields bigger than a word.  */
 456
 457   if (bitsize > BITS_PER_WORD)
 458     {
 459       /* Here we transfer the words of the field
 460          in the order least significant first.
 461          This is because the most significant word is the one which may
 462          be less than full.
 463          However, only do that if the value is not BLKmode.  */
 464
 465       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 466       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 467       unsigned int i;
 468
 469       /* This is the mode we must force value to, so that there will be enough
 470          subwords to extract.  Note that fieldmode will often (always?) be
 471          VOIDmode, because that is what store_field uses to indicate that this
 472          is a bit field, but passing VOIDmode to operand_subword_force will
 473          result in an abort.  */
 474       fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 475
 476       for (i = 0; i < nwords; i++)
 477         {
 478           /* If I is 0, use the low-order word in both field and target;
 479              if I is 1, use the next to lowest word; and so on.  */
 480           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 481           unsigned int bit_offset = (backwards
 482                                      ? MAX ((int) bitsize - ((int) i + 1)
 483                                             * BITS_PER_WORD,
 484                                             0)
 485                                      : (int) i * BITS_PER_WORD);
 486
 487           store_bit_field (op0, MIN (BITS_PER_WORD,
 488                                      bitsize - i * BITS_PER_WORD),
 489                            bitnum + bit_offset, word_mode,
 490                            operand_subword_force (value, wordnum,
 491                                                   (GET_MODE (value) == VOIDmode
 492                                                    ? fieldmode
 493                                                    : GET_MODE (value))),
 494                            total_size);
 495         }
 496       return value;
 497     }
 498
 499   /* From here on we can assume that the field to be stored in is
 500      a full-word (whatever type that is), since it is shorter than a word.  */
 501
 502   /* OFFSET is the number of words or bytes (UNIT says which)
 503      from STR_RTX to the first word or byte containing part of the field.  */
 504
 505   if (GET_CODE (op0) != MEM)
 506     {
 507       if (offset != 0
 508           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 509         {
 510           if (GET_CODE (op0) != REG)
 511             {
 512               /* Since this is a destination (lvalue), we can't copy it to a
 513                  pseudo.  We can trivially remove a SUBREG that does not
 514                  change the size of the operand.  Such a SUBREG may have been
 515                  added above.  Otherwise, abort.  */
 516               if (GET_CODE (op0) == SUBREG
 517                   && (GET_MODE_SIZE (GET_MODE (op0))
 518                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 519                 op0 = SUBREG_REG (op0);
 520               else
 521                 abort ();
 522             }
 523           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 524                                 op0, (offset * UNITS_PER_WORD));
 525         }
 526       offset = 0;
 527     }
 528   else
 529     op0 = protect_from_queue (op0, 1);
 530
 531   /* If VALUE is a floating-point mode, access it as an integer of the
 532      corresponding size.  This can occur on a machine with 64 bit registers
 533      that uses SFmode for float.  This can also occur for unaligned float
 534      structure fields.  */
 535   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 536       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 537     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 538                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 539                          value);
 540
 541   /* Now OFFSET is nonzero only if OP0 is memory
 542      and is therefore always measured in bytes.  */
 543
 544   if (HAVE_insv
 545       && GET_MODE (value) != BLKmode
 546       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 547       /* Ensure insv's size is wide enough for this field.  */
 548       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 549       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 550             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 551     {
 552       int xbitpos = bitpos;
 553       rtx value1;
 554       rtx xop0 = op0;
 555       rtx last = get_last_insn ();
 556       rtx pat;
 557       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 558       int save_volatile_ok = volatile_ok;
 559
 560       volatile_ok = 1;
 561
 562       /* If this machine's insv can only insert into a register, copy OP0
 563          into a register and save it back later.  */
 564       /* This used to check flag_force_mem, but that was a serious
 565          de-optimization now that flag_force_mem is enabled by -O2.  */
 566       if (GET_CODE (op0) == MEM
 567           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 568                 (op0, VOIDmode)))
 569         {
 570           rtx tempreg;
 571           enum machine_mode bestmode;
 572
 573           /* Get the mode to use for inserting into this field.  If OP0 is
 574              BLKmode, get the smallest mode consistent with the alignment. If
 575              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 576              mode. Otherwise, use the smallest mode containing the field.  */
 577
 578           if (GET_MODE (op0) == BLKmode
 579               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 580             bestmode
 581               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 582                                MEM_VOLATILE_P (op0));
 583           else
 584             bestmode = GET_MODE (op0);
 585
 586           if (bestmode == VOIDmode
 587               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 588                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 589             goto insv_loses;
 590
 591           /* Adjust address to point to the containing unit of that mode.
 592              Compute offset as multiple of this unit, counting in bytes.  */
 593           unit = GET_MODE_BITSIZE (bestmode);
 594           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 595           bitpos = bitnum % unit;
 596           op0 = adjust_address (op0, bestmode,  offset);
 597
 598           /* Fetch that unit, store the bitfield in it, then store
 599              the unit.  */
 600           tempreg = copy_to_reg (op0);
 601           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 602                            total_size);
 603           emit_move_insn (op0, tempreg);
 604           return value;
 605         }
 606       volatile_ok = save_volatile_ok;
 607
 608       /* Add OFFSET into OP0's address.  */
 609       if (GET_CODE (xop0) == MEM)
 610         xop0 = adjust_address (xop0, byte_mode, offset);
 611
 612       /* If xop0 is a register, we need it in MAXMODE
 613          to make it acceptable to the format of insv.  */
 614       if (GET_CODE (xop0) == SUBREG)
 615         /* We can't just change the mode, because this might clobber op0,
 616            and we will need the original value of op0 if insv fails.  */
 617         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 618       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 619         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 620
 621       /* On big-endian machines, we count bits from the most significant.
 622          If the bit field insn does not, we must invert.  */
 623
 624       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 625         xbitpos = unit - bitsize - xbitpos;
 626
 627       /* We have been counting XBITPOS within UNIT.
 628          Count instead within the size of the register.  */
 629       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 630         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 631
 632       unit = GET_MODE_BITSIZE (maxmode);
 633
 634       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 635       value1 = value;
 636       if (GET_MODE (value) != maxmode)
 637         {
 638           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 639             {
 640               /* Optimization: Don't bother really extending VALUE
 641                  if it has all the bits we will actually use.  However,
 642                  if we must narrow it, be sure we do it correctly.  */
 643
 644               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 645                 {
 646                   rtx tmp;
 647
 648                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 649                   if (! tmp)
 650                     tmp = simplify_gen_subreg (maxmode,
 651                                                force_reg (GET_MODE (value),
 652                                                           value1),
 653                                                GET_MODE (value), 0);
 654                   value1 = tmp;
 655                 }
 656               else
 657                 value1 = gen_lowpart (maxmode, value1);
 658             }
 659           else if (GET_CODE (value) == CONST_INT)
 660             value1 = gen_int_mode (INTVAL (value), maxmode);
 661           else if (!CONSTANT_P (value))
 662             /* Parse phase is supposed to make VALUE's data type
 663                match that of the component reference, which is a type
 664                at least as wide as the field; so VALUE should have
 665                a mode that corresponds to that type.  */
 666             abort ();
 667         }
 668
 669       /* If this machine's insv insists on a register,
 670          get VALUE1 into a register.  */
 671       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 672              (value1, maxmode)))
 673         value1 = force_reg (maxmode, value1);
 674
 675       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 676       if (pat)
 677         emit_insn (pat);
 678       else
 679         {
 680           delete_insns_since (last);
 681           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 682         }
 683     }
 684   else
 685     insv_loses:
 686     /* Insv is not available; store using shifts and boolean ops.  */
 687     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 688   return value;
 689 }
 690 \f
 691 /* Use shifts and boolean operations to store VALUE
 692    into a bit field of width BITSIZE
 693    in a memory location specified by OP0 except offset by OFFSET bytes.
 694      (OFFSET must be 0 if OP0 is a register.)
 695    The field starts at position BITPOS within the byte.
 696     (If OP0 is a register, it may be a full word or a narrower mode,
 697      but BITPOS still counts within a full word,
 698      which is significant on bigendian machines.)
 699
 700    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 701
 702 static void
 703 store_fixed_bit_field (op0, offset, bitsize, bitpos, value)
 704      rtx op0;
 705      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 706      rtx value;
 707 {
 708   enum machine_mode mode;
 709   unsigned int total_bits = BITS_PER_WORD;
 710   rtx subtarget, temp;
 711   int all_zero = 0;
 712   int all_one = 0;
 713
 714   /* There is a case not handled here:
 715      a structure with a known alignment of just a halfword
 716      and a field split across two aligned halfwords within the structure.
 717      Or likewise a structure with a known alignment of just a byte
 718      and a field split across two bytes.
 719      Such cases are not supposed to be able to occur.  */
 720
 721   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 722     {
 723       if (offset != 0)
 724         abort ();
 725       /* Special treatment for a bit field split across two registers.  */
 726       if (bitsize + bitpos > BITS_PER_WORD)
 727         {
 728           store_split_bit_field (op0, bitsize, bitpos, value);
 729           return;
 730         }
 731     }
 732   else
 733     {
 734       /* Get the proper mode to use for this field.  We want a mode that
 735          includes the entire field.  If such a mode would be larger than
 736          a word, we won't be doing the extraction the normal way.
 737          We don't want a mode bigger than the destination.  */
 738
 739       mode = GET_MODE (op0);
 740       if (GET_MODE_BITSIZE (mode) == 0
 741           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 742         mode = word_mode;
 743       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 744                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 745
 746       if (mode == VOIDmode)
 747         {
 748           /* The only way this should occur is if the field spans word
 749              boundaries.  */
 750           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 751                                  value);
 752           return;
 753         }
 754
 755       total_bits = GET_MODE_BITSIZE (mode);
 756
 757       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 758          be in the range 0 to total_bits-1, and put any excess bytes in
 759          OFFSET.  */
 760       if (bitpos >= total_bits)
 761         {
 762           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 763           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 764                      * BITS_PER_UNIT);
 765         }
 766
 767       /* Get ref to an aligned byte, halfword, or word containing the field.
 768          Adjust BITPOS to be position within a word,
 769          and OFFSET to be the offset of that word.
 770          Then alter OP0 to refer to that word.  */
 771       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 772       offset -= (offset % (total_bits / BITS_PER_UNIT));
 773       op0 = adjust_address (op0, mode, offset);
 774     }
 775
 776   mode = GET_MODE (op0);
 777
 778   /* Now MODE is either some integral mode for a MEM as OP0,
 779      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 780      The bit field is contained entirely within OP0.
 781      BITPOS is the starting bit number within OP0.
 782      (OP0's mode may actually be narrower than MODE.)  */
 783
 784   if (BYTES_BIG_ENDIAN)
 785       /* BITPOS is the distance between our msb
 786          and that of the containing datum.
 787          Convert it to the distance from the lsb.  */
 788       bitpos = total_bits - bitsize - bitpos;
 789
 790   /* Now BITPOS is always the distance between our lsb
 791      and that of OP0.  */
 792
 793   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 794      we must first convert its mode to MODE.  */
 795
 796   if (GET_CODE (value) == CONST_INT)
 797     {
 798       HOST_WIDE_INT v = INTVAL (value);
 799
 800       if (bitsize < HOST_BITS_PER_WIDE_INT)
 801         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 802
 803       if (v == 0)
 804         all_zero = 1;
 805       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 806                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 807                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 808         all_one = 1;
 809
 810       value = lshift_value (mode, value, bitpos, bitsize);
 811     }
 812   else
 813     {
 814       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 815                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 816
 817       if (GET_MODE (value) != mode)
 818         {
 819           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 820               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 821             value = gen_lowpart (mode, value);
 822           else
 823             value = convert_to_mode (mode, value, 1);
 824         }
 825
 826       if (must_and)
 827         value = expand_binop (mode, and_optab, value,
 828                               mask_rtx (mode, 0, bitsize, 0),
 829                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 830       if (bitpos > 0)
 831         value = expand_shift (LSHIFT_EXPR, mode, value,
 832                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 833     }
 834
 835   /* Now clear the chosen bits in OP0,
 836      except that if VALUE is -1 we need not bother.  */
 837
 838   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 839
 840   if (! all_one)
 841     {
 842       temp = expand_binop (mode, and_optab, op0,
 843                            mask_rtx (mode, bitpos, bitsize, 1),
 844                            subtarget, 1, OPTAB_LIB_WIDEN);
 845       subtarget = temp;
 846     }
 847   else
 848     temp = op0;
 849
 850   /* Now logical-or VALUE into OP0, unless it is zero.  */
 851
 852   if (! all_zero)
 853     temp = expand_binop (mode, ior_optab, temp, value,
 854                          subtarget, 1, OPTAB_LIB_WIDEN);
 855   if (op0 != temp)
 856     emit_move_insn (op0, temp);
 857 }
 858 \f
 859 /* Store a bit field that is split across multiple accessible memory objects.
 860
 861    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 862    BITSIZE is the field width; BITPOS the position of its first bit
 863    (within the word).
 864    VALUE is the value to store.
 865
 866    This does not yet handle fields wider than BITS_PER_WORD.  */
 867
 868 static void
 869 store_split_bit_field (op0, bitsize, bitpos, value)
 870      rtx op0;
 871      unsigned HOST_WIDE_INT bitsize, bitpos;
 872      rtx value;
 873 {
 874   unsigned int unit;
 875   unsigned int bitsdone = 0;
 876
 877   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 878      much at a time.  */
 879   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 880     unit = BITS_PER_WORD;
 881   else
 882     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 883
 884   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 885      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 886      that VALUE might be a floating-point constant.  */
 887   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 888     {
 889       rtx word = gen_lowpart_common (word_mode, value);
 890
 891       if (word && (value != word))
 892         value = word;
 893       else
 894         value = gen_lowpart_common (word_mode,
 895                                     force_reg (GET_MODE (value) != VOIDmode
 896                                                ? GET_MODE (value)
 897                                                : word_mode, value));
 898     }
 899   else if (GET_CODE (value) == ADDRESSOF)
 900     value = copy_to_reg (value);
 901
 902   while (bitsdone < bitsize)
 903     {
 904       unsigned HOST_WIDE_INT thissize;
 905       rtx part, word;
 906       unsigned HOST_WIDE_INT thispos;
 907       unsigned HOST_WIDE_INT offset;
 908
 909       offset = (bitpos + bitsdone) / unit;
 910       thispos = (bitpos + bitsdone) % unit;
 911
 912       /* THISSIZE must not overrun a word boundary.  Otherwise,
 913          store_fixed_bit_field will call us again, and we will mutually
 914          recurse forever.  */
 915       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 916       thissize = MIN (thissize, unit - thispos);
 917
 918       if (BYTES_BIG_ENDIAN)
 919         {
 920           int total_bits;
 921
 922           /* We must do an endian conversion exactly the same way as it is
 923              done in extract_bit_field, so that the two calls to
 924              extract_fixed_bit_field will have comparable arguments.  */
 925           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 926             total_bits = BITS_PER_WORD;
 927           else
 928             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 929
 930           /* Fetch successively less significant portions.  */
 931           if (GET_CODE (value) == CONST_INT)
 932             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 933                              >> (bitsize - bitsdone - thissize))
 934                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 935           else
 936             /* The args are chosen so that the last part includes the
 937                lsb.  Give extract_bit_field the value it needs (with
 938                endianness compensation) to fetch the piece we want.  */
 939             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 940                                             total_bits - bitsize + bitsdone,
 941                                             NULL_RTX, 1);
 942         }
 943       else
 944         {
 945           /* Fetch successively more significant portions.  */
 946           if (GET_CODE (value) == CONST_INT)
 947             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 948                              >> bitsdone)
 949                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 950           else
 951             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 952                                             bitsdone, NULL_RTX, 1);
 953         }
 954
 955       /* If OP0 is a register, then handle OFFSET here.
 956
 957          When handling multiword bitfields, extract_bit_field may pass
 958          down a word_mode SUBREG of a larger REG for a bitfield that actually
 959          crosses a word boundary.  Thus, for a SUBREG, we must find
 960          the current word starting from the base register.  */
 961       if (GET_CODE (op0) == SUBREG)
 962         {
 963           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 964           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 965                                         GET_MODE (SUBREG_REG (op0)));
 966           offset = 0;
 967         }
 968       else if (GET_CODE (op0) == REG)
 969         {
 970           word = operand_subword_force (op0, offset, GET_MODE (op0));
 971           offset = 0;
 972         }
 973       else
 974         word = op0;
 975
 976       /* OFFSET is in UNITs, and UNIT is in bits.
 977          store_fixed_bit_field wants offset in bytes.  */
 978       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 979                              thispos, part);
 980       bitsdone += thissize;
 981     }
 982 }
 983 \f
 984 /* Generate code to extract a byte-field from STR_RTX
 985    containing BITSIZE bits, starting at BITNUM,
 986    and put it in TARGET if possible (if TARGET is nonzero).
 987    Regardless of TARGET, we return the rtx for where the value is placed.
 988    It may be a QUEUED.
 989
 990    STR_RTX is the structure containing the byte (a REG or MEM).
 991    UNSIGNEDP is nonzero if this is an unsigned bit field.
 992    MODE is the natural mode of the field value once extracted.
 993    TMODE is the mode the caller would like the value to have;
 994    but the value may be returned with type MODE instead.
 995
 996    TOTAL_SIZE is the size in bytes of the containing structure,
 997    or -1 if varying.
 998
 999    If a TARGET is specified and we can store in it at no extra cost,
1000    we do so, and return TARGET.
1001    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1002    if they are equally easy.  */
1003
1004 rtx
1005 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
1006                    target, mode, tmode, total_size)
1007      rtx str_rtx;
1008      unsigned HOST_WIDE_INT bitsize;
1009      unsigned HOST_WIDE_INT bitnum;
1010      int unsignedp;
1011      rtx target;
1012      enum machine_mode mode, tmode;
1013      HOST_WIDE_INT total_size;
1014 {
1015   unsigned int unit
1016     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
1017   unsigned HOST_WIDE_INT offset = bitnum / unit;
1018   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1019   rtx op0 = str_rtx;
1020   rtx spec_target = target;
1021   rtx spec_target_subreg = 0;
1022   enum machine_mode int_mode;
1023   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1024   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1025   enum machine_mode mode1;
1026   int byte_offset;
1027
1028   /* Discount the part of the structure before the desired byte.
1029      We need to know how many bytes are safe to reference after it.  */
1030   if (total_size >= 0)
1031     total_size -= (bitpos / BIGGEST_ALIGNMENT
1032                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1033
1034   if (tmode == VOIDmode)
1035     tmode = mode;
1036
1037   while (GET_CODE (op0) == SUBREG)
1038     {
1039       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1040       if (bitpos > unit)
1041         {
1042           offset += (bitpos / unit);
1043           bitpos %= unit;
1044         }
1045       op0 = SUBREG_REG (op0);
1046     }
1047
1048   if (GET_CODE (op0) == REG
1049       && mode == GET_MODE (op0)
1050       && bitnum == 0
1051       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1052     {
1053       /* We're trying to extract a full register from itself.  */
1054       return op0;
1055     }
1056
1057   /* Make sure we are playing with integral modes.  Pun with subregs
1058      if we aren't.  */
1059   {
1060     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1061     if (imode != GET_MODE (op0))
1062       {
1063         if (GET_CODE (op0) == MEM)
1064           op0 = adjust_address (op0, imode, 0);
1065         else if (imode != BLKmode)
1066           op0 = gen_lowpart (imode, op0);
1067         else
1068           abort ();
1069       }
1070   }
1071
1072   /* We may be accessing data outside the field, which means
1073      we can alias adjacent data.  */
1074   if (GET_CODE (op0) == MEM)
1075     {
1076       op0 = shallow_copy_rtx (op0);
1077       set_mem_alias_set (op0, 0);
1078       set_mem_expr (op0, 0);
1079     }
1080
1081   /* Extraction of a full-word or multi-word value from a structure
1082      in a register or aligned memory can be done with just a SUBREG.
1083      A subword value in the least significant part of a register
1084      can also be extracted with a SUBREG.  For this, we need the
1085      byte offset of the value in op0.  */
1086
1087   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1088
1089   /* If OP0 is a register, BITPOS must count within a word.
1090      But as we have it, it counts within whatever size OP0 now has.
1091      On a bigendian machine, these are not the same, so convert.  */
1092   if (BYTES_BIG_ENDIAN
1093       && GET_CODE (op0) != MEM
1094       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1095     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1096
1097   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1098      If that's wrong, the solution is to test for it and set TARGET to 0
1099      if needed.  */
1100
1101   mode1  = (VECTOR_MODE_P (tmode)
1102             ? mode
1103             : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1104
1105   if (((GET_CODE (op0) != MEM
1106         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1107                                   GET_MODE_BITSIZE (GET_MODE (op0)))
1108         && GET_MODE_SIZE (mode1) != 0
1109         && byte_offset % GET_MODE_SIZE (mode1) == 0)
1110        || (GET_CODE (op0) == MEM
1111            && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1112                || (offset * BITS_PER_UNIT % bitsize == 0
1113                    && MEM_ALIGN (op0) % bitsize == 0))))
1114       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1115            && bitpos % BITS_PER_WORD == 0)
1116           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1117               /* ??? The big endian test here is wrong.  This is correct
1118                  if the value is in a register, and if mode_for_size is not
1119                  the same mode as op0.  This causes us to get unnecessarily
1120                  inefficient code from the Thumb port when -mbig-endian.  */
1121               && (BYTES_BIG_ENDIAN
1122                   ? bitpos + bitsize == BITS_PER_WORD
1123                   : bitpos == 0))))
1124     {
1125       if (mode1 != GET_MODE (op0))
1126         {
1127           if (GET_CODE (op0) == SUBREG)
1128             {
1129               if (GET_MODE (SUBREG_REG (op0)) == mode1
1130                   || GET_MODE_CLASS (mode1) == MODE_INT
1131                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1132                 op0 = SUBREG_REG (op0);
1133               else
1134                 /* Else we've got some float mode source being extracted into
1135                    a different float mode destination -- this combination of
1136                    subregs results in Severe Tire Damage.  */
1137                 goto no_subreg_mode_swap;
1138             }
1139           if (GET_CODE (op0) == REG)
1140             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1141           else
1142             op0 = adjust_address (op0, mode1, offset);
1143         }
1144       if (mode1 != mode)
1145         return convert_to_mode (tmode, op0, unsignedp);
1146       return op0;
1147     }
1148  no_subreg_mode_swap:
1149
1150   /* Handle fields bigger than a word.  */
1151
1152   if (bitsize > BITS_PER_WORD)
1153     {
1154       /* Here we transfer the words of the field
1155          in the order least significant first.
1156          This is because the most significant word is the one which may
1157          be less than full.  */
1158
1159       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1160       unsigned int i;
1161
1162       if (target == 0 || GET_CODE (target) != REG)
1163         target = gen_reg_rtx (mode);
1164
1165       /* Indicate for flow that the entire target reg is being set.  */
1166       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1167
1168       for (i = 0; i < nwords; i++)
1169         {
1170           /* If I is 0, use the low-order word in both field and target;
1171              if I is 1, use the next to lowest word; and so on.  */
1172           /* Word number in TARGET to use.  */
1173           unsigned int wordnum
1174             = (WORDS_BIG_ENDIAN
1175                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1176                : i);
1177           /* Offset from start of field in OP0.  */
1178           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1179                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1180                                                 * (int) BITS_PER_WORD))
1181                                      : (int) i * BITS_PER_WORD);
1182           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1183           rtx result_part
1184             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1185                                            bitsize - i * BITS_PER_WORD),
1186                                  bitnum + bit_offset, 1, target_part, mode,
1187                                  word_mode, total_size);
1188
1189           if (target_part == 0)
1190             abort ();
1191
1192           if (result_part != target_part)
1193             emit_move_insn (target_part, result_part);
1194         }
1195
1196       if (unsignedp)
1197         {
1198           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1199              need to be zero'd out.  */
1200           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1201             {
1202               unsigned int i, total_words;
1203
1204               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1205               for (i = nwords; i < total_words; i++)
1206                 emit_move_insn
1207                   (operand_subword (target,
1208                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1209                                     1, VOIDmode),
1210                    const0_rtx);
1211             }
1212           return target;
1213         }
1214
1215       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1216       target = expand_shift (LSHIFT_EXPR, mode, target,
1217                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1218                              NULL_RTX, 0);
1219       return expand_shift (RSHIFT_EXPR, mode, target,
1220                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1221                            NULL_RTX, 0);
1222     }
1223
1224   /* From here on we know the desired field is smaller than a word.  */
1225
1226   /* Check if there is a correspondingly-sized integer field, so we can
1227      safely extract it as one size of integer, if necessary; then
1228      truncate or extend to the size that is wanted; then use SUBREGs or
1229      convert_to_mode to get one of the modes we really wanted.  */
1230
1231   int_mode = int_mode_for_mode (tmode);
1232   if (int_mode == BLKmode)
1233     int_mode = int_mode_for_mode (mode);
1234   if (int_mode == BLKmode)
1235     abort ();    /* Should probably push op0 out to memory and then
1236                     do a load.  */
1237
1238   /* OFFSET is the number of words or bytes (UNIT says which)
1239      from STR_RTX to the first word or byte containing part of the field.  */
1240
1241   if (GET_CODE (op0) != MEM)
1242     {
1243       if (offset != 0
1244           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1245         {
1246           if (GET_CODE (op0) != REG)
1247             op0 = copy_to_reg (op0);
1248           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1249                                 op0, (offset * UNITS_PER_WORD));
1250         }
1251       offset = 0;
1252     }
1253   else
1254     op0 = protect_from_queue (str_rtx, 1);
1255
1256   /* Now OFFSET is nonzero only for memory operands.  */
1257
1258   if (unsignedp)
1259     {
1260       if (HAVE_extzv
1261           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1262           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1263                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1264         {
1265           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1266           rtx bitsize_rtx, bitpos_rtx;
1267           rtx last = get_last_insn ();
1268           rtx xop0 = op0;
1269           rtx xtarget = target;
1270           rtx xspec_target = spec_target;
1271           rtx xspec_target_subreg = spec_target_subreg;
1272           rtx pat;
1273           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1274
1275           if (GET_CODE (xop0) == MEM)
1276             {
1277               int save_volatile_ok = volatile_ok;
1278               volatile_ok = 1;
1279
1280               /* Is the memory operand acceptable?  */
1281               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1282                      (xop0, GET_MODE (xop0))))
1283                 {
1284                   /* No, load into a reg and extract from there.  */
1285                   enum machine_mode bestmode;
1286
1287                   /* Get the mode to use for inserting into this field.  If
1288                      OP0 is BLKmode, get the smallest mode consistent with the
1289                      alignment. If OP0 is a non-BLKmode object that is no
1290                      wider than MAXMODE, use its mode. Otherwise, use the
1291                      smallest mode containing the field.  */
1292
1293                   if (GET_MODE (xop0) == BLKmode
1294                       || (GET_MODE_SIZE (GET_MODE (op0))
1295                           > GET_MODE_SIZE (maxmode)))
1296                     bestmode = get_best_mode (bitsize, bitnum,
1297                                               MEM_ALIGN (xop0), maxmode,
1298                                               MEM_VOLATILE_P (xop0));
1299                   else
1300                     bestmode = GET_MODE (xop0);
1301
1302                   if (bestmode == VOIDmode
1303                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1304                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1305                     goto extzv_loses;
1306
1307                   /* Compute offset as multiple of this unit,
1308                      counting in bytes.  */
1309                   unit = GET_MODE_BITSIZE (bestmode);
1310                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1311                   xbitpos = bitnum % unit;
1312                   xop0 = adjust_address (xop0, bestmode, xoffset);
1313
1314                   /* Fetch it to a register in that size.  */
1315                   xop0 = force_reg (bestmode, xop0);
1316
1317                   /* XBITPOS counts within UNIT, which is what is expected.  */
1318                 }
1319               else
1320                 /* Get ref to first byte containing part of the field.  */
1321                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1322
1323               volatile_ok = save_volatile_ok;
1324             }
1325
1326           /* If op0 is a register, we need it in MAXMODE (which is usually
1327              SImode). to make it acceptable to the format of extzv.  */
1328           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1329             goto extzv_loses;
1330           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1331             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1332
1333           /* On big-endian machines, we count bits from the most significant.
1334              If the bit field insn does not, we must invert.  */
1335           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1336             xbitpos = unit - bitsize - xbitpos;
1337
1338           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1339           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1340             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1341
1342           unit = GET_MODE_BITSIZE (maxmode);
1343
1344           if (xtarget == 0
1345               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1346             xtarget = xspec_target = gen_reg_rtx (tmode);
1347
1348           if (GET_MODE (xtarget) != maxmode)
1349             {
1350               if (GET_CODE (xtarget) == REG)
1351                 {
1352                   int wider = (GET_MODE_SIZE (maxmode)
1353                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1354                   xtarget = gen_lowpart (maxmode, xtarget);
1355                   if (wider)
1356                     xspec_target_subreg = xtarget;
1357                 }
1358               else
1359                 xtarget = gen_reg_rtx (maxmode);
1360             }
1361
1362           /* If this machine's extzv insists on a register target,
1363              make sure we have one.  */
1364           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1365                  (xtarget, maxmode)))
1366             xtarget = gen_reg_rtx (maxmode);
1367
1368           bitsize_rtx = GEN_INT (bitsize);
1369           bitpos_rtx = GEN_INT (xbitpos);
1370
1371           pat = gen_extzv (protect_from_queue (xtarget, 1),
1372                            xop0, bitsize_rtx, bitpos_rtx);
1373           if (pat)
1374             {
1375               emit_insn (pat);
1376               target = xtarget;
1377               spec_target = xspec_target;
1378               spec_target_subreg = xspec_target_subreg;
1379             }
1380           else
1381             {
1382               delete_insns_since (last);
1383               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1384                                                 bitpos, target, 1);
1385             }
1386         }
1387       else
1388       extzv_loses:
1389         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1390                                           bitpos, target, 1);
1391     }
1392   else
1393     {
1394       if (HAVE_extv
1395           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1396           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1397                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1398         {
1399           int xbitpos = bitpos, xoffset = offset;
1400           rtx bitsize_rtx, bitpos_rtx;
1401           rtx last = get_last_insn ();
1402           rtx xop0 = op0, xtarget = target;
1403           rtx xspec_target = spec_target;
1404           rtx xspec_target_subreg = spec_target_subreg;
1405           rtx pat;
1406           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1407
1408           if (GET_CODE (xop0) == MEM)
1409             {
1410               /* Is the memory operand acceptable?  */
1411               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1412                      (xop0, GET_MODE (xop0))))
1413                 {
1414                   /* No, load into a reg and extract from there.  */
1415                   enum machine_mode bestmode;
1416
1417                   /* Get the mode to use for inserting into this field.  If
1418                      OP0 is BLKmode, get the smallest mode consistent with the
1419                      alignment. If OP0 is a non-BLKmode object that is no
1420                      wider than MAXMODE, use its mode. Otherwise, use the
1421                      smallest mode containing the field.  */
1422
1423                   if (GET_MODE (xop0) == BLKmode
1424                       || (GET_MODE_SIZE (GET_MODE (op0))
1425                           > GET_MODE_SIZE (maxmode)))
1426                     bestmode = get_best_mode (bitsize, bitnum,
1427                                               MEM_ALIGN (xop0), maxmode,
1428                                               MEM_VOLATILE_P (xop0));
1429                   else
1430                     bestmode = GET_MODE (xop0);
1431
1432                   if (bestmode == VOIDmode
1433                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1434                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1435                     goto extv_loses;
1436
1437                   /* Compute offset as multiple of this unit,
1438                      counting in bytes.  */
1439                   unit = GET_MODE_BITSIZE (bestmode);
1440                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1441                   xbitpos = bitnum % unit;
1442                   xop0 = adjust_address (xop0, bestmode, xoffset);
1443
1444                   /* Fetch it to a register in that size.  */
1445                   xop0 = force_reg (bestmode, xop0);
1446
1447                   /* XBITPOS counts within UNIT, which is what is expected.  */
1448                 }
1449               else
1450                 /* Get ref to first byte containing part of the field.  */
1451                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1452             }
1453
1454           /* If op0 is a register, we need it in MAXMODE (which is usually
1455              SImode) to make it acceptable to the format of extv.  */
1456           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1457             goto extv_loses;
1458           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1459             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1460
1461           /* On big-endian machines, we count bits from the most significant.
1462              If the bit field insn does not, we must invert.  */
1463           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1464             xbitpos = unit - bitsize - xbitpos;
1465
1466           /* XBITPOS counts within a size of UNIT.
1467              Adjust to count within a size of MAXMODE.  */
1468           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1469             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1470
1471           unit = GET_MODE_BITSIZE (maxmode);
1472
1473           if (xtarget == 0
1474               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1475             xtarget = xspec_target = gen_reg_rtx (tmode);
1476
1477           if (GET_MODE (xtarget) != maxmode)
1478             {
1479               if (GET_CODE (xtarget) == REG)
1480                 {
1481                   int wider = (GET_MODE_SIZE (maxmode)
1482                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1483                   xtarget = gen_lowpart (maxmode, xtarget);
1484                   if (wider)
1485                     xspec_target_subreg = xtarget;
1486                 }
1487               else
1488                 xtarget = gen_reg_rtx (maxmode);
1489             }
1490
1491           /* If this machine's extv insists on a register target,
1492              make sure we have one.  */
1493           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1494                  (xtarget, maxmode)))
1495             xtarget = gen_reg_rtx (maxmode);
1496
1497           bitsize_rtx = GEN_INT (bitsize);
1498           bitpos_rtx = GEN_INT (xbitpos);
1499
1500           pat = gen_extv (protect_from_queue (xtarget, 1),
1501                           xop0, bitsize_rtx, bitpos_rtx);
1502           if (pat)
1503             {
1504               emit_insn (pat);
1505               target = xtarget;
1506               spec_target = xspec_target;
1507               spec_target_subreg = xspec_target_subreg;
1508             }
1509           else
1510             {
1511               delete_insns_since (last);
1512               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1513                                                 bitpos, target, 0);
1514             }
1515         }
1516       else
1517       extv_loses:
1518         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1519                                           bitpos, target, 0);
1520     }
1521   if (target == spec_target)
1522     return target;
1523   if (target == spec_target_subreg)
1524     return spec_target;
1525   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1526     {
1527       /* If the target mode is floating-point, first convert to the
1528          integer mode of that size and then access it as a floating-point
1529          value via a SUBREG.  */
1530       if (GET_MODE_CLASS (tmode) != MODE_INT
1531           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1532         {
1533           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1534                                                    MODE_INT, 0),
1535                                     target, unsignedp);
1536           return gen_lowpart (tmode, target);
1537         }
1538       else
1539         return convert_to_mode (tmode, target, unsignedp);
1540     }
1541   return target;
1542 }
1543 \f
1544 /* Extract a bit field using shifts and boolean operations
1545    Returns an rtx to represent the value.
1546    OP0 addresses a register (word) or memory (byte).
1547    BITPOS says which bit within the word or byte the bit field starts in.
1548    OFFSET says how many bytes farther the bit field starts;
1549     it is 0 if OP0 is a register.
1550    BITSIZE says how many bits long the bit field is.
1551     (If OP0 is a register, it may be narrower than a full word,
1552      but BITPOS still counts within a full word,
1553      which is significant on bigendian machines.)
1554
1555    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1556    If TARGET is nonzero, attempts to store the value there
1557    and return TARGET, but this is not guaranteed.
1558    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1559
1560 static rtx
1561 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1562                          target, unsignedp)
1563      enum machine_mode tmode;
1564      rtx op0, target;
1565      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1566      int unsignedp;
1567 {
1568   unsigned int total_bits = BITS_PER_WORD;
1569   enum machine_mode mode;
1570
1571   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1572     {
1573       /* Special treatment for a bit field split across two registers.  */
1574       if (bitsize + bitpos > BITS_PER_WORD)
1575         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1576     }
1577   else
1578     {
1579       /* Get the proper mode to use for this field.  We want a mode that
1580          includes the entire field.  If such a mode would be larger than
1581          a word, we won't be doing the extraction the normal way.  */
1582
1583       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1584                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1585
1586       if (mode == VOIDmode)
1587         /* The only way this should occur is if the field spans word
1588            boundaries.  */
1589         return extract_split_bit_field (op0, bitsize,
1590                                         bitpos + offset * BITS_PER_UNIT,
1591                                         unsignedp);
1592
1593       total_bits = GET_MODE_BITSIZE (mode);
1594
1595       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1596          be in the range 0 to total_bits-1, and put any excess bytes in
1597          OFFSET.  */
1598       if (bitpos >= total_bits)
1599         {
1600           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1601           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1602                      * BITS_PER_UNIT);
1603         }
1604
1605       /* Get ref to an aligned byte, halfword, or word containing the field.
1606          Adjust BITPOS to be position within a word,
1607          and OFFSET to be the offset of that word.
1608          Then alter OP0 to refer to that word.  */
1609       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1610       offset -= (offset % (total_bits / BITS_PER_UNIT));
1611       op0 = adjust_address (op0, mode, offset);
1612     }
1613
1614   mode = GET_MODE (op0);
1615
1616   if (BYTES_BIG_ENDIAN)
1617     /* BITPOS is the distance between our msb and that of OP0.
1618        Convert it to the distance from the lsb.  */
1619     bitpos = total_bits - bitsize - bitpos;
1620
1621   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1622      We have reduced the big-endian case to the little-endian case.  */
1623
1624   if (unsignedp)
1625     {
1626       if (bitpos)
1627         {
1628           /* If the field does not already start at the lsb,
1629              shift it so it does.  */
1630           tree amount = build_int_2 (bitpos, 0);
1631           /* Maybe propagate the target for the shift.  */
1632           /* But not if we will return it--could confuse integrate.c.  */
1633           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1634                            && !REG_FUNCTION_VALUE_P (target)
1635                            ? target : 0);
1636           if (tmode != mode) subtarget = 0;
1637           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1638         }
1639       /* Convert the value to the desired mode.  */
1640       if (mode != tmode)
1641         op0 = convert_to_mode (tmode, op0, 1);
1642
1643       /* Unless the msb of the field used to be the msb when we shifted,
1644          mask out the upper bits.  */
1645
1646       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1647         return expand_binop (GET_MODE (op0), and_optab, op0,
1648                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1649                              target, 1, OPTAB_LIB_WIDEN);
1650       return op0;
1651     }
1652
1653   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1654      then arithmetic-shift its lsb to the lsb of the word.  */
1655   op0 = force_reg (mode, op0);
1656   if (mode != tmode)
1657     target = 0;
1658
1659   /* Find the narrowest integer mode that contains the field.  */
1660
1661   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1662        mode = GET_MODE_WIDER_MODE (mode))
1663     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1664       {
1665         op0 = convert_to_mode (mode, op0, 0);
1666         break;
1667       }
1668
1669   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1670     {
1671       tree amount
1672         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1673       /* Maybe propagate the target for the shift.  */
1674       /* But not if we will return the result--could confuse integrate.c.  */
1675       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1676                        && ! REG_FUNCTION_VALUE_P (target)
1677                        ? target : 0);
1678       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1679     }
1680
1681   return expand_shift (RSHIFT_EXPR, mode, op0,
1682                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1683                        target, 0);
1684 }
1685 \f
1686 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1687    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1688    complement of that if COMPLEMENT.  The mask is truncated if
1689    necessary to the width of mode MODE.  The mask is zero-extended if
1690    BITSIZE+BITPOS is too small for MODE.  */
1691
1692 static rtx
1693 mask_rtx (mode, bitpos, bitsize, complement)
1694      enum machine_mode mode;
1695      int bitpos, bitsize, complement;
1696 {
1697   HOST_WIDE_INT masklow, maskhigh;
1698
1699   if (bitpos < HOST_BITS_PER_WIDE_INT)
1700     masklow = (HOST_WIDE_INT) -1 << bitpos;
1701   else
1702     masklow = 0;
1703
1704   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1705     masklow &= ((unsigned HOST_WIDE_INT) -1
1706                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1707
1708   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1709     maskhigh = -1;
1710   else
1711     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1712
1713   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1714     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1715                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1716   else
1717     maskhigh = 0;
1718
1719   if (complement)
1720     {
1721       maskhigh = ~maskhigh;
1722       masklow = ~masklow;
1723     }
1724
1725   return immed_double_const (masklow, maskhigh, mode);
1726 }
1727
1728 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1729    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1730
1731 static rtx
1732 lshift_value (mode, value, bitpos, bitsize)
1733      enum machine_mode mode;
1734      rtx value;
1735      int bitpos, bitsize;
1736 {
1737   unsigned HOST_WIDE_INT v = INTVAL (value);
1738   HOST_WIDE_INT low, high;
1739
1740   if (bitsize < HOST_BITS_PER_WIDE_INT)
1741     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1742
1743   if (bitpos < HOST_BITS_PER_WIDE_INT)
1744     {
1745       low = v << bitpos;
1746       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1747     }
1748   else
1749     {
1750       low = 0;
1751       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1752     }
1753
1754   return immed_double_const (low, high, mode);
1755 }
1756 \f
1757 /* Extract a bit field that is split across two words
1758    and return an RTX for the result.
1759
1760    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1761    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1762    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1763
1764 static rtx
1765 extract_split_bit_field (op0, bitsize, bitpos, unsignedp)
1766      rtx op0;
1767      unsigned HOST_WIDE_INT bitsize, bitpos;
1768      int unsignedp;
1769 {
1770   unsigned int unit;
1771   unsigned int bitsdone = 0;
1772   rtx result = NULL_RTX;
1773   int first = 1;
1774
1775   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1776      much at a time.  */
1777   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1778     unit = BITS_PER_WORD;
1779   else
1780     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1781
1782   while (bitsdone < bitsize)
1783     {
1784       unsigned HOST_WIDE_INT thissize;
1785       rtx part, word;
1786       unsigned HOST_WIDE_INT thispos;
1787       unsigned HOST_WIDE_INT offset;
1788
1789       offset = (bitpos + bitsdone) / unit;
1790       thispos = (bitpos + bitsdone) % unit;
1791
1792       /* THISSIZE must not overrun a word boundary.  Otherwise,
1793          extract_fixed_bit_field will call us again, and we will mutually
1794          recurse forever.  */
1795       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1796       thissize = MIN (thissize, unit - thispos);
1797
1798       /* If OP0 is a register, then handle OFFSET here.
1799
1800          When handling multiword bitfields, extract_bit_field may pass
1801          down a word_mode SUBREG of a larger REG for a bitfield that actually
1802          crosses a word boundary.  Thus, for a SUBREG, we must find
1803          the current word starting from the base register.  */
1804       if (GET_CODE (op0) == SUBREG)
1805         {
1806           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1807           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1808                                         GET_MODE (SUBREG_REG (op0)));
1809           offset = 0;
1810         }
1811       else if (GET_CODE (op0) == REG)
1812         {
1813           word = operand_subword_force (op0, offset, GET_MODE (op0));
1814           offset = 0;
1815         }
1816       else
1817         word = op0;
1818
1819       /* Extract the parts in bit-counting order,
1820          whose meaning is determined by BYTES_PER_UNIT.
1821          OFFSET is in UNITs, and UNIT is in bits.
1822          extract_fixed_bit_field wants offset in bytes.  */
1823       part = extract_fixed_bit_field (word_mode, word,
1824                                       offset * unit / BITS_PER_UNIT,
1825                                       thissize, thispos, 0, 1);
1826       bitsdone += thissize;
1827
1828       /* Shift this part into place for the result.  */
1829       if (BYTES_BIG_ENDIAN)
1830         {
1831           if (bitsize != bitsdone)
1832             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1833                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1834         }
1835       else
1836         {
1837           if (bitsdone != thissize)
1838             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1839                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1840         }
1841
1842       if (first)
1843         result = part;
1844       else
1845         /* Combine the parts with bitwise or.  This works
1846            because we extracted each part as an unsigned bit field.  */
1847         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1848                                OPTAB_LIB_WIDEN);
1849
1850       first = 0;
1851     }
1852
1853   /* Unsigned bit field: we are done.  */
1854   if (unsignedp)
1855     return result;
1856   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1857   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1858                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1859                          NULL_RTX, 0);
1860   return expand_shift (RSHIFT_EXPR, word_mode, result,
1861                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1862 }
1863 \f
1864 /* Add INC into TARGET.  */
1865
1866 void
1867 expand_inc (target, inc)
1868      rtx target, inc;
1869 {
1870   rtx value = expand_binop (GET_MODE (target), add_optab,
1871                             target, inc,
1872                             target, 0, OPTAB_LIB_WIDEN);
1873   if (value != target)
1874     emit_move_insn (target, value);
1875 }
1876
1877 /* Subtract DEC from TARGET.  */
1878
1879 void
1880 expand_dec (target, dec)
1881      rtx target, dec;
1882 {
1883   rtx value = expand_binop (GET_MODE (target), sub_optab,
1884                             target, dec,
1885                             target, 0, OPTAB_LIB_WIDEN);
1886   if (value != target)
1887     emit_move_insn (target, value);
1888 }
1889 \f
1890 /* Output a shift instruction for expression code CODE,
1891    with SHIFTED being the rtx for the value to shift,
1892    and AMOUNT the tree for the amount to shift by.
1893    Store the result in the rtx TARGET, if that is convenient.
1894    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1895    Return the rtx for where the value is.  */
1896
1897 rtx
1898 expand_shift (code, mode, shifted, amount, target, unsignedp)
1899      enum tree_code code;
1900      enum machine_mode mode;
1901      rtx shifted;
1902      tree amount;
1903      rtx target;
1904      int unsignedp;
1905 {
1906   rtx op1, temp = 0;
1907   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1908   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1909   int try;
1910
1911   /* Previously detected shift-counts computed by NEGATE_EXPR
1912      and shifted in the other direction; but that does not work
1913      on all machines.  */
1914
1915   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1916
1917 #ifdef SHIFT_COUNT_TRUNCATED
1918   if (SHIFT_COUNT_TRUNCATED)
1919     {
1920       if (GET_CODE (op1) == CONST_INT
1921           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1922               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1923         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1924                        % GET_MODE_BITSIZE (mode));
1925       else if (GET_CODE (op1) == SUBREG
1926                && subreg_lowpart_p (op1))
1927         op1 = SUBREG_REG (op1);
1928     }
1929 #endif
1930
1931   if (op1 == const0_rtx)
1932     return shifted;
1933
1934   for (try = 0; temp == 0 && try < 3; try++)
1935     {
1936       enum optab_methods methods;
1937
1938       if (try == 0)
1939         methods = OPTAB_DIRECT;
1940       else if (try == 1)
1941         methods = OPTAB_WIDEN;
1942       else
1943         methods = OPTAB_LIB_WIDEN;
1944
1945       if (rotate)
1946         {
1947           /* Widening does not work for rotation.  */
1948           if (methods == OPTAB_WIDEN)
1949             continue;
1950           else if (methods == OPTAB_LIB_WIDEN)
1951             {
1952               /* If we have been unable to open-code this by a rotation,
1953                  do it as the IOR of two shifts.  I.e., to rotate A
1954                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1955                  where C is the bitsize of A.
1956
1957                  It is theoretically possible that the target machine might
1958                  not be able to perform either shift and hence we would
1959                  be making two libcalls rather than just the one for the
1960                  shift (similarly if IOR could not be done).  We will allow
1961                  this extremely unlikely lossage to avoid complicating the
1962                  code below.  */
1963
1964               rtx subtarget = target == shifted ? 0 : target;
1965               rtx temp1;
1966               tree type = TREE_TYPE (amount);
1967               tree new_amount = make_tree (type, op1);
1968               tree other_amount
1969                 = fold (build (MINUS_EXPR, type,
1970                                convert (type,
1971                                         build_int_2 (GET_MODE_BITSIZE (mode),
1972                                                      0)),
1973                                amount));
1974
1975               shifted = force_reg (mode, shifted);
1976
1977               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1978                                    mode, shifted, new_amount, subtarget, 1);
1979               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1980                                     mode, shifted, other_amount, 0, 1);
1981               return expand_binop (mode, ior_optab, temp, temp1, target,
1982                                    unsignedp, methods);
1983             }
1984
1985           temp = expand_binop (mode,
1986                                left ? rotl_optab : rotr_optab,
1987                                shifted, op1, target, unsignedp, methods);
1988
1989           /* If we don't have the rotate, but we are rotating by a constant
1990              that is in range, try a rotate in the opposite direction.  */
1991
1992           if (temp == 0 && GET_CODE (op1) == CONST_INT
1993               && INTVAL (op1) > 0
1994               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1995             temp = expand_binop (mode,
1996                                  left ? rotr_optab : rotl_optab,
1997                                  shifted,
1998                                  GEN_INT (GET_MODE_BITSIZE (mode)
1999                                           - INTVAL (op1)),
2000                                  target, unsignedp, methods);
2001         }
2002       else if (unsignedp)
2003         temp = expand_binop (mode,
2004                              left ? ashl_optab : lshr_optab,
2005                              shifted, op1, target, unsignedp, methods);
2006
2007       /* Do arithmetic shifts.
2008          Also, if we are going to widen the operand, we can just as well
2009          use an arithmetic right-shift instead of a logical one.  */
2010       if (temp == 0 && ! rotate
2011           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2012         {
2013           enum optab_methods methods1 = methods;
2014
2015           /* If trying to widen a log shift to an arithmetic shift,
2016              don't accept an arithmetic shift of the same size.  */
2017           if (unsignedp)
2018             methods1 = OPTAB_MUST_WIDEN;
2019
2020           /* Arithmetic shift */
2021
2022           temp = expand_binop (mode,
2023                                left ? ashl_optab : ashr_optab,
2024                                shifted, op1, target, unsignedp, methods1);
2025         }
2026
2027       /* We used to try extzv here for logical right shifts, but that was
2028          only useful for one machine, the VAX, and caused poor code
2029          generation there for lshrdi3, so the code was deleted and a
2030          define_expand for lshrsi3 was added to vax.md.  */
2031     }
2032
2033   if (temp == 0)
2034     abort ();
2035   return temp;
2036 }
2037 \f
2038 enum alg_code { alg_zero, alg_m, alg_shift,
2039                   alg_add_t_m2, alg_sub_t_m2,
2040                   alg_add_factor, alg_sub_factor,
2041                   alg_add_t2_m, alg_sub_t2_m,
2042                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2043
2044 /* This structure records a sequence of operations.
2045    `ops' is the number of operations recorded.
2046    `cost' is their total cost.
2047    The operations are stored in `op' and the corresponding
2048    logarithms of the integer coefficients in `log'.
2049
2050    These are the operations:
2051    alg_zero             total := 0;
2052    alg_m                total := multiplicand;
2053    alg_shift            total := total * coeff
2054    alg_add_t_m2         total := total + multiplicand * coeff;
2055    alg_sub_t_m2         total := total - multiplicand * coeff;
2056    alg_add_factor       total := total * coeff + total;
2057    alg_sub_factor       total := total * coeff - total;
2058    alg_add_t2_m         total := total * coeff + multiplicand;
2059    alg_sub_t2_m         total := total * coeff - multiplicand;
2060
2061    The first operand must be either alg_zero or alg_m.  */
2062
2063 struct algorithm
2064 {
2065   short cost;
2066   short ops;
2067   /* The size of the OP and LOG fields are not directly related to the
2068      word size, but the worst-case algorithms will be if we have few
2069      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2070      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2071      in total wordsize operations.  */
2072   enum alg_code op[MAX_BITS_PER_WORD];
2073   char log[MAX_BITS_PER_WORD];
2074 };
2075
2076 static void synth_mult                  PARAMS ((struct algorithm *,
2077                                                  unsigned HOST_WIDE_INT,
2078                                                  int));
2079 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2080                                                          int, int,
2081                                                          unsigned HOST_WIDE_INT *,
2082                                                          int *, int *));
2083 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2084                                                          int));
2085 /* Compute and return the best algorithm for multiplying by T.
2086    The algorithm must cost less than cost_limit
2087    If retval.cost >= COST_LIMIT, no algorithm was found and all
2088    other field of the returned struct are undefined.  */
2089
2090 static void
2091 synth_mult (alg_out, t, cost_limit)
2092      struct algorithm *alg_out;
2093      unsigned HOST_WIDE_INT t;
2094      int cost_limit;
2095 {
2096   int m;
2097   struct algorithm *alg_in, *best_alg;
2098   int cost;
2099   unsigned HOST_WIDE_INT q;
2100
2101   /* Indicate that no algorithm is yet found.  If no algorithm
2102      is found, this value will be returned and indicate failure.  */
2103   alg_out->cost = cost_limit;
2104
2105   if (cost_limit <= 0)
2106     return;
2107
2108   /* t == 1 can be done in zero cost.  */
2109   if (t == 1)
2110     {
2111       alg_out->ops = 1;
2112       alg_out->cost = 0;
2113       alg_out->op[0] = alg_m;
2114       return;
2115     }
2116
2117   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2118      fail now.  */
2119   if (t == 0)
2120     {
2121       if (zero_cost >= cost_limit)
2122         return;
2123       else
2124         {
2125           alg_out->ops = 1;
2126           alg_out->cost = zero_cost;
2127           alg_out->op[0] = alg_zero;
2128           return;
2129         }
2130     }
2131
2132   /* We'll be needing a couple extra algorithm structures now.  */
2133
2134   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2135   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2136
2137   /* If we have a group of zero bits at the low-order part of T, try
2138      multiplying by the remaining bits and then doing a shift.  */
2139
2140   if ((t & 1) == 0)
2141     {
2142       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2143       if (m < BITS_PER_WORD)
2144         {
2145           q = t >> m;
2146           cost = shift_cost[m];
2147           synth_mult (alg_in, q, cost_limit - cost);
2148
2149           cost += alg_in->cost;
2150           if (cost < cost_limit)
2151             {
2152               struct algorithm *x;
2153               x = alg_in, alg_in = best_alg, best_alg = x;
2154               best_alg->log[best_alg->ops] = m;
2155               best_alg->op[best_alg->ops] = alg_shift;
2156               cost_limit = cost;
2157             }
2158         }
2159     }
2160
2161   /* If we have an odd number, add or subtract one.  */
2162   if ((t & 1) != 0)
2163     {
2164       unsigned HOST_WIDE_INT w;
2165
2166       for (w = 1; (w & t) != 0; w <<= 1)
2167         ;
2168       /* If T was -1, then W will be zero after the loop.  This is another
2169          case where T ends with ...111.  Handling this with (T + 1) and
2170          subtract 1 produces slightly better code and results in algorithm
2171          selection much faster than treating it like the ...0111 case
2172          below.  */
2173       if (w == 0
2174           || (w > 2
2175               /* Reject the case where t is 3.
2176                  Thus we prefer addition in that case.  */
2177               && t != 3))
2178         {
2179           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2180
2181           cost = add_cost;
2182           synth_mult (alg_in, t + 1, cost_limit - cost);
2183
2184           cost += alg_in->cost;
2185           if (cost < cost_limit)
2186             {
2187               struct algorithm *x;
2188               x = alg_in, alg_in = best_alg, best_alg = x;
2189               best_alg->log[best_alg->ops] = 0;
2190               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2191               cost_limit = cost;
2192             }
2193         }
2194       else
2195         {
2196           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2197
2198           cost = add_cost;
2199           synth_mult (alg_in, t - 1, cost_limit - cost);
2200
2201           cost += alg_in->cost;
2202           if (cost < cost_limit)
2203             {
2204               struct algorithm *x;
2205               x = alg_in, alg_in = best_alg, best_alg = x;
2206               best_alg->log[best_alg->ops] = 0;
2207               best_alg->op[best_alg->ops] = alg_add_t_m2;
2208               cost_limit = cost;
2209             }
2210         }
2211     }
2212
2213   /* Look for factors of t of the form
2214      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2215      If we find such a factor, we can multiply by t using an algorithm that
2216      multiplies by q, shift the result by m and add/subtract it to itself.
2217
2218      We search for large factors first and loop down, even if large factors
2219      are less probable than small; if we find a large factor we will find a
2220      good sequence quickly, and therefore be able to prune (by decreasing
2221      COST_LIMIT) the search.  */
2222
2223   for (m = floor_log2 (t - 1); m >= 2; m--)
2224     {
2225       unsigned HOST_WIDE_INT d;
2226
2227       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2228       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2229         {
2230           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2231           synth_mult (alg_in, t / d, cost_limit - cost);
2232
2233           cost += alg_in->cost;
2234           if (cost < cost_limit)
2235             {
2236               struct algorithm *x;
2237               x = alg_in, alg_in = best_alg, best_alg = x;
2238               best_alg->log[best_alg->ops] = m;
2239               best_alg->op[best_alg->ops] = alg_add_factor;
2240               cost_limit = cost;
2241             }
2242           /* Other factors will have been taken care of in the recursion.  */
2243           break;
2244         }
2245
2246       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2247       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2248         {
2249           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2250           synth_mult (alg_in, t / d, cost_limit - cost);
2251
2252           cost += alg_in->cost;
2253           if (cost < cost_limit)
2254             {
2255               struct algorithm *x;
2256               x = alg_in, alg_in = best_alg, best_alg = x;
2257               best_alg->log[best_alg->ops] = m;
2258               best_alg->op[best_alg->ops] = alg_sub_factor;
2259               cost_limit = cost;
2260             }
2261           break;
2262         }
2263     }
2264
2265   /* Try shift-and-add (load effective address) instructions,
2266      i.e. do a*3, a*5, a*9.  */
2267   if ((t & 1) != 0)
2268     {
2269       q = t - 1;
2270       q = q & -q;
2271       m = exact_log2 (q);
2272       if (m >= 0 && m < BITS_PER_WORD)
2273         {
2274           cost = shiftadd_cost[m];
2275           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2276
2277           cost += alg_in->cost;
2278           if (cost < cost_limit)
2279             {
2280               struct algorithm *x;
2281               x = alg_in, alg_in = best_alg, best_alg = x;
2282               best_alg->log[best_alg->ops] = m;
2283               best_alg->op[best_alg->ops] = alg_add_t2_m;
2284               cost_limit = cost;
2285             }
2286         }
2287
2288       q = t + 1;
2289       q = q & -q;
2290       m = exact_log2 (q);
2291       if (m >= 0 && m < BITS_PER_WORD)
2292         {
2293           cost = shiftsub_cost[m];
2294           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2295
2296           cost += alg_in->cost;
2297           if (cost < cost_limit)
2298             {
2299               struct algorithm *x;
2300               x = alg_in, alg_in = best_alg, best_alg = x;
2301               best_alg->log[best_alg->ops] = m;
2302               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2303               cost_limit = cost;
2304             }
2305         }
2306     }
2307
2308   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2309      we have not found any algorithm.  */
2310   if (cost_limit == alg_out->cost)
2311     return;
2312
2313   /* If we are getting a too long sequence for `struct algorithm'
2314      to record, make this search fail.  */
2315   if (best_alg->ops == MAX_BITS_PER_WORD)
2316     return;
2317
2318   /* Copy the algorithm from temporary space to the space at alg_out.
2319      We avoid using structure assignment because the majority of
2320      best_alg is normally undefined, and this is a critical function.  */
2321   alg_out->ops = best_alg->ops + 1;
2322   alg_out->cost = cost_limit;
2323   memcpy (alg_out->op, best_alg->op,
2324           alg_out->ops * sizeof *alg_out->op);
2325   memcpy (alg_out->log, best_alg->log,
2326           alg_out->ops * sizeof *alg_out->log);
2327 }
2328 \f
2329 /* Perform a multiplication and return an rtx for the result.
2330    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2331    TARGET is a suggestion for where to store the result (an rtx).
2332
2333    We check specially for a constant integer as OP1.
2334    If you want this check for OP0 as well, then before calling
2335    you should swap the two operands if OP0 would be constant.  */
2336
2337 rtx
2338 expand_mult (mode, op0, op1, target, unsignedp)
2339      enum machine_mode mode;
2340      rtx op0, op1, target;
2341      int unsignedp;
2342 {
2343   rtx const_op1 = op1;
2344
2345   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2346      less than or equal in size to `unsigned int' this doesn't matter.
2347      If the mode is larger than `unsigned int', then synth_mult works only
2348      if the constant value exactly fits in an `unsigned int' without any
2349      truncation.  This means that multiplying by negative values does
2350      not work; results are off by 2^32 on a 32 bit machine.  */
2351
2352   /* If we are multiplying in DImode, it may still be a win
2353      to try to work with shifts and adds.  */
2354   if (GET_CODE (op1) == CONST_DOUBLE
2355       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2356       && HOST_BITS_PER_INT >= BITS_PER_WORD
2357       && CONST_DOUBLE_HIGH (op1) == 0)
2358     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2359   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2360            && GET_CODE (op1) == CONST_INT
2361            && INTVAL (op1) < 0)
2362     const_op1 = 0;
2363
2364   /* We used to test optimize here, on the grounds that it's better to
2365      produce a smaller program when -O is not used.
2366      But this causes such a terrible slowdown sometimes
2367      that it seems better to use synth_mult always.  */
2368
2369   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2370       && (unsignedp || ! flag_trapv))
2371     {
2372       struct algorithm alg;
2373       struct algorithm alg2;
2374       HOST_WIDE_INT val = INTVAL (op1);
2375       HOST_WIDE_INT val_so_far;
2376       rtx insn;
2377       int mult_cost;
2378       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2379
2380       /* op0 must be register to make mult_cost match the precomputed
2381          shiftadd_cost array.  */
2382       op0 = force_reg (mode, op0);
2383
2384       /* Try to do the computation three ways: multiply by the negative of OP1
2385          and then negate, do the multiplication directly, or do multiplication
2386          by OP1 - 1.  */
2387
2388       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2389       mult_cost = MIN (12 * add_cost, mult_cost);
2390
2391       synth_mult (&alg, val, mult_cost);
2392
2393       /* This works only if the inverted value actually fits in an
2394          `unsigned int' */
2395       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2396         {
2397           synth_mult (&alg2, - val,
2398                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2399           if (alg2.cost + negate_cost < alg.cost)
2400             alg = alg2, variant = negate_variant;
2401         }
2402
2403       /* This proves very useful for division-by-constant.  */
2404       synth_mult (&alg2, val - 1,
2405                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2406       if (alg2.cost + add_cost < alg.cost)
2407         alg = alg2, variant = add_variant;
2408
2409       if (alg.cost < mult_cost)
2410         {
2411           /* We found something cheaper than a multiply insn.  */
2412           int opno;
2413           rtx accum, tem;
2414           enum machine_mode nmode;
2415
2416           op0 = protect_from_queue (op0, 0);
2417
2418           /* Avoid referencing memory over and over.
2419              For speed, but also for correctness when mem is volatile.  */
2420           if (GET_CODE (op0) == MEM)
2421             op0 = force_reg (mode, op0);
2422
2423           /* ACCUM starts out either as OP0 or as a zero, depending on
2424              the first operation.  */
2425
2426           if (alg.op[0] == alg_zero)
2427             {
2428               accum = copy_to_mode_reg (mode, const0_rtx);
2429               val_so_far = 0;
2430             }
2431           else if (alg.op[0] == alg_m)
2432             {
2433               accum = copy_to_mode_reg (mode, op0);
2434               val_so_far = 1;
2435             }
2436           else
2437             abort ();
2438
2439           for (opno = 1; opno < alg.ops; opno++)
2440             {
2441               int log = alg.log[opno];
2442               int preserve = preserve_subexpressions_p ();
2443               rtx shift_subtarget = preserve ? 0 : accum;
2444               rtx add_target
2445                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2446                    && ! preserve)
2447                   ? target : 0;
2448               rtx accum_target = preserve ? 0 : accum;
2449
2450               switch (alg.op[opno])
2451                 {
2452                 case alg_shift:
2453                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2454                                         build_int_2 (log, 0), NULL_RTX, 0);
2455                   val_so_far <<= log;
2456                   break;
2457
2458                 case alg_add_t_m2:
2459                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2460                                       build_int_2 (log, 0), NULL_RTX, 0);
2461                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2462                                          add_target
2463                                          ? add_target : accum_target);
2464                   val_so_far += (HOST_WIDE_INT) 1 << log;
2465                   break;
2466
2467                 case alg_sub_t_m2:
2468                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2469                                       build_int_2 (log, 0), NULL_RTX, 0);
2470                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2471                                          add_target
2472                                          ? add_target : accum_target);
2473                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2474                   break;
2475
2476                 case alg_add_t2_m:
2477                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2478                                         build_int_2 (log, 0), shift_subtarget,
2479                                         0);
2480                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2481                                          add_target
2482                                          ? add_target : accum_target);
2483                   val_so_far = (val_so_far << log) + 1;
2484                   break;
2485
2486                 case alg_sub_t2_m:
2487                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2488                                         build_int_2 (log, 0), shift_subtarget,
2489                                         0);
2490                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2491                                          add_target
2492                                          ? add_target : accum_target);
2493                   val_so_far = (val_so_far << log) - 1;
2494                   break;
2495
2496                 case alg_add_factor:
2497                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2498                                       build_int_2 (log, 0), NULL_RTX, 0);
2499                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2500                                          add_target
2501                                          ? add_target : accum_target);
2502                   val_so_far += val_so_far << log;
2503                   break;
2504
2505                 case alg_sub_factor:
2506                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2507                                       build_int_2 (log, 0), NULL_RTX, 0);
2508                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2509                                          (add_target ? add_target
2510                                           : preserve ? 0 : tem));
2511                   val_so_far = (val_so_far << log) - val_so_far;
2512                   break;
2513
2514                 default:
2515                   abort ();
2516                 }
2517
2518               /* Write a REG_EQUAL note on the last insn so that we can cse
2519                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2520                  we've set the inner register and must properly indicate
2521                  that.  */
2522
2523               tem = op0, nmode = mode;
2524               if (GET_CODE (accum) == SUBREG)
2525                 {
2526                   nmode = GET_MODE (SUBREG_REG (accum));
2527                   tem = gen_lowpart (nmode, op0);
2528                 }
2529
2530               insn = get_last_insn ();
2531               set_unique_reg_note (insn,
2532                                    REG_EQUAL,
2533                                    gen_rtx_MULT (nmode, tem,
2534                                                  GEN_INT (val_so_far)));
2535             }
2536
2537           if (variant == negate_variant)
2538             {
2539               val_so_far = - val_so_far;
2540               accum = expand_unop (mode, neg_optab, accum, target, 0);
2541             }
2542           else if (variant == add_variant)
2543             {
2544               val_so_far = val_so_far + 1;
2545               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2546             }
2547
2548           if (val != val_so_far)
2549             abort ();
2550
2551           return accum;
2552         }
2553     }
2554
2555   /* This used to use umul_optab if unsigned, but for non-widening multiply
2556      there is no difference between signed and unsigned.  */
2557   op0 = expand_binop (mode,
2558                       ! unsignedp
2559                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2560                       ? smulv_optab : smul_optab,
2561                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2562   if (op0 == 0)
2563     abort ();
2564   return op0;
2565 }
2566 \f
2567 /* Return the smallest n such that 2**n >= X.  */
2568
2569 int
2570 ceil_log2 (x)
2571      unsigned HOST_WIDE_INT x;
2572 {
2573   return floor_log2 (x - 1) + 1;
2574 }
2575
2576 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2577    replace division by D, and put the least significant N bits of the result
2578    in *MULTIPLIER_PTR and return the most significant bit.
2579
2580    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2581    needed precision is in PRECISION (should be <= N).
2582
2583    PRECISION should be as small as possible so this function can choose
2584    multiplier more freely.
2585
2586    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2587    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2588
2589    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2590    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2591
2592 static
2593 unsigned HOST_WIDE_INT
2594 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2595      unsigned HOST_WIDE_INT d;
2596      int n;
2597      int precision;
2598      unsigned HOST_WIDE_INT *multiplier_ptr;
2599      int *post_shift_ptr;
2600      int *lgup_ptr;
2601 {
2602   HOST_WIDE_INT mhigh_hi, mlow_hi;
2603   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2604   int lgup, post_shift;
2605   int pow, pow2;
2606   unsigned HOST_WIDE_INT nl, dummy1;
2607   HOST_WIDE_INT nh, dummy2;
2608
2609   /* lgup = ceil(log2(divisor)); */
2610   lgup = ceil_log2 (d);
2611
2612   if (lgup > n)
2613     abort ();
2614
2615   pow = n + lgup;
2616   pow2 = n + lgup - precision;
2617
2618   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2619     {
2620       /* We could handle this with some effort, but this case is much better
2621          handled directly with a scc insn, so rely on caller using that.  */
2622       abort ();
2623     }
2624
2625   /* mlow = 2^(N + lgup)/d */
2626  if (pow >= HOST_BITS_PER_WIDE_INT)
2627     {
2628       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2629       nl = 0;
2630     }
2631   else
2632     {
2633       nh = 0;
2634       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2635     }
2636   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2637                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2638
2639   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2640   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2641     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2642   else
2643     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2644   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2645                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2646
2647   if (mhigh_hi && nh - d >= d)
2648     abort ();
2649   if (mhigh_hi > 1 || mlow_hi > 1)
2650     abort ();
2651   /* assert that mlow < mhigh.  */
2652   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2653     abort ();
2654
2655   /* If precision == N, then mlow, mhigh exceed 2^N
2656      (but they do not exceed 2^(N+1)).  */
2657
2658   /* Reduce to lowest terms */
2659   for (post_shift = lgup; post_shift > 0; post_shift--)
2660     {
2661       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2662       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2663       if (ml_lo >= mh_lo)
2664         break;
2665
2666       mlow_hi = 0;
2667       mlow_lo = ml_lo;
2668       mhigh_hi = 0;
2669       mhigh_lo = mh_lo;
2670     }
2671
2672   *post_shift_ptr = post_shift;
2673   *lgup_ptr = lgup;
2674   if (n < HOST_BITS_PER_WIDE_INT)
2675     {
2676       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2677       *multiplier_ptr = mhigh_lo & mask;
2678       return mhigh_lo >= mask;
2679     }
2680   else
2681     {
2682       *multiplier_ptr = mhigh_lo;
2683       return mhigh_hi;
2684     }
2685 }
2686
2687 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2688    congruent to 1 (mod 2**N).  */
2689
2690 static unsigned HOST_WIDE_INT
2691 invert_mod2n (x, n)
2692      unsigned HOST_WIDE_INT x;
2693      int n;
2694 {
2695   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2696
2697   /* The algorithm notes that the choice y = x satisfies
2698      x*y == 1 mod 2^3, since x is assumed odd.
2699      Each iteration doubles the number of bits of significance in y.  */
2700
2701   unsigned HOST_WIDE_INT mask;
2702   unsigned HOST_WIDE_INT y = x;
2703   int nbit = 3;
2704
2705   mask = (n == HOST_BITS_PER_WIDE_INT
2706           ? ~(unsigned HOST_WIDE_INT) 0
2707           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2708
2709   while (nbit < n)
2710     {
2711       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2712       nbit *= 2;
2713     }
2714   return y;
2715 }
2716
2717 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2718    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2719    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2720    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2721    become signed.
2722
2723    The result is put in TARGET if that is convenient.
2724
2725    MODE is the mode of operation.  */
2726
2727 rtx
2728 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2729      enum machine_mode mode;
2730      rtx adj_operand, op0, op1, target;
2731      int unsignedp;
2732 {
2733   rtx tem;
2734   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2735
2736   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2737                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2738                       NULL_RTX, 0);
2739   tem = expand_and (mode, tem, op1, NULL_RTX);
2740   adj_operand
2741     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2742                      adj_operand);
2743
2744   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2745                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2746                       NULL_RTX, 0);
2747   tem = expand_and (mode, tem, op0, NULL_RTX);
2748   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2749                           target);
2750
2751   return target;
2752 }
2753
2754 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2755    in TARGET if that is convenient, and return where the result is.  If the
2756    operation can not be performed, 0 is returned.
2757
2758    MODE is the mode of operation and result.
2759
2760    UNSIGNEDP nonzero means unsigned multiply.
2761
2762    MAX_COST is the total allowed cost for the expanded RTL.  */
2763
2764 rtx
2765 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2766      enum machine_mode mode;
2767      rtx op0, target;
2768      unsigned HOST_WIDE_INT cnst1;
2769      int unsignedp;
2770      int max_cost;
2771 {
2772   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2773   optab mul_highpart_optab;
2774   optab moptab;
2775   rtx tem;
2776   int size = GET_MODE_BITSIZE (mode);
2777   rtx op1, wide_op1;
2778
2779   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2780   if (size > HOST_BITS_PER_WIDE_INT)
2781     abort ();
2782
2783   op1 = gen_int_mode (cnst1, mode);
2784
2785   wide_op1
2786     = immed_double_const (cnst1,
2787                           (unsignedp
2788                            ? (HOST_WIDE_INT) 0
2789                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2790                           wider_mode);
2791
2792   /* expand_mult handles constant multiplication of word_mode
2793      or narrower.  It does a poor job for large modes.  */
2794   if (size < BITS_PER_WORD
2795       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2796     {
2797       /* We have to do this, since expand_binop doesn't do conversion for
2798          multiply.  Maybe change expand_binop to handle widening multiply?  */
2799       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2800
2801       /* We know that this can't have signed overflow, so pretend this is
2802          an unsigned multiply.  */
2803       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2804       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2805                           build_int_2 (size, 0), NULL_RTX, 1);
2806       return convert_modes (mode, wider_mode, tem, unsignedp);
2807     }
2808
2809   if (target == 0)
2810     target = gen_reg_rtx (mode);
2811
2812   /* Firstly, try using a multiplication insn that only generates the needed
2813      high part of the product, and in the sign flavor of unsignedp.  */
2814   if (mul_highpart_cost[(int) mode] < max_cost)
2815     {
2816       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2817       target = expand_binop (mode, mul_highpart_optab,
2818                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2819       if (target)
2820         return target;
2821     }
2822
2823   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2824      Need to adjust the result after the multiplication.  */
2825   if (size - 1 < BITS_PER_WORD
2826       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2827           < max_cost))
2828     {
2829       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2830       target = expand_binop (mode, mul_highpart_optab,
2831                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2832       if (target)
2833         /* We used the wrong signedness.  Adjust the result.  */
2834         return expand_mult_highpart_adjust (mode, target, op0,
2835                                             op1, target, unsignedp);
2836     }
2837
2838   /* Try widening multiplication.  */
2839   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2840   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2841       && mul_widen_cost[(int) wider_mode] < max_cost)
2842     {
2843       op1 = force_reg (mode, op1);
2844       goto try;
2845     }
2846
2847   /* Try widening the mode and perform a non-widening multiplication.  */
2848   moptab = smul_optab;
2849   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2850       && size - 1 < BITS_PER_WORD
2851       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2852     {
2853       op1 = wide_op1;
2854       goto try;
2855     }
2856
2857   /* Try widening multiplication of opposite signedness, and adjust.  */
2858   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2859   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2860       && size - 1 < BITS_PER_WORD
2861       && (mul_widen_cost[(int) wider_mode]
2862           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2863     {
2864       rtx regop1 = force_reg (mode, op1);
2865       tem = expand_binop (wider_mode, moptab, op0, regop1,
2866                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2867       if (tem != 0)
2868         {
2869           /* Extract the high half of the just generated product.  */
2870           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2871                               build_int_2 (size, 0), NULL_RTX, 1);
2872           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2873           /* We used the wrong signedness.  Adjust the result.  */
2874           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2875                                               target, unsignedp);
2876         }
2877     }
2878
2879   return 0;
2880
2881  try:
2882   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2883   tem = expand_binop (wider_mode, moptab, op0, op1,
2884                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2885   if (tem == 0)
2886     return 0;
2887
2888   /* Extract the high half of the just generated product.  */
2889   if (mode == word_mode)
2890     {
2891       return gen_highpart (mode, tem);
2892     }
2893   else
2894     {
2895       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2896                           build_int_2 (size, 0), NULL_RTX, 1);
2897       return convert_modes (mode, wider_mode, tem, unsignedp);
2898     }
2899 }
2900 \f
2901 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2902    if that is convenient, and returning where the result is.
2903    You may request either the quotient or the remainder as the result;
2904    specify REM_FLAG nonzero to get the remainder.
2905
2906    CODE is the expression code for which kind of division this is;
2907    it controls how rounding is done.  MODE is the machine mode to use.
2908    UNSIGNEDP nonzero means do unsigned division.  */
2909
2910 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2911    and then correct it by or'ing in missing high bits
2912    if result of ANDI is nonzero.
2913    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2914    This could optimize to a bfexts instruction.
2915    But C doesn't use these operations, so their optimizations are
2916    left for later.  */
2917 /* ??? For modulo, we don't actually need the highpart of the first product,
2918    the low part will do nicely.  And for small divisors, the second multiply
2919    can also be a low-part only multiply or even be completely left out.
2920    E.g. to calculate the remainder of a division by 3 with a 32 bit
2921    multiply, multiply with 0x55555556 and extract the upper two bits;
2922    the result is exact for inputs up to 0x1fffffff.
2923    The input range can be reduced by using cross-sum rules.
2924    For odd divisors >= 3, the following table gives right shift counts
2925    so that if an number is shifted by an integer multiple of the given
2926    amount, the remainder stays the same:
2927    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2928    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2929    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2930    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2931    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2932
2933    Cross-sum rules for even numbers can be derived by leaving as many bits
2934    to the right alone as the divisor has zeros to the right.
2935    E.g. if x is an unsigned 32 bit number:
2936    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2937    */
2938
2939 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2940
2941 rtx
2942 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2943      int rem_flag;
2944      enum tree_code code;
2945      enum machine_mode mode;
2946      rtx op0, op1, target;
2947      int unsignedp;
2948 {
2949   enum machine_mode compute_mode;
2950   rtx tquotient;
2951   rtx quotient = 0, remainder = 0;
2952   rtx last;
2953   int size;
2954   rtx insn, set;
2955   optab optab1, optab2;
2956   int op1_is_constant, op1_is_pow2;
2957   int max_cost, extra_cost;
2958   static HOST_WIDE_INT last_div_const = 0;
2959
2960   op1_is_constant = GET_CODE (op1) == CONST_INT;
2961   op1_is_pow2 = (op1_is_constant
2962                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2963                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2964
2965   /*
2966      This is the structure of expand_divmod:
2967
2968      First comes code to fix up the operands so we can perform the operations
2969      correctly and efficiently.
2970
2971      Second comes a switch statement with code specific for each rounding mode.
2972      For some special operands this code emits all RTL for the desired
2973      operation, for other cases, it generates only a quotient and stores it in
2974      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2975      to indicate that it has not done anything.
2976
2977      Last comes code that finishes the operation.  If QUOTIENT is set and
2978      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2979      QUOTIENT is not set, it is computed using trunc rounding.
2980
2981      We try to generate special code for division and remainder when OP1 is a
2982      constant.  If |OP1| = 2**n we can use shifts and some other fast
2983      operations.  For other values of OP1, we compute a carefully selected
2984      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2985      by m.
2986
2987      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2988      half of the product.  Different strategies for generating the product are
2989      implemented in expand_mult_highpart.
2990
2991      If what we actually want is the remainder, we generate that by another
2992      by-constant multiplication and a subtraction.  */
2993
2994   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2995      code below will malfunction if we are, so check here and handle
2996      the special case if so.  */
2997   if (op1 == const1_rtx)
2998     return rem_flag ? const0_rtx : op0;
2999
3000     /* When dividing by -1, we could get an overflow.
3001      negv_optab can handle overflows.  */
3002   if (! unsignedp && op1 == constm1_rtx)
3003     {
3004       if (rem_flag)
3005         return const0_rtx;
3006       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3007                           ? negv_optab : neg_optab, op0, target, 0);
3008     }
3009
3010   if (target
3011       /* Don't use the function value register as a target
3012          since we have to read it as well as write it,
3013          and function-inlining gets confused by this.  */
3014       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3015           /* Don't clobber an operand while doing a multi-step calculation.  */
3016           || ((rem_flag || op1_is_constant)
3017               && (reg_mentioned_p (target, op0)
3018                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3019           || reg_mentioned_p (target, op1)
3020           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3021     target = 0;
3022
3023   /* Get the mode in which to perform this computation.  Normally it will
3024      be MODE, but sometimes we can't do the desired operation in MODE.
3025      If so, pick a wider mode in which we can do the operation.  Convert
3026      to that mode at the start to avoid repeated conversions.
3027
3028      First see what operations we need.  These depend on the expression
3029      we are evaluating.  (We assume that divxx3 insns exist under the
3030      same conditions that modxx3 insns and that these insns don't normally
3031      fail.  If these assumptions are not correct, we may generate less
3032      efficient code in some cases.)
3033
3034      Then see if we find a mode in which we can open-code that operation
3035      (either a division, modulus, or shift).  Finally, check for the smallest
3036      mode for which we can do the operation with a library call.  */
3037
3038   /* We might want to refine this now that we have division-by-constant
3039      optimization.  Since expand_mult_highpart tries so many variants, it is
3040      not straightforward to generalize this.  Maybe we should make an array
3041      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3042
3043   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3044             ? (unsignedp ? lshr_optab : ashr_optab)
3045             : (unsignedp ? udiv_optab : sdiv_optab));
3046   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3047             ? optab1
3048             : (unsignedp ? udivmod_optab : sdivmod_optab));
3049
3050   for (compute_mode = mode; compute_mode != VOIDmode;
3051        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3052     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3053         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3054       break;
3055
3056   if (compute_mode == VOIDmode)
3057     for (compute_mode = mode; compute_mode != VOIDmode;
3058          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3059       if (optab1->handlers[(int) compute_mode].libfunc
3060           || optab2->handlers[(int) compute_mode].libfunc)
3061         break;
3062
3063   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3064      in expand_binop.  */
3065   if (compute_mode == VOIDmode)
3066     compute_mode = mode;
3067
3068   if (target && GET_MODE (target) == compute_mode)
3069     tquotient = target;
3070   else
3071     tquotient = gen_reg_rtx (compute_mode);
3072
3073   size = GET_MODE_BITSIZE (compute_mode);
3074 #if 0
3075   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3076      (mode), and thereby get better code when OP1 is a constant.  Do that
3077      later.  It will require going over all usages of SIZE below.  */
3078   size = GET_MODE_BITSIZE (mode);
3079 #endif
3080
3081   /* Only deduct something for a REM if the last divide done was
3082      for a different constant.   Then set the constant of the last
3083      divide.  */
3084   max_cost = div_cost[(int) compute_mode]
3085     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3086                       && INTVAL (op1) == last_div_const)
3087        ? mul_cost[(int) compute_mode] + add_cost : 0);
3088
3089   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3090
3091   /* Now convert to the best mode to use.  */
3092   if (compute_mode != mode)
3093     {
3094       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3095       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3096
3097       /* convert_modes may have placed op1 into a register, so we
3098          must recompute the following.  */
3099       op1_is_constant = GET_CODE (op1) == CONST_INT;
3100       op1_is_pow2 = (op1_is_constant
3101                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3102                           || (! unsignedp
3103                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3104     }
3105
3106   /* If one of the operands is a volatile MEM, copy it into a register.  */
3107
3108   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3109     op0 = force_reg (compute_mode, op0);
3110   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3111     op1 = force_reg (compute_mode, op1);
3112
3113   /* If we need the remainder or if OP1 is constant, we need to
3114      put OP0 in a register in case it has any queued subexpressions.  */
3115   if (rem_flag || op1_is_constant)
3116     op0 = force_reg (compute_mode, op0);
3117
3118   last = get_last_insn ();
3119
3120   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3121   if (unsignedp)
3122     {
3123       if (code == FLOOR_DIV_EXPR)
3124         code = TRUNC_DIV_EXPR;
3125       if (code == FLOOR_MOD_EXPR)
3126         code = TRUNC_MOD_EXPR;
3127       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3128         code = TRUNC_DIV_EXPR;
3129     }
3130
3131   if (op1 != const0_rtx)
3132     switch (code)
3133       {
3134       case TRUNC_MOD_EXPR:
3135       case TRUNC_DIV_EXPR:
3136         if (op1_is_constant)
3137           {
3138             if (unsignedp)
3139               {
3140                 unsigned HOST_WIDE_INT mh, ml;
3141                 int pre_shift, post_shift;
3142                 int dummy;
3143                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3144
3145                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3146                   {
3147                     pre_shift = floor_log2 (d);
3148                     if (rem_flag)
3149                       {
3150                         remainder
3151                           = expand_binop (compute_mode, and_optab, op0,
3152                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3153                                           remainder, 1,
3154                                           OPTAB_LIB_WIDEN);
3155                         if (remainder)
3156                           return gen_lowpart (mode, remainder);
3157                       }
3158                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3159                                              build_int_2 (pre_shift, 0),
3160                                              tquotient, 1);
3161                   }
3162                 else if (size <= HOST_BITS_PER_WIDE_INT)
3163                   {
3164                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3165                       {
3166                         /* Most significant bit of divisor is set; emit an scc
3167                            insn.  */
3168                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3169                                                     compute_mode, 1, 1);
3170                         if (quotient == 0)
3171                           goto fail1;
3172                       }
3173                     else
3174                       {
3175                         /* Find a suitable multiplier and right shift count
3176                            instead of multiplying with D.  */
3177
3178                         mh = choose_multiplier (d, size, size,
3179                                                 &ml, &post_shift, &dummy);
3180
3181                         /* If the suggested multiplier is more than SIZE bits,
3182                            we can do better for even divisors, using an
3183                            initial right shift.  */
3184                         if (mh != 0 && (d & 1) == 0)
3185                           {
3186                             pre_shift = floor_log2 (d & -d);
3187                             mh = choose_multiplier (d >> pre_shift, size,
3188                                                     size - pre_shift,
3189                                                     &ml, &post_shift, &dummy);
3190                             if (mh)
3191                               abort ();
3192                           }
3193                         else
3194                           pre_shift = 0;
3195
3196                         if (mh != 0)
3197                           {
3198                             rtx t1, t2, t3, t4;
3199
3200                             if (post_shift - 1 >= BITS_PER_WORD)
3201                               goto fail1;
3202
3203                             extra_cost = (shift_cost[post_shift - 1]
3204                                           + shift_cost[1] + 2 * add_cost);
3205                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3206                                                        NULL_RTX, 1,
3207                                                        max_cost - extra_cost);
3208                             if (t1 == 0)
3209                               goto fail1;
3210                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3211                                                                op0, t1),
3212                                                 NULL_RTX);
3213                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3214                                                build_int_2 (1, 0), NULL_RTX,1);
3215                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3216                                                               t1, t3),
3217                                                 NULL_RTX);
3218                             quotient
3219                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3220                                               build_int_2 (post_shift - 1, 0),
3221                                               tquotient, 1);
3222                           }
3223                         else
3224                           {
3225                             rtx t1, t2;
3226
3227                             if (pre_shift >= BITS_PER_WORD
3228                                 || post_shift >= BITS_PER_WORD)
3229                               goto fail1;
3230
3231                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3232                                                build_int_2 (pre_shift, 0),
3233                                                NULL_RTX, 1);
3234                             extra_cost = (shift_cost[pre_shift]
3235                                           + shift_cost[post_shift]);
3236                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3237                                                        NULL_RTX, 1,
3238                                                        max_cost - extra_cost);
3239                             if (t2 == 0)
3240                               goto fail1;
3241                             quotient
3242                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3243                                               build_int_2 (post_shift, 0),
3244                                               tquotient, 1);
3245                           }
3246                       }
3247                   }
3248                 else            /* Too wide mode to use tricky code */
3249                   break;
3250
3251                 insn = get_last_insn ();
3252                 if (insn != last
3253                     && (set = single_set (insn)) != 0
3254                     && SET_DEST (set) == quotient)
3255                   set_unique_reg_note (insn,
3256                                        REG_EQUAL,
3257                                        gen_rtx_UDIV (compute_mode, op0, op1));
3258               }
3259             else                /* TRUNC_DIV, signed */
3260               {
3261                 unsigned HOST_WIDE_INT ml;
3262                 int lgup, post_shift;
3263                 HOST_WIDE_INT d = INTVAL (op1);
3264                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3265
3266                 /* n rem d = n rem -d */
3267                 if (rem_flag && d < 0)
3268                   {
3269                     d = abs_d;
3270                     op1 = gen_int_mode (abs_d, compute_mode);
3271                   }
3272
3273                 if (d == 1)
3274                   quotient = op0;
3275                 else if (d == -1)
3276                   quotient = expand_unop (compute_mode, neg_optab, op0,
3277                                           tquotient, 0);
3278                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3279                   {
3280                     /* This case is not handled correctly below.  */
3281                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3282                                                 compute_mode, 1, 1);
3283                     if (quotient == 0)
3284                       goto fail1;
3285                   }
3286                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3287                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3288                          /* ??? The cheap metric is computed only for
3289                             word_mode.  If this operation is wider, this may
3290                             not be so.  Assume true if the optab has an
3291                             expander for this mode.  */
3292                          && (((rem_flag ? smod_optab : sdiv_optab)
3293                               ->handlers[(int) compute_mode].insn_code
3294                               != CODE_FOR_nothing)
3295                              || (sdivmod_optab->handlers[(int) compute_mode]
3296                                  .insn_code != CODE_FOR_nothing)))
3297                   ;
3298                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3299                   {
3300                     lgup = floor_log2 (abs_d);
3301                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3302                       {
3303                         rtx label = gen_label_rtx ();
3304                         rtx t1;
3305
3306                         t1 = copy_to_mode_reg (compute_mode, op0);
3307                         do_cmp_and_jump (t1, const0_rtx, GE,
3308                                          compute_mode, label);
3309                         expand_inc (t1, gen_int_mode (abs_d - 1,
3310                                                       compute_mode));
3311                         emit_label (label);
3312                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3313                                                  build_int_2 (lgup, 0),
3314                                                  tquotient, 0);
3315                       }
3316                     else
3317                       {
3318                         rtx t1, t2, t3;
3319                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3320                                            build_int_2 (size - 1, 0),
3321                                            NULL_RTX, 0);
3322                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3323                                            build_int_2 (size - lgup, 0),
3324                                            NULL_RTX, 1);
3325                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3326                                                           op0, t2),
3327                                             NULL_RTX);
3328                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3329                                                  build_int_2 (lgup, 0),
3330                                                  tquotient, 0);
3331                       }
3332
3333                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3334                        the quotient.  */
3335                     if (d < 0)
3336                       {
3337                         insn = get_last_insn ();
3338                         if (insn != last
3339                             && (set = single_set (insn)) != 0
3340                             && SET_DEST (set) == quotient
3341                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3342                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3343                           set_unique_reg_note (insn,
3344                                                REG_EQUAL,
3345                                                gen_rtx_DIV (compute_mode,
3346                                                             op0,
3347                                                             GEN_INT
3348                                                             (trunc_int_for_mode
3349                                                              (abs_d,
3350                                                               compute_mode))));
3351
3352                         quotient = expand_unop (compute_mode, neg_optab,
3353                                                 quotient, quotient, 0);
3354                       }
3355                   }
3356                 else if (size <= HOST_BITS_PER_WIDE_INT)
3357                   {
3358                     choose_multiplier (abs_d, size, size - 1,
3359                                        &ml, &post_shift, &lgup);
3360                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3361                       {
3362                         rtx t1, t2, t3;
3363
3364                         if (post_shift >= BITS_PER_WORD
3365                             || size - 1 >= BITS_PER_WORD)
3366                           goto fail1;
3367
3368                         extra_cost = (shift_cost[post_shift]
3369                                       + shift_cost[size - 1] + add_cost);
3370                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3371                                                    NULL_RTX, 0,
3372                                                    max_cost - extra_cost);
3373                         if (t1 == 0)
3374                           goto fail1;
3375                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3376                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3377                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3378                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3379                         if (d < 0)
3380                           quotient
3381                             = force_operand (gen_rtx_MINUS (compute_mode,
3382                                                             t3, t2),
3383                                              tquotient);
3384                         else
3385                           quotient
3386                             = force_operand (gen_rtx_MINUS (compute_mode,
3387                                                             t2, t3),
3388                                              tquotient);
3389                       }
3390                     else
3391                       {
3392                         rtx t1, t2, t3, t4;
3393
3394                         if (post_shift >= BITS_PER_WORD
3395                             || size - 1 >= BITS_PER_WORD)
3396                           goto fail1;
3397
3398                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3399                         extra_cost = (shift_cost[post_shift]
3400                                       + shift_cost[size - 1] + 2 * add_cost);
3401                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3402                                                    NULL_RTX, 0,
3403                                                    max_cost - extra_cost);
3404                         if (t1 == 0)
3405                           goto fail1;
3406                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3407                                                           t1, op0),
3408                                             NULL_RTX);
3409                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3410                                            build_int_2 (post_shift, 0),
3411                                            NULL_RTX, 0);
3412                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3413                                            build_int_2 (size - 1, 0),
3414                                            NULL_RTX, 0);
3415                         if (d < 0)
3416                           quotient
3417                             = force_operand (gen_rtx_MINUS (compute_mode,
3418                                                             t4, t3),
3419                                              tquotient);
3420                         else
3421                           quotient
3422                             = force_operand (gen_rtx_MINUS (compute_mode,
3423                                                             t3, t4),
3424                                              tquotient);
3425                       }
3426                   }
3427                 else            /* Too wide mode to use tricky code */
3428                   break;
3429
3430                 insn = get_last_insn ();
3431                 if (insn != last
3432                     && (set = single_set (insn)) != 0
3433                     && SET_DEST (set) == quotient)
3434                   set_unique_reg_note (insn,
3435                                        REG_EQUAL,
3436                                        gen_rtx_DIV (compute_mode, op0, op1));
3437               }
3438             break;
3439           }
3440       fail1:
3441         delete_insns_since (last);
3442         break;
3443
3444       case FLOOR_DIV_EXPR:
3445       case FLOOR_MOD_EXPR:
3446       /* We will come here only for signed operations.  */
3447         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3448           {
3449             unsigned HOST_WIDE_INT mh, ml;
3450             int pre_shift, lgup, post_shift;
3451             HOST_WIDE_INT d = INTVAL (op1);
3452
3453             if (d > 0)
3454               {
3455                 /* We could just as easily deal with negative constants here,
3456                    but it does not seem worth the trouble for GCC 2.6.  */
3457                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3458                   {
3459                     pre_shift = floor_log2 (d);
3460                     if (rem_flag)
3461                       {
3462                         remainder = expand_binop (compute_mode, and_optab, op0,
3463                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3464                                                   remainder, 0, OPTAB_LIB_WIDEN);
3465                         if (remainder)
3466                           return gen_lowpart (mode, remainder);
3467                       }
3468                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3469                                              build_int_2 (pre_shift, 0),
3470                                              tquotient, 0);
3471                   }
3472                 else
3473                   {
3474                     rtx t1, t2, t3, t4;
3475
3476                     mh = choose_multiplier (d, size, size - 1,
3477                                             &ml, &post_shift, &lgup);
3478                     if (mh)
3479                       abort ();
3480
3481                     if (post_shift < BITS_PER_WORD
3482                         && size - 1 < BITS_PER_WORD)
3483                       {
3484                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3485                                            build_int_2 (size - 1, 0),
3486                                            NULL_RTX, 0);
3487                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3488                                            NULL_RTX, 0, OPTAB_WIDEN);
3489                         extra_cost = (shift_cost[post_shift]
3490                                       + shift_cost[size - 1] + 2 * add_cost);
3491                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3492                                                    NULL_RTX, 1,
3493                                                    max_cost - extra_cost);
3494                         if (t3 != 0)
3495                           {
3496                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3497                                                build_int_2 (post_shift, 0),
3498                                                NULL_RTX, 1);
3499                             quotient = expand_binop (compute_mode, xor_optab,
3500                                                      t4, t1, tquotient, 0,
3501                                                      OPTAB_WIDEN);
3502                           }
3503                       }
3504                   }
3505               }
3506             else
3507               {
3508                 rtx nsign, t1, t2, t3, t4;
3509                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3510                                                   op0, constm1_rtx), NULL_RTX);
3511                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3512                                    0, OPTAB_WIDEN);
3513                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3514                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3515                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3516                                     NULL_RTX);
3517                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3518                                     NULL_RTX, 0);
3519                 if (t4)
3520                   {
3521                     rtx t5;
3522                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3523                                       NULL_RTX, 0);
3524                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3525                                                             t4, t5),
3526                                               tquotient);
3527                   }
3528               }
3529           }
3530
3531         if (quotient != 0)
3532           break;
3533         delete_insns_since (last);
3534
3535         /* Try using an instruction that produces both the quotient and
3536            remainder, using truncation.  We can easily compensate the quotient
3537            or remainder to get floor rounding, once we have the remainder.
3538            Notice that we compute also the final remainder value here,
3539            and return the result right away.  */
3540         if (target == 0 || GET_MODE (target) != compute_mode)
3541           target = gen_reg_rtx (compute_mode);
3542
3543         if (rem_flag)
3544           {
3545             remainder
3546               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3547             quotient = gen_reg_rtx (compute_mode);
3548           }
3549         else
3550           {
3551             quotient
3552               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3553             remainder = gen_reg_rtx (compute_mode);
3554           }
3555
3556         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3557                                  quotient, remainder, 0))
3558           {
3559             /* This could be computed with a branch-less sequence.
3560                Save that for later.  */
3561             rtx tem;
3562             rtx label = gen_label_rtx ();
3563             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3564             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3565                                 NULL_RTX, 0, OPTAB_WIDEN);
3566             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3567             expand_dec (quotient, const1_rtx);
3568             expand_inc (remainder, op1);
3569             emit_label (label);
3570             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3571           }
3572
3573         /* No luck with division elimination or divmod.  Have to do it
3574            by conditionally adjusting op0 *and* the result.  */
3575         {
3576           rtx label1, label2, label3, label4, label5;
3577           rtx adjusted_op0;
3578           rtx tem;
3579
3580           quotient = gen_reg_rtx (compute_mode);
3581           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3582           label1 = gen_label_rtx ();
3583           label2 = gen_label_rtx ();
3584           label3 = gen_label_rtx ();
3585           label4 = gen_label_rtx ();
3586           label5 = gen_label_rtx ();
3587           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3588           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3589           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3590                               quotient, 0, OPTAB_LIB_WIDEN);
3591           if (tem != quotient)
3592             emit_move_insn (quotient, tem);
3593           emit_jump_insn (gen_jump (label5));
3594           emit_barrier ();
3595           emit_label (label1);
3596           expand_inc (adjusted_op0, const1_rtx);
3597           emit_jump_insn (gen_jump (label4));
3598           emit_barrier ();
3599           emit_label (label2);
3600           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3601           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3602                               quotient, 0, OPTAB_LIB_WIDEN);
3603           if (tem != quotient)
3604             emit_move_insn (quotient, tem);
3605           emit_jump_insn (gen_jump (label5));
3606           emit_barrier ();
3607           emit_label (label3);
3608           expand_dec (adjusted_op0, const1_rtx);
3609           emit_label (label4);
3610           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3611                               quotient, 0, OPTAB_LIB_WIDEN);
3612           if (tem != quotient)
3613             emit_move_insn (quotient, tem);
3614           expand_dec (quotient, const1_rtx);
3615           emit_label (label5);
3616         }
3617         break;
3618
3619       case CEIL_DIV_EXPR:
3620       case CEIL_MOD_EXPR:
3621         if (unsignedp)
3622           {
3623             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3624               {
3625                 rtx t1, t2, t3;
3626                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3627                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3628                                    build_int_2 (floor_log2 (d), 0),
3629                                    tquotient, 1);
3630                 t2 = expand_binop (compute_mode, and_optab, op0,
3631                                    GEN_INT (d - 1),
3632                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3633                 t3 = gen_reg_rtx (compute_mode);
3634                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3635                                       compute_mode, 1, 1);
3636                 if (t3 == 0)
3637                   {
3638                     rtx lab;
3639                     lab = gen_label_rtx ();
3640                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3641                     expand_inc (t1, const1_rtx);
3642                     emit_label (lab);
3643                     quotient = t1;
3644                   }
3645                 else
3646                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3647                                                           t1, t3),
3648                                             tquotient);
3649                 break;
3650               }
3651
3652             /* Try using an instruction that produces both the quotient and
3653                remainder, using truncation.  We can easily compensate the
3654                quotient or remainder to get ceiling rounding, once we have the
3655                remainder.  Notice that we compute also the final remainder
3656                value here, and return the result right away.  */
3657             if (target == 0 || GET_MODE (target) != compute_mode)
3658               target = gen_reg_rtx (compute_mode);
3659
3660             if (rem_flag)
3661               {
3662                 remainder = (GET_CODE (target) == REG
3663                              ? target : gen_reg_rtx (compute_mode));
3664                 quotient = gen_reg_rtx (compute_mode);
3665               }
3666             else
3667               {
3668                 quotient = (GET_CODE (target) == REG
3669                             ? target : gen_reg_rtx (compute_mode));
3670                 remainder = gen_reg_rtx (compute_mode);
3671               }
3672
3673             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3674                                      remainder, 1))
3675               {
3676                 /* This could be computed with a branch-less sequence.
3677                    Save that for later.  */
3678                 rtx label = gen_label_rtx ();
3679                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3680                                  compute_mode, label);
3681                 expand_inc (quotient, const1_rtx);
3682                 expand_dec (remainder, op1);
3683                 emit_label (label);
3684                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3685               }
3686
3687             /* No luck with division elimination or divmod.  Have to do it
3688                by conditionally adjusting op0 *and* the result.  */
3689             {
3690               rtx label1, label2;
3691               rtx adjusted_op0, tem;
3692
3693               quotient = gen_reg_rtx (compute_mode);
3694               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3695               label1 = gen_label_rtx ();
3696               label2 = gen_label_rtx ();
3697               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3698                                compute_mode, label1);
3699               emit_move_insn  (quotient, const0_rtx);
3700               emit_jump_insn (gen_jump (label2));
3701               emit_barrier ();
3702               emit_label (label1);
3703               expand_dec (adjusted_op0, const1_rtx);
3704               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3705                                   quotient, 1, OPTAB_LIB_WIDEN);
3706               if (tem != quotient)
3707                 emit_move_insn (quotient, tem);
3708               expand_inc (quotient, const1_rtx);
3709               emit_label (label2);
3710             }
3711           }
3712         else /* signed */
3713           {
3714             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3715                 && INTVAL (op1) >= 0)
3716               {
3717                 /* This is extremely similar to the code for the unsigned case
3718                    above.  For 2.7 we should merge these variants, but for
3719                    2.6.1 I don't want to touch the code for unsigned since that
3720                    get used in C.  The signed case will only be used by other
3721                    languages (Ada).  */
3722
3723                 rtx t1, t2, t3;
3724                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3725                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3726                                    build_int_2 (floor_log2 (d), 0),
3727                                    tquotient, 0);
3728                 t2 = expand_binop (compute_mode, and_optab, op0,
3729                                    GEN_INT (d - 1),
3730                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3731                 t3 = gen_reg_rtx (compute_mode);
3732                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3733                                       compute_mode, 1, 1);
3734                 if (t3 == 0)
3735                   {
3736                     rtx lab;
3737                     lab = gen_label_rtx ();
3738                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3739                     expand_inc (t1, const1_rtx);
3740                     emit_label (lab);
3741                     quotient = t1;
3742                   }
3743                 else
3744                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3745                                                           t1, t3),
3746                                             tquotient);
3747                 break;
3748               }
3749
3750             /* Try using an instruction that produces both the quotient and
3751                remainder, using truncation.  We can easily compensate the
3752                quotient or remainder to get ceiling rounding, once we have the
3753                remainder.  Notice that we compute also the final remainder
3754                value here, and return the result right away.  */
3755             if (target == 0 || GET_MODE (target) != compute_mode)
3756               target = gen_reg_rtx (compute_mode);
3757             if (rem_flag)
3758               {
3759                 remainder= (GET_CODE (target) == REG
3760                             ? target : gen_reg_rtx (compute_mode));
3761                 quotient = gen_reg_rtx (compute_mode);
3762               }
3763             else
3764               {
3765                 quotient = (GET_CODE (target) == REG
3766                             ? target : gen_reg_rtx (compute_mode));
3767                 remainder = gen_reg_rtx (compute_mode);
3768               }
3769
3770             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3771                                      remainder, 0))
3772               {
3773                 /* This could be computed with a branch-less sequence.
3774                    Save that for later.  */
3775                 rtx tem;
3776                 rtx label = gen_label_rtx ();
3777                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3778                                  compute_mode, label);
3779                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3780                                     NULL_RTX, 0, OPTAB_WIDEN);
3781                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3782                 expand_inc (quotient, const1_rtx);
3783                 expand_dec (remainder, op1);
3784                 emit_label (label);
3785                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3786               }
3787
3788             /* No luck with division elimination or divmod.  Have to do it
3789                by conditionally adjusting op0 *and* the result.  */
3790             {
3791               rtx label1, label2, label3, label4, label5;
3792               rtx adjusted_op0;
3793               rtx tem;
3794
3795               quotient = gen_reg_rtx (compute_mode);
3796               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3797               label1 = gen_label_rtx ();
3798               label2 = gen_label_rtx ();
3799               label3 = gen_label_rtx ();
3800               label4 = gen_label_rtx ();
3801               label5 = gen_label_rtx ();
3802               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3803               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3804                                compute_mode, label1);
3805               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3806                                   quotient, 0, OPTAB_LIB_WIDEN);
3807               if (tem != quotient)
3808                 emit_move_insn (quotient, tem);
3809               emit_jump_insn (gen_jump (label5));
3810               emit_barrier ();
3811               emit_label (label1);
3812               expand_dec (adjusted_op0, const1_rtx);
3813               emit_jump_insn (gen_jump (label4));
3814               emit_barrier ();
3815               emit_label (label2);
3816               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3817                                compute_mode, label3);
3818               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3819                                   quotient, 0, OPTAB_LIB_WIDEN);
3820               if (tem != quotient)
3821                 emit_move_insn (quotient, tem);
3822               emit_jump_insn (gen_jump (label5));
3823               emit_barrier ();
3824               emit_label (label3);
3825               expand_inc (adjusted_op0, const1_rtx);
3826               emit_label (label4);
3827               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3828                                   quotient, 0, OPTAB_LIB_WIDEN);
3829               if (tem != quotient)
3830                 emit_move_insn (quotient, tem);
3831               expand_inc (quotient, const1_rtx);
3832               emit_label (label5);
3833             }
3834           }
3835         break;
3836
3837       case EXACT_DIV_EXPR:
3838         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3839           {
3840             HOST_WIDE_INT d = INTVAL (op1);
3841             unsigned HOST_WIDE_INT ml;
3842             int pre_shift;
3843             rtx t1;
3844
3845             pre_shift = floor_log2 (d & -d);
3846             ml = invert_mod2n (d >> pre_shift, size);
3847             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3848                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3849             quotient = expand_mult (compute_mode, t1,
3850                                     gen_int_mode (ml, compute_mode),
3851                                     NULL_RTX, 0);
3852
3853             insn = get_last_insn ();
3854             set_unique_reg_note (insn,
3855                                  REG_EQUAL,
3856                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3857                                                  compute_mode,
3858                                                  op0, op1));
3859           }
3860         break;
3861
3862       case ROUND_DIV_EXPR:
3863       case ROUND_MOD_EXPR:
3864         if (unsignedp)
3865           {
3866             rtx tem;
3867             rtx label;
3868             label = gen_label_rtx ();
3869             quotient = gen_reg_rtx (compute_mode);
3870             remainder = gen_reg_rtx (compute_mode);
3871             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3872               {
3873                 rtx tem;
3874                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3875                                          quotient, 1, OPTAB_LIB_WIDEN);
3876                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3877                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3878                                           remainder, 1, OPTAB_LIB_WIDEN);
3879               }
3880             tem = plus_constant (op1, -1);
3881             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3882                                 build_int_2 (1, 0), NULL_RTX, 1);
3883             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3884             expand_inc (quotient, const1_rtx);
3885             expand_dec (remainder, op1);
3886             emit_label (label);
3887           }
3888         else
3889           {
3890             rtx abs_rem, abs_op1, tem, mask;
3891             rtx label;
3892             label = gen_label_rtx ();
3893             quotient = gen_reg_rtx (compute_mode);
3894             remainder = gen_reg_rtx (compute_mode);
3895             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3896               {
3897                 rtx tem;
3898                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3899                                          quotient, 0, OPTAB_LIB_WIDEN);
3900                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3901                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3902                                           remainder, 0, OPTAB_LIB_WIDEN);
3903               }
3904             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3905             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3906             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3907                                 build_int_2 (1, 0), NULL_RTX, 1);
3908             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3909             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3910                                 NULL_RTX, 0, OPTAB_WIDEN);
3911             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3912                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3913             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3914                                 NULL_RTX, 0, OPTAB_WIDEN);
3915             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3916                                 NULL_RTX, 0, OPTAB_WIDEN);
3917             expand_inc (quotient, tem);
3918             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3919                                 NULL_RTX, 0, OPTAB_WIDEN);
3920             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3921                                 NULL_RTX, 0, OPTAB_WIDEN);
3922             expand_dec (remainder, tem);
3923             emit_label (label);
3924           }
3925         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3926
3927       default:
3928         abort ();
3929       }
3930
3931   if (quotient == 0)
3932     {
3933       if (target && GET_MODE (target) != compute_mode)
3934         target = 0;
3935
3936       if (rem_flag)
3937         {
3938           /* Try to produce the remainder without producing the quotient.
3939              If we seem to have a divmod pattern that does not require widening,
3940              don't try widening here.  We should really have an WIDEN argument
3941              to expand_twoval_binop, since what we'd really like to do here is
3942              1) try a mod insn in compute_mode
3943              2) try a divmod insn in compute_mode
3944              3) try a div insn in compute_mode and multiply-subtract to get
3945                 remainder
3946              4) try the same things with widening allowed.  */
3947           remainder
3948             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3949                                  op0, op1, target,
3950                                  unsignedp,
3951                                  ((optab2->handlers[(int) compute_mode].insn_code
3952                                    != CODE_FOR_nothing)
3953                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3954           if (remainder == 0)
3955             {
3956               /* No luck there.  Can we do remainder and divide at once
3957                  without a library call?  */
3958               remainder = gen_reg_rtx (compute_mode);
3959               if (! expand_twoval_binop ((unsignedp
3960                                           ? udivmod_optab
3961                                           : sdivmod_optab),
3962                                          op0, op1,
3963                                          NULL_RTX, remainder, unsignedp))
3964                 remainder = 0;
3965             }
3966
3967           if (remainder)
3968             return gen_lowpart (mode, remainder);
3969         }
3970
3971       /* Produce the quotient.  Try a quotient insn, but not a library call.
3972          If we have a divmod in this mode, use it in preference to widening
3973          the div (for this test we assume it will not fail). Note that optab2
3974          is set to the one of the two optabs that the call below will use.  */
3975       quotient
3976         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3977                              op0, op1, rem_flag ? NULL_RTX : target,
3978                              unsignedp,
3979                              ((optab2->handlers[(int) compute_mode].insn_code
3980                                != CODE_FOR_nothing)
3981                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3982
3983       if (quotient == 0)
3984         {
3985           /* No luck there.  Try a quotient-and-remainder insn,
3986              keeping the quotient alone.  */
3987           quotient = gen_reg_rtx (compute_mode);
3988           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3989                                      op0, op1,
3990                                      quotient, NULL_RTX, unsignedp))
3991             {
3992               quotient = 0;
3993               if (! rem_flag)
3994                 /* Still no luck.  If we are not computing the remainder,
3995                    use a library call for the quotient.  */
3996                 quotient = sign_expand_binop (compute_mode,
3997                                               udiv_optab, sdiv_optab,
3998                                               op0, op1, target,
3999                                               unsignedp, OPTAB_LIB_WIDEN);
4000             }
4001         }
4002     }
4003
4004   if (rem_flag)
4005     {
4006       if (target && GET_MODE (target) != compute_mode)
4007         target = 0;
4008
4009       if (quotient == 0)
4010         /* No divide instruction either.  Use library for remainder.  */
4011         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4012                                        op0, op1, target,
4013                                        unsignedp, OPTAB_LIB_WIDEN);
4014       else
4015         {
4016           /* We divided.  Now finish doing X - Y * (X / Y).  */
4017           remainder = expand_mult (compute_mode, quotient, op1,
4018                                    NULL_RTX, unsignedp);
4019           remainder = expand_binop (compute_mode, sub_optab, op0,
4020                                     remainder, target, unsignedp,
4021                                     OPTAB_LIB_WIDEN);
4022         }
4023     }
4024
4025   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4026 }
4027 \f
4028 /* Return a tree node with data type TYPE, describing the value of X.
4029    Usually this is an RTL_EXPR, if there is no obvious better choice.
4030    X may be an expression, however we only support those expressions
4031    generated by loop.c.  */
4032
4033 tree
4034 make_tree (type, x)
4035      tree type;
4036      rtx x;
4037 {
4038   tree t;
4039
4040   switch (GET_CODE (x))
4041     {
4042     case CONST_INT:
4043       t = build_int_2 (INTVAL (x),
4044                        (TREE_UNSIGNED (type)
4045                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4046                        || INTVAL (x) >= 0 ? 0 : -1);
4047       TREE_TYPE (t) = type;
4048       return t;
4049
4050     case CONST_DOUBLE:
4051       if (GET_MODE (x) == VOIDmode)
4052         {
4053           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4054           TREE_TYPE (t) = type;
4055         }
4056       else
4057         {
4058           REAL_VALUE_TYPE d;
4059
4060           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4061           t = build_real (type, d);
4062         }
4063
4064       return t;
4065
4066     case CONST_VECTOR:
4067       {
4068         int i, units;
4069         rtx elt;
4070         tree t = NULL_TREE;
4071
4072         units = CONST_VECTOR_NUNITS (x);
4073
4074         /* Build a tree with vector elements.  */
4075         for (i = units - 1; i >= 0; --i)
4076           {
4077             elt = CONST_VECTOR_ELT (x, i);
4078             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4079           }
4080
4081         return build_vector (type, t);
4082       }
4083
4084     case PLUS:
4085       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4086                           make_tree (type, XEXP (x, 1))));
4087
4088     case MINUS:
4089       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4090                           make_tree (type, XEXP (x, 1))));
4091
4092     case NEG:
4093       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4094
4095     case MULT:
4096       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4097                           make_tree (type, XEXP (x, 1))));
4098
4099     case ASHIFT:
4100       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4101                           make_tree (type, XEXP (x, 1))));
4102
4103     case LSHIFTRT:
4104       t = (*lang_hooks.types.unsigned_type) (type);
4105       return fold (convert (type,
4106                             build (RSHIFT_EXPR, t,
4107                                    make_tree (t, XEXP (x, 0)),
4108                                    make_tree (type, XEXP (x, 1)))));
4109
4110     case ASHIFTRT:
4111       t = (*lang_hooks.types.signed_type) (type);
4112       return fold (convert (type,
4113                             build (RSHIFT_EXPR, t,
4114                                    make_tree (t, XEXP (x, 0)),
4115                                    make_tree (type, XEXP (x, 1)))));
4116
4117     case DIV:
4118       if (TREE_CODE (type) != REAL_TYPE)
4119         t = (*lang_hooks.types.signed_type) (type);
4120       else
4121         t = type;
4122
4123       return fold (convert (type,
4124                             build (TRUNC_DIV_EXPR, t,
4125                                    make_tree (t, XEXP (x, 0)),
4126                                    make_tree (t, XEXP (x, 1)))));
4127     case UDIV:
4128       t = (*lang_hooks.types.unsigned_type) (type);
4129       return fold (convert (type,
4130                             build (TRUNC_DIV_EXPR, t,
4131                                    make_tree (t, XEXP (x, 0)),
4132                                    make_tree (t, XEXP (x, 1)))));
4133
4134     case SIGN_EXTEND:
4135     case ZERO_EXTEND:
4136       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4137                                              GET_CODE (x) == ZERO_EXTEND);
4138       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4139
4140    default:
4141       t = make_node (RTL_EXPR);
4142       TREE_TYPE (t) = type;
4143
4144 #ifdef POINTERS_EXTEND_UNSIGNED
4145       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4146          ptr_mode.  So convert.  */
4147       if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4148         x = convert_memory_address (TYPE_MODE (type), x);
4149 #endif
4150
4151       RTL_EXPR_RTL (t) = x;
4152       /* There are no insns to be output
4153          when this rtl_expr is used.  */
4154       RTL_EXPR_SEQUENCE (t) = 0;
4155       return t;
4156     }
4157 }
4158
4159 /* Check whether the multiplication X * MULT + ADD overflows.
4160    X, MULT and ADD must be CONST_*.
4161    MODE is the machine mode for the computation.
4162    X and MULT must have mode MODE.  ADD may have a different mode.
4163    So can X (defaults to same as MODE).
4164    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4165
4166 bool
4167 const_mult_add_overflow_p (x, mult, add, mode, unsignedp)
4168      rtx x, mult, add;
4169      enum machine_mode mode;
4170      int unsignedp;
4171 {
4172   tree type, mult_type, add_type, result;
4173
4174   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4175
4176   /* In order to get a proper overflow indication from an unsigned
4177      type, we have to pretend that it's a sizetype.  */
4178   mult_type = type;
4179   if (unsignedp)
4180     {
4181       mult_type = copy_node (type);
4182       TYPE_IS_SIZETYPE (mult_type) = 1;
4183     }
4184
4185   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4186               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4187
4188   result = fold (build (PLUS_EXPR, mult_type,
4189                         fold (build (MULT_EXPR, mult_type,
4190                                      make_tree (mult_type, x),
4191                                      make_tree (mult_type, mult))),
4192                         make_tree (add_type, add)));
4193
4194   return TREE_CONSTANT_OVERFLOW (result);
4195 }
4196
4197 /* Return an rtx representing the value of X * MULT + ADD.
4198    TARGET is a suggestion for where to store the result (an rtx).
4199    MODE is the machine mode for the computation.
4200    X and MULT must have mode MODE.  ADD may have a different mode.
4201    So can X (defaults to same as MODE).
4202    UNSIGNEDP is nonzero to do unsigned multiplication.
4203    This may emit insns.  */
4204
4205 rtx
4206 expand_mult_add (x, target, mult, add, mode, unsignedp)
4207      rtx x, target, mult, add;
4208      enum machine_mode mode;
4209      int unsignedp;
4210 {
4211   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4212   tree add_type = (GET_MODE (add) == VOIDmode
4213                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4214                                                               unsignedp));
4215   tree result =  fold (build (PLUS_EXPR, type,
4216                               fold (build (MULT_EXPR, type,
4217                                            make_tree (type, x),
4218                                            make_tree (type, mult))),
4219                               make_tree (add_type, add)));
4220
4221   return expand_expr (result, target, VOIDmode, 0);
4222 }
4223 \f
4224 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4225    and returning TARGET.
4226
4227    If TARGET is 0, a pseudo-register or constant is returned.  */
4228
4229 rtx
4230 expand_and (mode, op0, op1, target)
4231      enum machine_mode mode;
4232      rtx op0, op1, target;
4233 {
4234   rtx tem = 0;
4235
4236   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4237     tem = simplify_binary_operation (AND, mode, op0, op1);
4238   if (tem == 0)
4239     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4240
4241   if (target == 0)
4242     target = tem;
4243   else if (tem != target)
4244     emit_move_insn (target, tem);
4245   return target;
4246 }
4247 \f
4248 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4249    and storing in TARGET.  Normally return TARGET.
4250    Return 0 if that cannot be done.
4251
4252    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4253    it is VOIDmode, they cannot both be CONST_INT.
4254
4255    UNSIGNEDP is for the case where we have to widen the operands
4256    to perform the operation.  It says to use zero-extension.
4257
4258    NORMALIZEP is 1 if we should convert the result to be either zero
4259    or one.  Normalize is -1 if we should convert the result to be
4260    either zero or -1.  If NORMALIZEP is zero, the result will be left
4261    "raw" out of the scc insn.  */
4262
4263 rtx
4264 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4265      rtx target;
4266      enum rtx_code code;
4267      rtx op0, op1;
4268      enum machine_mode mode;
4269      int unsignedp;
4270      int normalizep;
4271 {
4272   rtx subtarget;
4273   enum insn_code icode;
4274   enum machine_mode compare_mode;
4275   enum machine_mode target_mode = GET_MODE (target);
4276   rtx tem;
4277   rtx last = get_last_insn ();
4278   rtx pattern, comparison;
4279
4280   /* ??? Ok to do this and then fail? */
4281   op0 = protect_from_queue (op0, 0);
4282   op1 = protect_from_queue (op1, 0);
4283
4284   if (unsignedp)
4285     code = unsigned_condition (code);
4286
4287   /* If one operand is constant, make it the second one.  Only do this
4288      if the other operand is not constant as well.  */
4289
4290   if (swap_commutative_operands_p (op0, op1))
4291     {
4292       tem = op0;
4293       op0 = op1;
4294       op1 = tem;
4295       code = swap_condition (code);
4296     }
4297
4298   if (mode == VOIDmode)
4299     mode = GET_MODE (op0);
4300
4301   /* For some comparisons with 1 and -1, we can convert this to
4302      comparisons with zero.  This will often produce more opportunities for
4303      store-flag insns.  */
4304
4305   switch (code)
4306     {
4307     case LT:
4308       if (op1 == const1_rtx)
4309         op1 = const0_rtx, code = LE;
4310       break;
4311     case LE:
4312       if (op1 == constm1_rtx)
4313         op1 = const0_rtx, code = LT;
4314       break;
4315     case GE:
4316       if (op1 == const1_rtx)
4317         op1 = const0_rtx, code = GT;
4318       break;
4319     case GT:
4320       if (op1 == constm1_rtx)
4321         op1 = const0_rtx, code = GE;
4322       break;
4323     case GEU:
4324       if (op1 == const1_rtx)
4325         op1 = const0_rtx, code = NE;
4326       break;
4327     case LTU:
4328       if (op1 == const1_rtx)
4329         op1 = const0_rtx, code = EQ;
4330       break;
4331     default:
4332       break;
4333     }
4334
4335   /* If we are comparing a double-word integer with zero, we can convert
4336      the comparison into one involving a single word.  */
4337   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4338       && GET_MODE_CLASS (mode) == MODE_INT
4339       && op1 == const0_rtx
4340       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4341     {
4342       if (code == EQ || code == NE)
4343         {
4344           /* Do a logical OR of the two words and compare the result.  */
4345           rtx op0h = gen_highpart (word_mode, op0);
4346           rtx op0l = gen_lowpart (word_mode, op0);
4347           rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4348                                       NULL_RTX, unsignedp, OPTAB_DIRECT);
4349           if (op0both != 0)
4350             return emit_store_flag (target, code, op0both, op1, word_mode,
4351                                     unsignedp, normalizep);
4352         }
4353       else if (code == LT || code == GE)
4354         /* If testing the sign bit, can just test on high word.  */
4355         return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4356                                 op1, word_mode, unsignedp, normalizep);
4357     }
4358
4359   /* From now on, we won't change CODE, so set ICODE now.  */
4360   icode = setcc_gen_code[(int) code];
4361
4362   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4363      complement of A (for GE) and shifting the sign bit to the low bit.  */
4364   if (op1 == const0_rtx && (code == LT || code == GE)
4365       && GET_MODE_CLASS (mode) == MODE_INT
4366       && (normalizep || STORE_FLAG_VALUE == 1
4367           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4368               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4369                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4370     {
4371       subtarget = target;
4372
4373       /* If the result is to be wider than OP0, it is best to convert it
4374          first.  If it is to be narrower, it is *incorrect* to convert it
4375          first.  */
4376       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4377         {
4378           op0 = protect_from_queue (op0, 0);
4379           op0 = convert_modes (target_mode, mode, op0, 0);
4380           mode = target_mode;
4381         }
4382
4383       if (target_mode != mode)
4384         subtarget = 0;
4385
4386       if (code == GE)
4387         op0 = expand_unop (mode, one_cmpl_optab, op0,
4388                            ((STORE_FLAG_VALUE == 1 || normalizep)
4389                             ? 0 : subtarget), 0);
4390
4391       if (STORE_FLAG_VALUE == 1 || normalizep)
4392         /* If we are supposed to produce a 0/1 value, we want to do
4393            a logical shift from the sign bit to the low-order bit; for
4394            a -1/0 value, we do an arithmetic shift.  */
4395         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4396                             size_int (GET_MODE_BITSIZE (mode) - 1),
4397                             subtarget, normalizep != -1);
4398
4399       if (mode != target_mode)
4400         op0 = convert_modes (target_mode, mode, op0, 0);
4401
4402       return op0;
4403     }
4404
4405   if (icode != CODE_FOR_nothing)
4406     {
4407       insn_operand_predicate_fn pred;
4408
4409       /* We think we may be able to do this with a scc insn.  Emit the
4410          comparison and then the scc insn.
4411
4412          compare_from_rtx may call emit_queue, which would be deleted below
4413          if the scc insn fails.  So call it ourselves before setting LAST.
4414          Likewise for do_pending_stack_adjust.  */
4415
4416       emit_queue ();
4417       do_pending_stack_adjust ();
4418       last = get_last_insn ();
4419
4420       comparison
4421         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4422       if (GET_CODE (comparison) == CONST_INT)
4423         return (comparison == const0_rtx ? const0_rtx
4424                 : normalizep == 1 ? const1_rtx
4425                 : normalizep == -1 ? constm1_rtx
4426                 : const_true_rtx);
4427
4428       /* The code of COMPARISON may not match CODE if compare_from_rtx
4429          decided to swap its operands and reverse the original code.
4430
4431          We know that compare_from_rtx returns either a CONST_INT or
4432          a new comparison code, so it is safe to just extract the
4433          code from COMPARISON.  */
4434       code = GET_CODE (comparison);
4435
4436       /* Get a reference to the target in the proper mode for this insn.  */
4437       compare_mode = insn_data[(int) icode].operand[0].mode;
4438       subtarget = target;
4439       pred = insn_data[(int) icode].operand[0].predicate;
4440       if (preserve_subexpressions_p ()
4441           || ! (*pred) (subtarget, compare_mode))
4442         subtarget = gen_reg_rtx (compare_mode);
4443
4444       pattern = GEN_FCN (icode) (subtarget);
4445       if (pattern)
4446         {
4447           emit_insn (pattern);
4448
4449           /* If we are converting to a wider mode, first convert to
4450              TARGET_MODE, then normalize.  This produces better combining
4451              opportunities on machines that have a SIGN_EXTRACT when we are
4452              testing a single bit.  This mostly benefits the 68k.
4453
4454              If STORE_FLAG_VALUE does not have the sign bit set when
4455              interpreted in COMPARE_MODE, we can do this conversion as
4456              unsigned, which is usually more efficient.  */
4457           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4458             {
4459               convert_move (target, subtarget,
4460                             (GET_MODE_BITSIZE (compare_mode)
4461                              <= HOST_BITS_PER_WIDE_INT)
4462                             && 0 == (STORE_FLAG_VALUE
4463                                      & ((HOST_WIDE_INT) 1
4464                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4465               op0 = target;
4466               compare_mode = target_mode;
4467             }
4468           else
4469             op0 = subtarget;
4470
4471           /* If we want to keep subexpressions around, don't reuse our
4472              last target.  */
4473
4474           if (preserve_subexpressions_p ())
4475             subtarget = 0;
4476
4477           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4478              we don't have to do anything.  */
4479           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4480             ;
4481           /* STORE_FLAG_VALUE might be the most negative number, so write
4482              the comparison this way to avoid a compiler-time warning.  */
4483           else if (- normalizep == STORE_FLAG_VALUE)
4484             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4485
4486           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4487              makes it hard to use a value of just the sign bit due to
4488              ANSI integer constant typing rules.  */
4489           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4490                    && (STORE_FLAG_VALUE
4491                        & ((HOST_WIDE_INT) 1
4492                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4493             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4494                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4495                                 subtarget, normalizep == 1);
4496           else if (STORE_FLAG_VALUE & 1)
4497             {
4498               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4499               if (normalizep == -1)
4500                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4501             }
4502           else
4503             abort ();
4504
4505           /* If we were converting to a smaller mode, do the
4506              conversion now.  */
4507           if (target_mode != compare_mode)
4508             {
4509               convert_move (target, op0, 0);
4510               return target;
4511             }
4512           else
4513             return op0;
4514         }
4515     }
4516
4517   delete_insns_since (last);
4518
4519   /* If expensive optimizations, use different pseudo registers for each
4520      insn, instead of reusing the same pseudo.  This leads to better CSE,
4521      but slows down the compiler, since there are more pseudos */
4522   subtarget = (!flag_expensive_optimizations
4523                && (target_mode == mode)) ? target : NULL_RTX;
4524
4525   /* If we reached here, we can't do this with a scc insn.  However, there
4526      are some comparisons that can be done directly.  For example, if
4527      this is an equality comparison of integers, we can try to exclusive-or
4528      (or subtract) the two operands and use a recursive call to try the
4529      comparison with zero.  Don't do any of these cases if branches are
4530      very cheap.  */
4531
4532   if (BRANCH_COST > 0
4533       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4534       && op1 != const0_rtx)
4535     {
4536       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4537                           OPTAB_WIDEN);
4538
4539       if (tem == 0)
4540         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4541                             OPTAB_WIDEN);
4542       if (tem != 0)
4543         tem = emit_store_flag (target, code, tem, const0_rtx,
4544                                mode, unsignedp, normalizep);
4545       if (tem == 0)
4546         delete_insns_since (last);
4547       return tem;
4548     }
4549
4550   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4551      the constant zero.  Reject all other comparisons at this point.  Only
4552      do LE and GT if branches are expensive since they are expensive on
4553      2-operand machines.  */
4554
4555   if (BRANCH_COST == 0
4556       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4557       || (code != EQ && code != NE
4558           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4559     return 0;
4560
4561   /* See what we need to return.  We can only return a 1, -1, or the
4562      sign bit.  */
4563
4564   if (normalizep == 0)
4565     {
4566       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4567         normalizep = STORE_FLAG_VALUE;
4568
4569       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4570                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4571                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4572         ;
4573       else
4574         return 0;
4575     }
4576
4577   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4578      do the necessary operation below.  */
4579
4580   tem = 0;
4581
4582   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4583      the sign bit set.  */
4584
4585   if (code == LE)
4586     {
4587       /* This is destructive, so SUBTARGET can't be OP0.  */
4588       if (rtx_equal_p (subtarget, op0))
4589         subtarget = 0;
4590
4591       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4592                           OPTAB_WIDEN);
4593       if (tem)
4594         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4595                             OPTAB_WIDEN);
4596     }
4597
4598   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4599      number of bits in the mode of OP0, minus one.  */
4600
4601   if (code == GT)
4602     {
4603       if (rtx_equal_p (subtarget, op0))
4604         subtarget = 0;
4605
4606       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4607                           size_int (GET_MODE_BITSIZE (mode) - 1),
4608                           subtarget, 0);
4609       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4610                           OPTAB_WIDEN);
4611     }
4612
4613   if (code == EQ || code == NE)
4614     {
4615       /* For EQ or NE, one way to do the comparison is to apply an operation
4616          that converts the operand into a positive number if it is nonzero
4617          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4618          for NE we negate.  This puts the result in the sign bit.  Then we
4619          normalize with a shift, if needed.
4620
4621          Two operations that can do the above actions are ABS and FFS, so try
4622          them.  If that doesn't work, and MODE is smaller than a full word,
4623          we can use zero-extension to the wider mode (an unsigned conversion)
4624          as the operation.  */
4625
4626       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4627          that is compensated by the subsequent overflow when subtracting
4628          one / negating.  */
4629
4630       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4631         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4632       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4633         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4634       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4635         {
4636           op0 = protect_from_queue (op0, 0);
4637           tem = convert_modes (word_mode, mode, op0, 1);
4638           mode = word_mode;
4639         }
4640
4641       if (tem != 0)
4642         {
4643           if (code == EQ)
4644             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4645                                 0, OPTAB_WIDEN);
4646           else
4647             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4648         }
4649
4650       /* If we couldn't do it that way, for NE we can "or" the two's complement
4651          of the value with itself.  For EQ, we take the one's complement of
4652          that "or", which is an extra insn, so we only handle EQ if branches
4653          are expensive.  */
4654
4655       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4656         {
4657           if (rtx_equal_p (subtarget, op0))
4658             subtarget = 0;
4659
4660           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4661           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4662                               OPTAB_WIDEN);
4663
4664           if (tem && code == EQ)
4665             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4666         }
4667     }
4668
4669   if (tem && normalizep)
4670     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4671                         size_int (GET_MODE_BITSIZE (mode) - 1),
4672                         subtarget, normalizep == 1);
4673
4674   if (tem)
4675     {
4676       if (GET_MODE (tem) != target_mode)
4677         {
4678           convert_move (target, tem, 0);
4679           tem = target;
4680         }
4681       else if (!subtarget)
4682         {
4683           emit_move_insn (target, tem);
4684           tem = target;
4685         }
4686     }
4687   else
4688     delete_insns_since (last);
4689
4690   return tem;
4691 }
4692
4693 /* Like emit_store_flag, but always succeeds.  */
4694
4695 rtx
4696 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4697      rtx target;
4698      enum rtx_code code;
4699      rtx op0, op1;
4700      enum machine_mode mode;
4701      int unsignedp;
4702      int normalizep;
4703 {
4704   rtx tem, label;
4705
4706   /* First see if emit_store_flag can do the job.  */
4707   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4708   if (tem != 0)
4709     return tem;
4710
4711   if (normalizep == 0)
4712     normalizep = 1;
4713
4714   /* If this failed, we have to do this with set/compare/jump/set code.  */
4715
4716   if (GET_CODE (target) != REG
4717       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4718     target = gen_reg_rtx (GET_MODE (target));
4719
4720   emit_move_insn (target, const1_rtx);
4721   label = gen_label_rtx ();
4722   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4723                            NULL_RTX, label);
4724
4725   emit_move_insn (target, const0_rtx);
4726   emit_label (label);
4727
4728   return target;
4729 }
4730 \f
4731 /* Perform possibly multi-word comparison and conditional jump to LABEL
4732    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4733
4734    The algorithm is based on the code in expr.c:do_jump.
4735
4736    Note that this does not perform a general comparison.  Only variants
4737    generated within expmed.c are correctly handled, others abort (but could
4738    be handled if needed).  */
4739
4740 static void
4741 do_cmp_and_jump (arg1, arg2, op, mode, label)
4742      rtx arg1, arg2, label;
4743      enum rtx_code op;
4744      enum machine_mode mode;
4745 {
4746   /* If this mode is an integer too wide to compare properly,
4747      compare word by word.  Rely on cse to optimize constant cases.  */
4748
4749   if (GET_MODE_CLASS (mode) == MODE_INT
4750       && ! can_compare_p (op, mode, ccp_jump))
4751     {
4752       rtx label2 = gen_label_rtx ();
4753
4754       switch (op)
4755         {
4756         case LTU:
4757           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4758           break;
4759
4760         case LEU:
4761           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4762           break;
4763
4764         case LT:
4765           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4766           break;
4767
4768         case GT:
4769           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4770           break;
4771
4772         case GE:
4773           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4774           break;
4775
4776           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4777              that's the only equality operations we do */
4778         case EQ:
4779           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4780             abort ();
4781           do_jump_by_parts_equality_rtx (arg1, label2, label);
4782           break;
4783
4784         case NE:
4785           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4786             abort ();
4787           do_jump_by_parts_equality_rtx (arg1, label, label2);
4788           break;
4789
4790         default:
4791           abort ();
4792         }
4793
4794       emit_label (label2);
4795     }
4796   else
4797     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4798 }