gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54
  55 /* Nonzero means divides or modulus operations are relatively cheap for
  56    powers of two, so don't use branches; emit the operation instead.
  57    Usually, this will mean that the MD file will emit non-branch
  58    sequences.  */
  59
  60 static int sdiv_pow2_cheap, smod_pow2_cheap;
  61
  62 #ifndef SLOW_UNALIGNED_ACCESS
  63 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  64 #endif
  65
  66 /* For compilers that support multiple targets with different word sizes,
  67    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  68    is the H8/300(H) compiler.  */
  69
  70 #ifndef MAX_BITS_PER_WORD
  71 #define MAX_BITS_PER_WORD BITS_PER_WORD
  72 #endif
  73
  74 /* Reduce conditional compilation elsewhere.  */
  75 #ifndef HAVE_insv
  76 #define HAVE_insv       0
  77 #define CODE_FOR_insv   CODE_FOR_nothing
  78 #define gen_insv(a,b,c,d) NULL_RTX
  79 #endif
  80 #ifndef HAVE_extv
  81 #define HAVE_extv       0
  82 #define CODE_FOR_extv   CODE_FOR_nothing
  83 #define gen_extv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extzv
  86 #define HAVE_extzv      0
  87 #define CODE_FOR_extzv  CODE_FOR_nothing
  88 #define gen_extzv(a,b,c,d) NULL_RTX
  89 #endif
  90
  91 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  92    shift count and some by mode.  */
  93 static int add_cost, negate_cost, zero_cost;
  94 static int shift_cost[MAX_BITS_PER_WORD];
  95 static int shiftadd_cost[MAX_BITS_PER_WORD];
  96 static int shiftsub_cost[MAX_BITS_PER_WORD];
  97 static int mul_cost[NUM_MACHINE_MODES];
  98 static int div_cost[NUM_MACHINE_MODES];
  99 static int mul_widen_cost[NUM_MACHINE_MODES];
 100 static int mul_highpart_cost[NUM_MACHINE_MODES];
 101
 102 void
 103 init_expmed (void)
 104 {
 105   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 106   int dummy;
 107   int m;
 108   enum machine_mode mode, wider_mode;
 109
 110   start_sequence ();
 111
 112   /* This is "some random pseudo register" for purposes of calling recog
 113      to see what insns exist.  */
 114   reg = gen_rtx_REG (word_mode, 10000);
 115
 116   zero_cost = rtx_cost (const0_rtx, 0);
 117   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 118
 119   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                                        gen_rtx_ASHIFT (word_mode, reg,
 121                                                        const0_rtx)));
 122
 123   shiftadd_insn
 124     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 125                               gen_rtx_PLUS (word_mode,
 126                                             gen_rtx_MULT (word_mode,
 127                                                           reg, const0_rtx),
 128                                             reg)));
 129
 130   shiftsub_insn
 131     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 132                               gen_rtx_MINUS (word_mode,
 133                                              gen_rtx_MULT (word_mode,
 134                                                            reg, const0_rtx),
 135                                              reg)));
 136
 137   init_recog ();
 138
 139   shift_cost[0] = 0;
 140   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 146
 147       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 148       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 149         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 150
 151       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 152       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 153         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 154
 155       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 156       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 157         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 158     }
 159
 160   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 161
 162   sdiv_pow2_cheap
 163     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 164        <= 2 * add_cost);
 165   smod_pow2_cheap
 166     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168
 169   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 170        mode != VOIDmode;
 171        mode = GET_MODE_WIDER_MODE (mode))
 172     {
 173       reg = gen_rtx_REG (mode, 10000);
 174       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 175       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 176       wider_mode = GET_MODE_WIDER_MODE (mode);
 177       if (wider_mode != VOIDmode)
 178         {
 179           mul_widen_cost[(int) wider_mode]
 180             = rtx_cost (gen_rtx_MULT (wider_mode,
 181                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 182                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 183                         SET);
 184           mul_highpart_cost[(int) mode]
 185             = rtx_cost (gen_rtx_TRUNCATE
 186                         (mode,
 187                          gen_rtx_LSHIFTRT (wider_mode,
 188                                            gen_rtx_MULT (wider_mode,
 189                                                          gen_rtx_ZERO_EXTEND
 190                                                          (wider_mode, reg),
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg)),
 193                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 194                         SET);
 195         }
 196     }
 197
 198   end_sequence ();
 199 }
 200
 201 /* Return an rtx representing minus the value of X.
 202    MODE is the intended mode of the result,
 203    useful if X is a CONST_INT.  */
 204
 205 rtx
 206 negate_rtx (enum machine_mode mode, rtx x)
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215
 216 /* Report on the availability of insv/extv/extzv and the desired mode
 217    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 218    is false; else the mode of the specified operand.  If OPNO is -1,
 219    all the caller cares about is whether the insn is available.  */
 220 enum machine_mode
 221 mode_for_extraction (enum extraction_pattern pattern, int opno)
 222 {
 223   const struct insn_data *data;
 224
 225   switch (pattern)
 226     {
 227     case EP_insv:
 228       if (HAVE_insv)
 229         {
 230           data = &insn_data[CODE_FOR_insv];
 231           break;
 232         }
 233       return MAX_MACHINE_MODE;
 234
 235     case EP_extv:
 236       if (HAVE_extv)
 237         {
 238           data = &insn_data[CODE_FOR_extv];
 239           break;
 240         }
 241       return MAX_MACHINE_MODE;
 242
 243     case EP_extzv:
 244       if (HAVE_extzv)
 245         {
 246           data = &insn_data[CODE_FOR_extzv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     default:
 252       abort ();
 253     }
 254
 255   if (opno == -1)
 256     return VOIDmode;
 257
 258   /* Everyone who uses this function used to follow it with
 259      if (result == VOIDmode) result = word_mode; */
 260   if (data->operand[opno].mode == VOIDmode)
 261     return word_mode;
 262   return data->operand[opno].mode;
 263 }
 264
 265 \f
 266 /* Generate code to store value from rtx VALUE
 267    into a bit-field within structure STR_RTX
 268    containing BITSIZE bits starting at bit BITNUM.
 269    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 270    ALIGN is the alignment that STR_RTX is known to have.
 271    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 272
 273 /* ??? Note that there are two different ideas here for how
 274    to determine the size to count bits within, for a register.
 275    One is BITS_PER_WORD, and the other is the size of operand 3
 276    of the insv pattern.
 277
 278    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 279    else, we use the mode of operand 3.  */
 280
 281 rtx
 282 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 283                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 284                  rtx value, HOST_WIDE_INT total_size)
 285 {
 286   unsigned int unit
 287     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 288   unsigned HOST_WIDE_INT offset = bitnum / unit;
 289   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 290   rtx op0 = str_rtx;
 291   int byte_offset;
 292
 293   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 294
 295   /* Discount the part of the structure before the desired byte.
 296      We need to know how many bytes are safe to reference after it.  */
 297   if (total_size >= 0)
 298     total_size -= (bitpos / BIGGEST_ALIGNMENT
 299                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 300
 301   while (GET_CODE (op0) == SUBREG)
 302     {
 303       /* The following line once was done only if WORDS_BIG_ENDIAN,
 304          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 305          meaningful at a much higher level; when structures are copied
 306          between memory and regs, the higher-numbered regs
 307          always get higher addresses.  */
 308       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 309       /* We used to adjust BITPOS here, but now we do the whole adjustment
 310          right after the loop.  */
 311       op0 = SUBREG_REG (op0);
 312     }
 313
 314   value = protect_from_queue (value, 0);
 315
 316   if (flag_force_mem)
 317     {
 318       int old_generating_concat_p = generating_concat_p;
 319       generating_concat_p = 0;
 320       value = force_not_mem (value);
 321       generating_concat_p = old_generating_concat_p;
 322     }
 323
 324   /* If the target is a register, overwriting the entire object, or storing
 325      a full-word or multi-word field can be done with just a SUBREG.
 326
 327      If the target is memory, storing any naturally aligned field can be
 328      done with a simple store.  For targets that support fast unaligned
 329      memory, any naturally sized, unit aligned field can be done directly.  */
 330
 331   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 332                 + (offset * UNITS_PER_WORD);
 333
 334   if (bitpos == 0
 335       && bitsize == GET_MODE_BITSIZE (fieldmode)
 336       && (GET_CODE (op0) != MEM
 337           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 338              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 339              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 340           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 341              || (offset * BITS_PER_UNIT % bitsize == 0
 342                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 343     {
 344       if (GET_MODE (op0) != fieldmode)
 345         {
 346           if (GET_CODE (op0) == SUBREG)
 347             {
 348               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 349                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 350                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 351                 op0 = SUBREG_REG (op0);
 352               else
 353                 /* Else we've got some float mode source being extracted into
 354                    a different float mode destination -- this combination of
 355                    subregs results in Severe Tire Damage.  */
 356                 abort ();
 357             }
 358           if (GET_CODE (op0) == REG)
 359             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 360           else
 361             op0 = adjust_address (op0, fieldmode, offset);
 362         }
 363       emit_move_insn (op0, value);
 364       return value;
 365     }
 366
 367   /* Make sure we are playing with integral modes.  Pun with subregs
 368      if we aren't.  This must come after the entire register case above,
 369      since that case is valid for any mode.  The following cases are only
 370      valid for integral modes.  */
 371   {
 372     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 373     if (imode != GET_MODE (op0))
 374       {
 375         if (GET_CODE (op0) == MEM)
 376           op0 = adjust_address (op0, imode, 0);
 377         else if (imode != BLKmode)
 378           op0 = gen_lowpart (imode, op0);
 379         else
 380           abort ();
 381       }
 382   }
 383
 384   /* We may be accessing data outside the field, which means
 385      we can alias adjacent data.  */
 386   if (GET_CODE (op0) == MEM)
 387     {
 388       op0 = shallow_copy_rtx (op0);
 389       set_mem_alias_set (op0, 0);
 390       set_mem_expr (op0, 0);
 391     }
 392
 393   /* If OP0 is a register, BITPOS must count within a word.
 394      But as we have it, it counts within whatever size OP0 now has.
 395      On a bigendian machine, these are not the same, so convert.  */
 396   if (BYTES_BIG_ENDIAN
 397       && GET_CODE (op0) != MEM
 398       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 399     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 400
 401   /* Storing an lsb-aligned field in a register
 402      can be done with a movestrict instruction.  */
 403
 404   if (GET_CODE (op0) != MEM
 405       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 406       && bitsize == GET_MODE_BITSIZE (fieldmode)
 407       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 408           != CODE_FOR_nothing))
 409     {
 410       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 411
 412       /* Get appropriate low part of the value being stored.  */
 413       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 414         value = gen_lowpart (fieldmode, value);
 415       else if (!(GET_CODE (value) == SYMBOL_REF
 416                  || GET_CODE (value) == LABEL_REF
 417                  || GET_CODE (value) == CONST))
 418         value = convert_to_mode (fieldmode, value, 0);
 419
 420       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 421         value = copy_to_mode_reg (fieldmode, value);
 422
 423       if (GET_CODE (op0) == SUBREG)
 424         {
 425           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 426               || GET_MODE_CLASS (fieldmode) == MODE_INT
 427               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 428             op0 = SUBREG_REG (op0);
 429           else
 430             /* Else we've got some float mode source being extracted into
 431                a different float mode destination -- this combination of
 432                subregs results in Severe Tire Damage.  */
 433             abort ();
 434         }
 435
 436       emit_insn (GEN_FCN (icode)
 437                  (gen_rtx_SUBREG (fieldmode, op0,
 438                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 439                                   + (offset * UNITS_PER_WORD)),
 440                                   value));
 441
 442       return value;
 443     }
 444
 445   /* Handle fields bigger than a word.  */
 446
 447   if (bitsize > BITS_PER_WORD)
 448     {
 449       /* Here we transfer the words of the field
 450          in the order least significant first.
 451          This is because the most significant word is the one which may
 452          be less than full.
 453          However, only do that if the value is not BLKmode.  */
 454
 455       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 456       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 457       unsigned int i;
 458
 459       /* This is the mode we must force value to, so that there will be enough
 460          subwords to extract.  Note that fieldmode will often (always?) be
 461          VOIDmode, because that is what store_field uses to indicate that this
 462          is a bit field, but passing VOIDmode to operand_subword_force will
 463          result in an abort.  */
 464       fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 465
 466       for (i = 0; i < nwords; i++)
 467         {
 468           /* If I is 0, use the low-order word in both field and target;
 469              if I is 1, use the next to lowest word; and so on.  */
 470           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 471           unsigned int bit_offset = (backwards
 472                                      ? MAX ((int) bitsize - ((int) i + 1)
 473                                             * BITS_PER_WORD,
 474                                             0)
 475                                      : (int) i * BITS_PER_WORD);
 476
 477           store_bit_field (op0, MIN (BITS_PER_WORD,
 478                                      bitsize - i * BITS_PER_WORD),
 479                            bitnum + bit_offset, word_mode,
 480                            operand_subword_force (value, wordnum,
 481                                                   (GET_MODE (value) == VOIDmode
 482                                                    ? fieldmode
 483                                                    : GET_MODE (value))),
 484                            total_size);
 485         }
 486       return value;
 487     }
 488
 489   /* From here on we can assume that the field to be stored in is
 490      a full-word (whatever type that is), since it is shorter than a word.  */
 491
 492   /* OFFSET is the number of words or bytes (UNIT says which)
 493      from STR_RTX to the first word or byte containing part of the field.  */
 494
 495   if (GET_CODE (op0) != MEM)
 496     {
 497       if (offset != 0
 498           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 499         {
 500           if (GET_CODE (op0) != REG)
 501             {
 502               /* Since this is a destination (lvalue), we can't copy it to a
 503                  pseudo.  We can trivially remove a SUBREG that does not
 504                  change the size of the operand.  Such a SUBREG may have been
 505                  added above.  Otherwise, abort.  */
 506               if (GET_CODE (op0) == SUBREG
 507                   && (GET_MODE_SIZE (GET_MODE (op0))
 508                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 509                 op0 = SUBREG_REG (op0);
 510               else
 511                 abort ();
 512             }
 513           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 514                                 op0, (offset * UNITS_PER_WORD));
 515         }
 516       offset = 0;
 517     }
 518   else
 519     op0 = protect_from_queue (op0, 1);
 520
 521   /* If VALUE is a floating-point mode, access it as an integer of the
 522      corresponding size.  This can occur on a machine with 64 bit registers
 523      that uses SFmode for float.  This can also occur for unaligned float
 524      structure fields.  */
 525   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 526       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 527     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 528                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 529                          value);
 530
 531   /* Now OFFSET is nonzero only if OP0 is memory
 532      and is therefore always measured in bytes.  */
 533
 534   if (HAVE_insv
 535       && GET_MODE (value) != BLKmode
 536       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 537       /* Ensure insv's size is wide enough for this field.  */
 538       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 539       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 540             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 541     {
 542       int xbitpos = bitpos;
 543       rtx value1;
 544       rtx xop0 = op0;
 545       rtx last = get_last_insn ();
 546       rtx pat;
 547       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 548       int save_volatile_ok = volatile_ok;
 549
 550       volatile_ok = 1;
 551
 552       /* If this machine's insv can only insert into a register, copy OP0
 553          into a register and save it back later.  */
 554       /* This used to check flag_force_mem, but that was a serious
 555          de-optimization now that flag_force_mem is enabled by -O2.  */
 556       if (GET_CODE (op0) == MEM
 557           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 558                 (op0, VOIDmode)))
 559         {
 560           rtx tempreg;
 561           enum machine_mode bestmode;
 562
 563           /* Get the mode to use for inserting into this field.  If OP0 is
 564              BLKmode, get the smallest mode consistent with the alignment. If
 565              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 566              mode. Otherwise, use the smallest mode containing the field.  */
 567
 568           if (GET_MODE (op0) == BLKmode
 569               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 570             bestmode
 571               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 572                                MEM_VOLATILE_P (op0));
 573           else
 574             bestmode = GET_MODE (op0);
 575
 576           if (bestmode == VOIDmode
 577               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 578                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 579             goto insv_loses;
 580
 581           /* Adjust address to point to the containing unit of that mode.
 582              Compute offset as multiple of this unit, counting in bytes.  */
 583           unit = GET_MODE_BITSIZE (bestmode);
 584           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 585           bitpos = bitnum % unit;
 586           op0 = adjust_address (op0, bestmode,  offset);
 587
 588           /* Fetch that unit, store the bitfield in it, then store
 589              the unit.  */
 590           tempreg = copy_to_reg (op0);
 591           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 592                            total_size);
 593           emit_move_insn (op0, tempreg);
 594           return value;
 595         }
 596       volatile_ok = save_volatile_ok;
 597
 598       /* Add OFFSET into OP0's address.  */
 599       if (GET_CODE (xop0) == MEM)
 600         xop0 = adjust_address (xop0, byte_mode, offset);
 601
 602       /* If xop0 is a register, we need it in MAXMODE
 603          to make it acceptable to the format of insv.  */
 604       if (GET_CODE (xop0) == SUBREG)
 605         /* We can't just change the mode, because this might clobber op0,
 606            and we will need the original value of op0 if insv fails.  */
 607         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 608       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 609         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 610
 611       /* On big-endian machines, we count bits from the most significant.
 612          If the bit field insn does not, we must invert.  */
 613
 614       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 615         xbitpos = unit - bitsize - xbitpos;
 616
 617       /* We have been counting XBITPOS within UNIT.
 618          Count instead within the size of the register.  */
 619       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 620         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 621
 622       unit = GET_MODE_BITSIZE (maxmode);
 623
 624       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 625       value1 = value;
 626       if (GET_MODE (value) != maxmode)
 627         {
 628           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 629             {
 630               /* Optimization: Don't bother really extending VALUE
 631                  if it has all the bits we will actually use.  However,
 632                  if we must narrow it, be sure we do it correctly.  */
 633
 634               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 635                 {
 636                   rtx tmp;
 637
 638                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 639                   if (! tmp)
 640                     tmp = simplify_gen_subreg (maxmode,
 641                                                force_reg (GET_MODE (value),
 642                                                           value1),
 643                                                GET_MODE (value), 0);
 644                   value1 = tmp;
 645                 }
 646               else
 647                 value1 = gen_lowpart (maxmode, value1);
 648             }
 649           else if (GET_CODE (value) == CONST_INT)
 650             value1 = gen_int_mode (INTVAL (value), maxmode);
 651           else if (!CONSTANT_P (value))
 652             /* Parse phase is supposed to make VALUE's data type
 653                match that of the component reference, which is a type
 654                at least as wide as the field; so VALUE should have
 655                a mode that corresponds to that type.  */
 656             abort ();
 657         }
 658
 659       /* If this machine's insv insists on a register,
 660          get VALUE1 into a register.  */
 661       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 662              (value1, maxmode)))
 663         value1 = force_reg (maxmode, value1);
 664
 665       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 666       if (pat)
 667         emit_insn (pat);
 668       else
 669         {
 670           delete_insns_since (last);
 671           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 672         }
 673     }
 674   else
 675     insv_loses:
 676     /* Insv is not available; store using shifts and boolean ops.  */
 677     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 678   return value;
 679 }
 680 \f
 681 /* Use shifts and boolean operations to store VALUE
 682    into a bit field of width BITSIZE
 683    in a memory location specified by OP0 except offset by OFFSET bytes.
 684      (OFFSET must be 0 if OP0 is a register.)
 685    The field starts at position BITPOS within the byte.
 686     (If OP0 is a register, it may be a full word or a narrower mode,
 687      but BITPOS still counts within a full word,
 688      which is significant on bigendian machines.)
 689
 690    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 691
 692 static void
 693 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 694                        unsigned HOST_WIDE_INT bitsize,
 695                        unsigned HOST_WIDE_INT bitpos, rtx value)
 696 {
 697   enum machine_mode mode;
 698   unsigned int total_bits = BITS_PER_WORD;
 699   rtx subtarget, temp;
 700   int all_zero = 0;
 701   int all_one = 0;
 702
 703   /* There is a case not handled here:
 704      a structure with a known alignment of just a halfword
 705      and a field split across two aligned halfwords within the structure.
 706      Or likewise a structure with a known alignment of just a byte
 707      and a field split across two bytes.
 708      Such cases are not supposed to be able to occur.  */
 709
 710   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 711     {
 712       if (offset != 0)
 713         abort ();
 714       /* Special treatment for a bit field split across two registers.  */
 715       if (bitsize + bitpos > BITS_PER_WORD)
 716         {
 717           store_split_bit_field (op0, bitsize, bitpos, value);
 718           return;
 719         }
 720     }
 721   else
 722     {
 723       /* Get the proper mode to use for this field.  We want a mode that
 724          includes the entire field.  If such a mode would be larger than
 725          a word, we won't be doing the extraction the normal way.
 726          We don't want a mode bigger than the destination.  */
 727
 728       mode = GET_MODE (op0);
 729       if (GET_MODE_BITSIZE (mode) == 0
 730           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 731         mode = word_mode;
 732       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 733                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 734
 735       if (mode == VOIDmode)
 736         {
 737           /* The only way this should occur is if the field spans word
 738              boundaries.  */
 739           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 740                                  value);
 741           return;
 742         }
 743
 744       total_bits = GET_MODE_BITSIZE (mode);
 745
 746       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 747          be in the range 0 to total_bits-1, and put any excess bytes in
 748          OFFSET.  */
 749       if (bitpos >= total_bits)
 750         {
 751           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 752           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 753                      * BITS_PER_UNIT);
 754         }
 755
 756       /* Get ref to an aligned byte, halfword, or word containing the field.
 757          Adjust BITPOS to be position within a word,
 758          and OFFSET to be the offset of that word.
 759          Then alter OP0 to refer to that word.  */
 760       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 761       offset -= (offset % (total_bits / BITS_PER_UNIT));
 762       op0 = adjust_address (op0, mode, offset);
 763     }
 764
 765   mode = GET_MODE (op0);
 766
 767   /* Now MODE is either some integral mode for a MEM as OP0,
 768      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 769      The bit field is contained entirely within OP0.
 770      BITPOS is the starting bit number within OP0.
 771      (OP0's mode may actually be narrower than MODE.)  */
 772
 773   if (BYTES_BIG_ENDIAN)
 774       /* BITPOS is the distance between our msb
 775          and that of the containing datum.
 776          Convert it to the distance from the lsb.  */
 777       bitpos = total_bits - bitsize - bitpos;
 778
 779   /* Now BITPOS is always the distance between our lsb
 780      and that of OP0.  */
 781
 782   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 783      we must first convert its mode to MODE.  */
 784
 785   if (GET_CODE (value) == CONST_INT)
 786     {
 787       HOST_WIDE_INT v = INTVAL (value);
 788
 789       if (bitsize < HOST_BITS_PER_WIDE_INT)
 790         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 791
 792       if (v == 0)
 793         all_zero = 1;
 794       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 795                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 796                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 797         all_one = 1;
 798
 799       value = lshift_value (mode, value, bitpos, bitsize);
 800     }
 801   else
 802     {
 803       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 804                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 805
 806       if (GET_MODE (value) != mode)
 807         {
 808           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 809               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 810             value = gen_lowpart (mode, value);
 811           else
 812             value = convert_to_mode (mode, value, 1);
 813         }
 814
 815       if (must_and)
 816         value = expand_binop (mode, and_optab, value,
 817                               mask_rtx (mode, 0, bitsize, 0),
 818                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 819       if (bitpos > 0)
 820         value = expand_shift (LSHIFT_EXPR, mode, value,
 821                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 822     }
 823
 824   /* Now clear the chosen bits in OP0,
 825      except that if VALUE is -1 we need not bother.  */
 826
 827   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 828
 829   if (! all_one)
 830     {
 831       temp = expand_binop (mode, and_optab, op0,
 832                            mask_rtx (mode, bitpos, bitsize, 1),
 833                            subtarget, 1, OPTAB_LIB_WIDEN);
 834       subtarget = temp;
 835     }
 836   else
 837     temp = op0;
 838
 839   /* Now logical-or VALUE into OP0, unless it is zero.  */
 840
 841   if (! all_zero)
 842     temp = expand_binop (mode, ior_optab, temp, value,
 843                          subtarget, 1, OPTAB_LIB_WIDEN);
 844   if (op0 != temp)
 845     emit_move_insn (op0, temp);
 846 }
 847 \f
 848 /* Store a bit field that is split across multiple accessible memory objects.
 849
 850    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 851    BITSIZE is the field width; BITPOS the position of its first bit
 852    (within the word).
 853    VALUE is the value to store.
 854
 855    This does not yet handle fields wider than BITS_PER_WORD.  */
 856
 857 static void
 858 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 859                        unsigned HOST_WIDE_INT bitpos, rtx value)
 860 {
 861   unsigned int unit;
 862   unsigned int bitsdone = 0;
 863
 864   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 865      much at a time.  */
 866   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 867     unit = BITS_PER_WORD;
 868   else
 869     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 870
 871   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 872      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 873      that VALUE might be a floating-point constant.  */
 874   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 875     {
 876       rtx word = gen_lowpart_common (word_mode, value);
 877
 878       if (word && (value != word))
 879         value = word;
 880       else
 881         value = gen_lowpart_common (word_mode,
 882                                     force_reg (GET_MODE (value) != VOIDmode
 883                                                ? GET_MODE (value)
 884                                                : word_mode, value));
 885     }
 886   else if (GET_CODE (value) == ADDRESSOF)
 887     value = copy_to_reg (value);
 888
 889   while (bitsdone < bitsize)
 890     {
 891       unsigned HOST_WIDE_INT thissize;
 892       rtx part, word;
 893       unsigned HOST_WIDE_INT thispos;
 894       unsigned HOST_WIDE_INT offset;
 895
 896       offset = (bitpos + bitsdone) / unit;
 897       thispos = (bitpos + bitsdone) % unit;
 898
 899       /* THISSIZE must not overrun a word boundary.  Otherwise,
 900          store_fixed_bit_field will call us again, and we will mutually
 901          recurse forever.  */
 902       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 903       thissize = MIN (thissize, unit - thispos);
 904
 905       if (BYTES_BIG_ENDIAN)
 906         {
 907           int total_bits;
 908
 909           /* We must do an endian conversion exactly the same way as it is
 910              done in extract_bit_field, so that the two calls to
 911              extract_fixed_bit_field will have comparable arguments.  */
 912           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 913             total_bits = BITS_PER_WORD;
 914           else
 915             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 916
 917           /* Fetch successively less significant portions.  */
 918           if (GET_CODE (value) == CONST_INT)
 919             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 920                              >> (bitsize - bitsdone - thissize))
 921                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 922           else
 923             /* The args are chosen so that the last part includes the
 924                lsb.  Give extract_bit_field the value it needs (with
 925                endianness compensation) to fetch the piece we want.  */
 926             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 927                                             total_bits - bitsize + bitsdone,
 928                                             NULL_RTX, 1);
 929         }
 930       else
 931         {
 932           /* Fetch successively more significant portions.  */
 933           if (GET_CODE (value) == CONST_INT)
 934             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 935                              >> bitsdone)
 936                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 937           else
 938             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 939                                             bitsdone, NULL_RTX, 1);
 940         }
 941
 942       /* If OP0 is a register, then handle OFFSET here.
 943
 944          When handling multiword bitfields, extract_bit_field may pass
 945          down a word_mode SUBREG of a larger REG for a bitfield that actually
 946          crosses a word boundary.  Thus, for a SUBREG, we must find
 947          the current word starting from the base register.  */
 948       if (GET_CODE (op0) == SUBREG)
 949         {
 950           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 951           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 952                                         GET_MODE (SUBREG_REG (op0)));
 953           offset = 0;
 954         }
 955       else if (GET_CODE (op0) == REG)
 956         {
 957           word = operand_subword_force (op0, offset, GET_MODE (op0));
 958           offset = 0;
 959         }
 960       else
 961         word = op0;
 962
 963       /* OFFSET is in UNITs, and UNIT is in bits.
 964          store_fixed_bit_field wants offset in bytes.  */
 965       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 966                              thispos, part);
 967       bitsdone += thissize;
 968     }
 969 }
 970 \f
 971 /* Generate code to extract a byte-field from STR_RTX
 972    containing BITSIZE bits, starting at BITNUM,
 973    and put it in TARGET if possible (if TARGET is nonzero).
 974    Regardless of TARGET, we return the rtx for where the value is placed.
 975    It may be a QUEUED.
 976
 977    STR_RTX is the structure containing the byte (a REG or MEM).
 978    UNSIGNEDP is nonzero if this is an unsigned bit field.
 979    MODE is the natural mode of the field value once extracted.
 980    TMODE is the mode the caller would like the value to have;
 981    but the value may be returned with type MODE instead.
 982
 983    TOTAL_SIZE is the size in bytes of the containing structure,
 984    or -1 if varying.
 985
 986    If a TARGET is specified and we can store in it at no extra cost,
 987    we do so, and return TARGET.
 988    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 989    if they are equally easy.  */
 990
 991 rtx
 992 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 993                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
 994                    enum machine_mode mode, enum machine_mode tmode,
 995                    HOST_WIDE_INT total_size)
 996 {
 997   unsigned int unit
 998     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 999   unsigned HOST_WIDE_INT offset = bitnum / unit;
1000   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1001   rtx op0 = str_rtx;
1002   rtx spec_target = target;
1003   rtx spec_target_subreg = 0;
1004   enum machine_mode int_mode;
1005   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1006   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1007   enum machine_mode mode1;
1008   int byte_offset;
1009
1010   /* Discount the part of the structure before the desired byte.
1011      We need to know how many bytes are safe to reference after it.  */
1012   if (total_size >= 0)
1013     total_size -= (bitpos / BIGGEST_ALIGNMENT
1014                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1015
1016   if (tmode == VOIDmode)
1017     tmode = mode;
1018
1019   while (GET_CODE (op0) == SUBREG)
1020     {
1021       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1022       if (bitpos > unit)
1023         {
1024           offset += (bitpos / unit);
1025           bitpos %= unit;
1026         }
1027       op0 = SUBREG_REG (op0);
1028     }
1029
1030   if (GET_CODE (op0) == REG
1031       && mode == GET_MODE (op0)
1032       && bitnum == 0
1033       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1034     {
1035       /* We're trying to extract a full register from itself.  */
1036       return op0;
1037     }
1038
1039   /* Make sure we are playing with integral modes.  Pun with subregs
1040      if we aren't.  */
1041   {
1042     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1043     if (imode != GET_MODE (op0))
1044       {
1045         if (GET_CODE (op0) == MEM)
1046           op0 = adjust_address (op0, imode, 0);
1047         else if (imode != BLKmode)
1048           op0 = gen_lowpart (imode, op0);
1049         else
1050           abort ();
1051       }
1052   }
1053
1054   /* We may be accessing data outside the field, which means
1055      we can alias adjacent data.  */
1056   if (GET_CODE (op0) == MEM)
1057     {
1058       op0 = shallow_copy_rtx (op0);
1059       set_mem_alias_set (op0, 0);
1060       set_mem_expr (op0, 0);
1061     }
1062
1063   /* Extraction of a full-word or multi-word value from a structure
1064      in a register or aligned memory can be done with just a SUBREG.
1065      A subword value in the least significant part of a register
1066      can also be extracted with a SUBREG.  For this, we need the
1067      byte offset of the value in op0.  */
1068
1069   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1070
1071   /* If OP0 is a register, BITPOS must count within a word.
1072      But as we have it, it counts within whatever size OP0 now has.
1073      On a bigendian machine, these are not the same, so convert.  */
1074   if (BYTES_BIG_ENDIAN
1075       && GET_CODE (op0) != MEM
1076       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1077     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1078
1079   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1080      If that's wrong, the solution is to test for it and set TARGET to 0
1081      if needed.  */
1082
1083   mode1  = (VECTOR_MODE_P (tmode)
1084             ? mode
1085             : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1086
1087   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1088         && bitpos % BITS_PER_WORD == 0)
1089        || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1090            /* ??? The big endian test here is wrong.  This is correct
1091               if the value is in a register, and if mode_for_size is not
1092               the same mode as op0.  This causes us to get unnecessarily
1093               inefficient code from the Thumb port when -mbig-endian.  */
1094            && (BYTES_BIG_ENDIAN
1095                ? bitpos + bitsize == BITS_PER_WORD
1096                : bitpos == 0)))
1097       && ((GET_CODE (op0) != MEM
1098            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1099                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1100            && GET_MODE_SIZE (mode1) != 0
1101            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1102           || (GET_CODE (op0) == MEM
1103               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1104                   || (offset * BITS_PER_UNIT % bitsize == 0
1105                       && MEM_ALIGN (op0) % bitsize == 0)))))
1106     {
1107       if (mode1 != GET_MODE (op0))
1108         {
1109           if (GET_CODE (op0) == SUBREG)
1110             {
1111               if (GET_MODE (SUBREG_REG (op0)) == mode1
1112                   || GET_MODE_CLASS (mode1) == MODE_INT
1113                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1114                 op0 = SUBREG_REG (op0);
1115               else
1116                 /* Else we've got some float mode source being extracted into
1117                    a different float mode destination -- this combination of
1118                    subregs results in Severe Tire Damage.  */
1119                 goto no_subreg_mode_swap;
1120             }
1121           if (GET_CODE (op0) == REG)
1122             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1123           else
1124             op0 = adjust_address (op0, mode1, offset);
1125         }
1126       if (mode1 != mode)
1127         return convert_to_mode (tmode, op0, unsignedp);
1128       return op0;
1129     }
1130  no_subreg_mode_swap:
1131
1132   /* Handle fields bigger than a word.  */
1133
1134   if (bitsize > BITS_PER_WORD)
1135     {
1136       /* Here we transfer the words of the field
1137          in the order least significant first.
1138          This is because the most significant word is the one which may
1139          be less than full.  */
1140
1141       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1142       unsigned int i;
1143
1144       if (target == 0 || GET_CODE (target) != REG)
1145         target = gen_reg_rtx (mode);
1146
1147       /* Indicate for flow that the entire target reg is being set.  */
1148       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1149
1150       for (i = 0; i < nwords; i++)
1151         {
1152           /* If I is 0, use the low-order word in both field and target;
1153              if I is 1, use the next to lowest word; and so on.  */
1154           /* Word number in TARGET to use.  */
1155           unsigned int wordnum
1156             = (WORDS_BIG_ENDIAN
1157                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1158                : i);
1159           /* Offset from start of field in OP0.  */
1160           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1161                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1162                                                 * (int) BITS_PER_WORD))
1163                                      : (int) i * BITS_PER_WORD);
1164           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1165           rtx result_part
1166             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1167                                            bitsize - i * BITS_PER_WORD),
1168                                  bitnum + bit_offset, 1, target_part, mode,
1169                                  word_mode, total_size);
1170
1171           if (target_part == 0)
1172             abort ();
1173
1174           if (result_part != target_part)
1175             emit_move_insn (target_part, result_part);
1176         }
1177
1178       if (unsignedp)
1179         {
1180           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1181              need to be zero'd out.  */
1182           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1183             {
1184               unsigned int i, total_words;
1185
1186               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1187               for (i = nwords; i < total_words; i++)
1188                 emit_move_insn
1189                   (operand_subword (target,
1190                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1191                                     1, VOIDmode),
1192                    const0_rtx);
1193             }
1194           return target;
1195         }
1196
1197       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1198       target = expand_shift (LSHIFT_EXPR, mode, target,
1199                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1200                              NULL_RTX, 0);
1201       return expand_shift (RSHIFT_EXPR, mode, target,
1202                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1203                            NULL_RTX, 0);
1204     }
1205
1206   /* From here on we know the desired field is smaller than a word.  */
1207
1208   /* Check if there is a correspondingly-sized integer field, so we can
1209      safely extract it as one size of integer, if necessary; then
1210      truncate or extend to the size that is wanted; then use SUBREGs or
1211      convert_to_mode to get one of the modes we really wanted.  */
1212
1213   int_mode = int_mode_for_mode (tmode);
1214   if (int_mode == BLKmode)
1215     int_mode = int_mode_for_mode (mode);
1216   if (int_mode == BLKmode)
1217     abort ();    /* Should probably push op0 out to memory and then
1218                     do a load.  */
1219
1220   /* OFFSET is the number of words or bytes (UNIT says which)
1221      from STR_RTX to the first word or byte containing part of the field.  */
1222
1223   if (GET_CODE (op0) != MEM)
1224     {
1225       if (offset != 0
1226           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1227         {
1228           if (GET_CODE (op0) != REG)
1229             op0 = copy_to_reg (op0);
1230           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1231                                 op0, (offset * UNITS_PER_WORD));
1232         }
1233       offset = 0;
1234     }
1235   else
1236     op0 = protect_from_queue (str_rtx, 1);
1237
1238   /* Now OFFSET is nonzero only for memory operands.  */
1239
1240   if (unsignedp)
1241     {
1242       if (HAVE_extzv
1243           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1244           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1245                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1246         {
1247           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1248           rtx bitsize_rtx, bitpos_rtx;
1249           rtx last = get_last_insn ();
1250           rtx xop0 = op0;
1251           rtx xtarget = target;
1252           rtx xspec_target = spec_target;
1253           rtx xspec_target_subreg = spec_target_subreg;
1254           rtx pat;
1255           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1256
1257           if (GET_CODE (xop0) == MEM)
1258             {
1259               int save_volatile_ok = volatile_ok;
1260               volatile_ok = 1;
1261
1262               /* Is the memory operand acceptable?  */
1263               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1264                      (xop0, GET_MODE (xop0))))
1265                 {
1266                   /* No, load into a reg and extract from there.  */
1267                   enum machine_mode bestmode;
1268
1269                   /* Get the mode to use for inserting into this field.  If
1270                      OP0 is BLKmode, get the smallest mode consistent with the
1271                      alignment. If OP0 is a non-BLKmode object that is no
1272                      wider than MAXMODE, use its mode. Otherwise, use the
1273                      smallest mode containing the field.  */
1274
1275                   if (GET_MODE (xop0) == BLKmode
1276                       || (GET_MODE_SIZE (GET_MODE (op0))
1277                           > GET_MODE_SIZE (maxmode)))
1278                     bestmode = get_best_mode (bitsize, bitnum,
1279                                               MEM_ALIGN (xop0), maxmode,
1280                                               MEM_VOLATILE_P (xop0));
1281                   else
1282                     bestmode = GET_MODE (xop0);
1283
1284                   if (bestmode == VOIDmode
1285                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1286                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1287                     goto extzv_loses;
1288
1289                   /* Compute offset as multiple of this unit,
1290                      counting in bytes.  */
1291                   unit = GET_MODE_BITSIZE (bestmode);
1292                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1293                   xbitpos = bitnum % unit;
1294                   xop0 = adjust_address (xop0, bestmode, xoffset);
1295
1296                   /* Fetch it to a register in that size.  */
1297                   xop0 = force_reg (bestmode, xop0);
1298
1299                   /* XBITPOS counts within UNIT, which is what is expected.  */
1300                 }
1301               else
1302                 /* Get ref to first byte containing part of the field.  */
1303                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1304
1305               volatile_ok = save_volatile_ok;
1306             }
1307
1308           /* If op0 is a register, we need it in MAXMODE (which is usually
1309              SImode). to make it acceptable to the format of extzv.  */
1310           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1311             goto extzv_loses;
1312           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1313             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1314
1315           /* On big-endian machines, we count bits from the most significant.
1316              If the bit field insn does not, we must invert.  */
1317           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1318             xbitpos = unit - bitsize - xbitpos;
1319
1320           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1321           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1322             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1323
1324           unit = GET_MODE_BITSIZE (maxmode);
1325
1326           if (xtarget == 0
1327               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1328             xtarget = xspec_target = gen_reg_rtx (tmode);
1329
1330           if (GET_MODE (xtarget) != maxmode)
1331             {
1332               if (GET_CODE (xtarget) == REG)
1333                 {
1334                   int wider = (GET_MODE_SIZE (maxmode)
1335                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1336                   xtarget = gen_lowpart (maxmode, xtarget);
1337                   if (wider)
1338                     xspec_target_subreg = xtarget;
1339                 }
1340               else
1341                 xtarget = gen_reg_rtx (maxmode);
1342             }
1343
1344           /* If this machine's extzv insists on a register target,
1345              make sure we have one.  */
1346           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1347                  (xtarget, maxmode)))
1348             xtarget = gen_reg_rtx (maxmode);
1349
1350           bitsize_rtx = GEN_INT (bitsize);
1351           bitpos_rtx = GEN_INT (xbitpos);
1352
1353           pat = gen_extzv (protect_from_queue (xtarget, 1),
1354                            xop0, bitsize_rtx, bitpos_rtx);
1355           if (pat)
1356             {
1357               emit_insn (pat);
1358               target = xtarget;
1359               spec_target = xspec_target;
1360               spec_target_subreg = xspec_target_subreg;
1361             }
1362           else
1363             {
1364               delete_insns_since (last);
1365               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1366                                                 bitpos, target, 1);
1367             }
1368         }
1369       else
1370       extzv_loses:
1371         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1372                                           bitpos, target, 1);
1373     }
1374   else
1375     {
1376       if (HAVE_extv
1377           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1378           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1379                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1380         {
1381           int xbitpos = bitpos, xoffset = offset;
1382           rtx bitsize_rtx, bitpos_rtx;
1383           rtx last = get_last_insn ();
1384           rtx xop0 = op0, xtarget = target;
1385           rtx xspec_target = spec_target;
1386           rtx xspec_target_subreg = spec_target_subreg;
1387           rtx pat;
1388           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1389
1390           if (GET_CODE (xop0) == MEM)
1391             {
1392               /* Is the memory operand acceptable?  */
1393               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1394                      (xop0, GET_MODE (xop0))))
1395                 {
1396                   /* No, load into a reg and extract from there.  */
1397                   enum machine_mode bestmode;
1398
1399                   /* Get the mode to use for inserting into this field.  If
1400                      OP0 is BLKmode, get the smallest mode consistent with the
1401                      alignment. If OP0 is a non-BLKmode object that is no
1402                      wider than MAXMODE, use its mode. Otherwise, use the
1403                      smallest mode containing the field.  */
1404
1405                   if (GET_MODE (xop0) == BLKmode
1406                       || (GET_MODE_SIZE (GET_MODE (op0))
1407                           > GET_MODE_SIZE (maxmode)))
1408                     bestmode = get_best_mode (bitsize, bitnum,
1409                                               MEM_ALIGN (xop0), maxmode,
1410                                               MEM_VOLATILE_P (xop0));
1411                   else
1412                     bestmode = GET_MODE (xop0);
1413
1414                   if (bestmode == VOIDmode
1415                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1416                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1417                     goto extv_loses;
1418
1419                   /* Compute offset as multiple of this unit,
1420                      counting in bytes.  */
1421                   unit = GET_MODE_BITSIZE (bestmode);
1422                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1423                   xbitpos = bitnum % unit;
1424                   xop0 = adjust_address (xop0, bestmode, xoffset);
1425
1426                   /* Fetch it to a register in that size.  */
1427                   xop0 = force_reg (bestmode, xop0);
1428
1429                   /* XBITPOS counts within UNIT, which is what is expected.  */
1430                 }
1431               else
1432                 /* Get ref to first byte containing part of the field.  */
1433                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1434             }
1435
1436           /* If op0 is a register, we need it in MAXMODE (which is usually
1437              SImode) to make it acceptable to the format of extv.  */
1438           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1439             goto extv_loses;
1440           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1441             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1442
1443           /* On big-endian machines, we count bits from the most significant.
1444              If the bit field insn does not, we must invert.  */
1445           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1446             xbitpos = unit - bitsize - xbitpos;
1447
1448           /* XBITPOS counts within a size of UNIT.
1449              Adjust to count within a size of MAXMODE.  */
1450           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1451             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1452
1453           unit = GET_MODE_BITSIZE (maxmode);
1454
1455           if (xtarget == 0
1456               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1457             xtarget = xspec_target = gen_reg_rtx (tmode);
1458
1459           if (GET_MODE (xtarget) != maxmode)
1460             {
1461               if (GET_CODE (xtarget) == REG)
1462                 {
1463                   int wider = (GET_MODE_SIZE (maxmode)
1464                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1465                   xtarget = gen_lowpart (maxmode, xtarget);
1466                   if (wider)
1467                     xspec_target_subreg = xtarget;
1468                 }
1469               else
1470                 xtarget = gen_reg_rtx (maxmode);
1471             }
1472
1473           /* If this machine's extv insists on a register target,
1474              make sure we have one.  */
1475           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1476                  (xtarget, maxmode)))
1477             xtarget = gen_reg_rtx (maxmode);
1478
1479           bitsize_rtx = GEN_INT (bitsize);
1480           bitpos_rtx = GEN_INT (xbitpos);
1481
1482           pat = gen_extv (protect_from_queue (xtarget, 1),
1483                           xop0, bitsize_rtx, bitpos_rtx);
1484           if (pat)
1485             {
1486               emit_insn (pat);
1487               target = xtarget;
1488               spec_target = xspec_target;
1489               spec_target_subreg = xspec_target_subreg;
1490             }
1491           else
1492             {
1493               delete_insns_since (last);
1494               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1495                                                 bitpos, target, 0);
1496             }
1497         }
1498       else
1499       extv_loses:
1500         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1501                                           bitpos, target, 0);
1502     }
1503   if (target == spec_target)
1504     return target;
1505   if (target == spec_target_subreg)
1506     return spec_target;
1507   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1508     {
1509       /* If the target mode is floating-point, first convert to the
1510          integer mode of that size and then access it as a floating-point
1511          value via a SUBREG.  */
1512       if (GET_MODE_CLASS (tmode) != MODE_INT
1513           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1514         {
1515           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1516                                                    MODE_INT, 0),
1517                                     target, unsignedp);
1518           return gen_lowpart (tmode, target);
1519         }
1520       else
1521         return convert_to_mode (tmode, target, unsignedp);
1522     }
1523   return target;
1524 }
1525 \f
1526 /* Extract a bit field using shifts and boolean operations
1527    Returns an rtx to represent the value.
1528    OP0 addresses a register (word) or memory (byte).
1529    BITPOS says which bit within the word or byte the bit field starts in.
1530    OFFSET says how many bytes farther the bit field starts;
1531     it is 0 if OP0 is a register.
1532    BITSIZE says how many bits long the bit field is.
1533     (If OP0 is a register, it may be narrower than a full word,
1534      but BITPOS still counts within a full word,
1535      which is significant on bigendian machines.)
1536
1537    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1538    If TARGET is nonzero, attempts to store the value there
1539    and return TARGET, but this is not guaranteed.
1540    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1541
1542 static rtx
1543 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1544                          unsigned HOST_WIDE_INT offset,
1545                          unsigned HOST_WIDE_INT bitsize,
1546                          unsigned HOST_WIDE_INT bitpos, rtx target,
1547                          int unsignedp)
1548 {
1549   unsigned int total_bits = BITS_PER_WORD;
1550   enum machine_mode mode;
1551
1552   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1553     {
1554       /* Special treatment for a bit field split across two registers.  */
1555       if (bitsize + bitpos > BITS_PER_WORD)
1556         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1557     }
1558   else
1559     {
1560       /* Get the proper mode to use for this field.  We want a mode that
1561          includes the entire field.  If such a mode would be larger than
1562          a word, we won't be doing the extraction the normal way.  */
1563
1564       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1565                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1566
1567       if (mode == VOIDmode)
1568         /* The only way this should occur is if the field spans word
1569            boundaries.  */
1570         return extract_split_bit_field (op0, bitsize,
1571                                         bitpos + offset * BITS_PER_UNIT,
1572                                         unsignedp);
1573
1574       total_bits = GET_MODE_BITSIZE (mode);
1575
1576       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1577          be in the range 0 to total_bits-1, and put any excess bytes in
1578          OFFSET.  */
1579       if (bitpos >= total_bits)
1580         {
1581           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1582           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1583                      * BITS_PER_UNIT);
1584         }
1585
1586       /* Get ref to an aligned byte, halfword, or word containing the field.
1587          Adjust BITPOS to be position within a word,
1588          and OFFSET to be the offset of that word.
1589          Then alter OP0 to refer to that word.  */
1590       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1591       offset -= (offset % (total_bits / BITS_PER_UNIT));
1592       op0 = adjust_address (op0, mode, offset);
1593     }
1594
1595   mode = GET_MODE (op0);
1596
1597   if (BYTES_BIG_ENDIAN)
1598     /* BITPOS is the distance between our msb and that of OP0.
1599        Convert it to the distance from the lsb.  */
1600     bitpos = total_bits - bitsize - bitpos;
1601
1602   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1603      We have reduced the big-endian case to the little-endian case.  */
1604
1605   if (unsignedp)
1606     {
1607       if (bitpos)
1608         {
1609           /* If the field does not already start at the lsb,
1610              shift it so it does.  */
1611           tree amount = build_int_2 (bitpos, 0);
1612           /* Maybe propagate the target for the shift.  */
1613           /* But not if we will return it--could confuse integrate.c.  */
1614           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1615                            && !REG_FUNCTION_VALUE_P (target)
1616                            ? target : 0);
1617           if (tmode != mode) subtarget = 0;
1618           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1619         }
1620       /* Convert the value to the desired mode.  */
1621       if (mode != tmode)
1622         op0 = convert_to_mode (tmode, op0, 1);
1623
1624       /* Unless the msb of the field used to be the msb when we shifted,
1625          mask out the upper bits.  */
1626
1627       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1628         return expand_binop (GET_MODE (op0), and_optab, op0,
1629                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1630                              target, 1, OPTAB_LIB_WIDEN);
1631       return op0;
1632     }
1633
1634   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1635      then arithmetic-shift its lsb to the lsb of the word.  */
1636   op0 = force_reg (mode, op0);
1637   if (mode != tmode)
1638     target = 0;
1639
1640   /* Find the narrowest integer mode that contains the field.  */
1641
1642   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1643        mode = GET_MODE_WIDER_MODE (mode))
1644     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1645       {
1646         op0 = convert_to_mode (mode, op0, 0);
1647         break;
1648       }
1649
1650   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1651     {
1652       tree amount
1653         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1654       /* Maybe propagate the target for the shift.  */
1655       /* But not if we will return the result--could confuse integrate.c.  */
1656       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1657                        && ! REG_FUNCTION_VALUE_P (target)
1658                        ? target : 0);
1659       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1660     }
1661
1662   return expand_shift (RSHIFT_EXPR, mode, op0,
1663                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1664                        target, 0);
1665 }
1666 \f
1667 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1668    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1669    complement of that if COMPLEMENT.  The mask is truncated if
1670    necessary to the width of mode MODE.  The mask is zero-extended if
1671    BITSIZE+BITPOS is too small for MODE.  */
1672
1673 static rtx
1674 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1675 {
1676   HOST_WIDE_INT masklow, maskhigh;
1677
1678   if (bitsize == 0)
1679     masklow = 0;
1680   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1681     masklow = (HOST_WIDE_INT) -1 << bitpos;
1682   else
1683     masklow = 0;
1684
1685   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1686     masklow &= ((unsigned HOST_WIDE_INT) -1
1687                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1688
1689   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1690     maskhigh = -1;
1691   else
1692     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1693
1694   if (bitsize == 0)
1695     maskhigh = 0;
1696   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1697     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1698                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1699   else
1700     maskhigh = 0;
1701
1702   if (complement)
1703     {
1704       maskhigh = ~maskhigh;
1705       masklow = ~masklow;
1706     }
1707
1708   return immed_double_const (masklow, maskhigh, mode);
1709 }
1710
1711 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1712    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1713
1714 static rtx
1715 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1716 {
1717   unsigned HOST_WIDE_INT v = INTVAL (value);
1718   HOST_WIDE_INT low, high;
1719
1720   if (bitsize < HOST_BITS_PER_WIDE_INT)
1721     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1722
1723   if (bitpos < HOST_BITS_PER_WIDE_INT)
1724     {
1725       low = v << bitpos;
1726       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1727     }
1728   else
1729     {
1730       low = 0;
1731       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1732     }
1733
1734   return immed_double_const (low, high, mode);
1735 }
1736 \f
1737 /* Extract a bit field that is split across two words
1738    and return an RTX for the result.
1739
1740    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1741    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1742    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1743
1744 static rtx
1745 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1746                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1747 {
1748   unsigned int unit;
1749   unsigned int bitsdone = 0;
1750   rtx result = NULL_RTX;
1751   int first = 1;
1752
1753   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1754      much at a time.  */
1755   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1756     unit = BITS_PER_WORD;
1757   else
1758     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1759
1760   while (bitsdone < bitsize)
1761     {
1762       unsigned HOST_WIDE_INT thissize;
1763       rtx part, word;
1764       unsigned HOST_WIDE_INT thispos;
1765       unsigned HOST_WIDE_INT offset;
1766
1767       offset = (bitpos + bitsdone) / unit;
1768       thispos = (bitpos + bitsdone) % unit;
1769
1770       /* THISSIZE must not overrun a word boundary.  Otherwise,
1771          extract_fixed_bit_field will call us again, and we will mutually
1772          recurse forever.  */
1773       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1774       thissize = MIN (thissize, unit - thispos);
1775
1776       /* If OP0 is a register, then handle OFFSET here.
1777
1778          When handling multiword bitfields, extract_bit_field may pass
1779          down a word_mode SUBREG of a larger REG for a bitfield that actually
1780          crosses a word boundary.  Thus, for a SUBREG, we must find
1781          the current word starting from the base register.  */
1782       if (GET_CODE (op0) == SUBREG)
1783         {
1784           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1785           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1786                                         GET_MODE (SUBREG_REG (op0)));
1787           offset = 0;
1788         }
1789       else if (GET_CODE (op0) == REG)
1790         {
1791           word = operand_subword_force (op0, offset, GET_MODE (op0));
1792           offset = 0;
1793         }
1794       else
1795         word = op0;
1796
1797       /* Extract the parts in bit-counting order,
1798          whose meaning is determined by BYTES_PER_UNIT.
1799          OFFSET is in UNITs, and UNIT is in bits.
1800          extract_fixed_bit_field wants offset in bytes.  */
1801       part = extract_fixed_bit_field (word_mode, word,
1802                                       offset * unit / BITS_PER_UNIT,
1803                                       thissize, thispos, 0, 1);
1804       bitsdone += thissize;
1805
1806       /* Shift this part into place for the result.  */
1807       if (BYTES_BIG_ENDIAN)
1808         {
1809           if (bitsize != bitsdone)
1810             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1811                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1812         }
1813       else
1814         {
1815           if (bitsdone != thissize)
1816             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1817                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1818         }
1819
1820       if (first)
1821         result = part;
1822       else
1823         /* Combine the parts with bitwise or.  This works
1824            because we extracted each part as an unsigned bit field.  */
1825         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1826                                OPTAB_LIB_WIDEN);
1827
1828       first = 0;
1829     }
1830
1831   /* Unsigned bit field: we are done.  */
1832   if (unsignedp)
1833     return result;
1834   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1835   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1836                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1837                          NULL_RTX, 0);
1838   return expand_shift (RSHIFT_EXPR, word_mode, result,
1839                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1840 }
1841 \f
1842 /* Add INC into TARGET.  */
1843
1844 void
1845 expand_inc (rtx target, rtx inc)
1846 {
1847   rtx value = expand_binop (GET_MODE (target), add_optab,
1848                             target, inc,
1849                             target, 0, OPTAB_LIB_WIDEN);
1850   if (value != target)
1851     emit_move_insn (target, value);
1852 }
1853
1854 /* Subtract DEC from TARGET.  */
1855
1856 void
1857 expand_dec (rtx target, rtx dec)
1858 {
1859   rtx value = expand_binop (GET_MODE (target), sub_optab,
1860                             target, dec,
1861                             target, 0, OPTAB_LIB_WIDEN);
1862   if (value != target)
1863     emit_move_insn (target, value);
1864 }
1865 \f
1866 /* Output a shift instruction for expression code CODE,
1867    with SHIFTED being the rtx for the value to shift,
1868    and AMOUNT the tree for the amount to shift by.
1869    Store the result in the rtx TARGET, if that is convenient.
1870    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1871    Return the rtx for where the value is.  */
1872
1873 rtx
1874 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1875               tree amount, rtx target, int unsignedp)
1876 {
1877   rtx op1, temp = 0;
1878   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1879   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1880   int try;
1881
1882   /* Previously detected shift-counts computed by NEGATE_EXPR
1883      and shifted in the other direction; but that does not work
1884      on all machines.  */
1885
1886   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1887
1888 #ifdef SHIFT_COUNT_TRUNCATED
1889   if (SHIFT_COUNT_TRUNCATED)
1890     {
1891       if (GET_CODE (op1) == CONST_INT
1892           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1893               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1894         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1895                        % GET_MODE_BITSIZE (mode));
1896       else if (GET_CODE (op1) == SUBREG
1897                && subreg_lowpart_p (op1))
1898         op1 = SUBREG_REG (op1);
1899     }
1900 #endif
1901
1902   if (op1 == const0_rtx)
1903     return shifted;
1904
1905   for (try = 0; temp == 0 && try < 3; try++)
1906     {
1907       enum optab_methods methods;
1908
1909       if (try == 0)
1910         methods = OPTAB_DIRECT;
1911       else if (try == 1)
1912         methods = OPTAB_WIDEN;
1913       else
1914         methods = OPTAB_LIB_WIDEN;
1915
1916       if (rotate)
1917         {
1918           /* Widening does not work for rotation.  */
1919           if (methods == OPTAB_WIDEN)
1920             continue;
1921           else if (methods == OPTAB_LIB_WIDEN)
1922             {
1923               /* If we have been unable to open-code this by a rotation,
1924                  do it as the IOR of two shifts.  I.e., to rotate A
1925                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1926                  where C is the bitsize of A.
1927
1928                  It is theoretically possible that the target machine might
1929                  not be able to perform either shift and hence we would
1930                  be making two libcalls rather than just the one for the
1931                  shift (similarly if IOR could not be done).  We will allow
1932                  this extremely unlikely lossage to avoid complicating the
1933                  code below.  */
1934
1935               rtx subtarget = target == shifted ? 0 : target;
1936               rtx temp1;
1937               tree type = TREE_TYPE (amount);
1938               tree new_amount = make_tree (type, op1);
1939               tree other_amount
1940                 = fold (build (MINUS_EXPR, type,
1941                                convert (type,
1942                                         build_int_2 (GET_MODE_BITSIZE (mode),
1943                                                      0)),
1944                                amount));
1945
1946               shifted = force_reg (mode, shifted);
1947
1948               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1949                                    mode, shifted, new_amount, subtarget, 1);
1950               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1951                                     mode, shifted, other_amount, 0, 1);
1952               return expand_binop (mode, ior_optab, temp, temp1, target,
1953                                    unsignedp, methods);
1954             }
1955
1956           temp = expand_binop (mode,
1957                                left ? rotl_optab : rotr_optab,
1958                                shifted, op1, target, unsignedp, methods);
1959
1960           /* If we don't have the rotate, but we are rotating by a constant
1961              that is in range, try a rotate in the opposite direction.  */
1962
1963           if (temp == 0 && GET_CODE (op1) == CONST_INT
1964               && INTVAL (op1) > 0
1965               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1966             temp = expand_binop (mode,
1967                                  left ? rotr_optab : rotl_optab,
1968                                  shifted,
1969                                  GEN_INT (GET_MODE_BITSIZE (mode)
1970                                           - INTVAL (op1)),
1971                                  target, unsignedp, methods);
1972         }
1973       else if (unsignedp)
1974         temp = expand_binop (mode,
1975                              left ? ashl_optab : lshr_optab,
1976                              shifted, op1, target, unsignedp, methods);
1977
1978       /* Do arithmetic shifts.
1979          Also, if we are going to widen the operand, we can just as well
1980          use an arithmetic right-shift instead of a logical one.  */
1981       if (temp == 0 && ! rotate
1982           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1983         {
1984           enum optab_methods methods1 = methods;
1985
1986           /* If trying to widen a log shift to an arithmetic shift,
1987              don't accept an arithmetic shift of the same size.  */
1988           if (unsignedp)
1989             methods1 = OPTAB_MUST_WIDEN;
1990
1991           /* Arithmetic shift */
1992
1993           temp = expand_binop (mode,
1994                                left ? ashl_optab : ashr_optab,
1995                                shifted, op1, target, unsignedp, methods1);
1996         }
1997
1998       /* We used to try extzv here for logical right shifts, but that was
1999          only useful for one machine, the VAX, and caused poor code
2000          generation there for lshrdi3, so the code was deleted and a
2001          define_expand for lshrsi3 was added to vax.md.  */
2002     }
2003
2004   if (temp == 0)
2005     abort ();
2006   return temp;
2007 }
2008 \f
2009 enum alg_code { alg_zero, alg_m, alg_shift,
2010                   alg_add_t_m2, alg_sub_t_m2,
2011                   alg_add_factor, alg_sub_factor,
2012                   alg_add_t2_m, alg_sub_t2_m,
2013                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2014
2015 /* This structure records a sequence of operations.
2016    `ops' is the number of operations recorded.
2017    `cost' is their total cost.
2018    The operations are stored in `op' and the corresponding
2019    logarithms of the integer coefficients in `log'.
2020
2021    These are the operations:
2022    alg_zero             total := 0;
2023    alg_m                total := multiplicand;
2024    alg_shift            total := total * coeff
2025    alg_add_t_m2         total := total + multiplicand * coeff;
2026    alg_sub_t_m2         total := total - multiplicand * coeff;
2027    alg_add_factor       total := total * coeff + total;
2028    alg_sub_factor       total := total * coeff - total;
2029    alg_add_t2_m         total := total * coeff + multiplicand;
2030    alg_sub_t2_m         total := total * coeff - multiplicand;
2031
2032    The first operand must be either alg_zero or alg_m.  */
2033
2034 struct algorithm
2035 {
2036   short cost;
2037   short ops;
2038   /* The size of the OP and LOG fields are not directly related to the
2039      word size, but the worst-case algorithms will be if we have few
2040      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2041      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2042      in total wordsize operations.  */
2043   enum alg_code op[MAX_BITS_PER_WORD];
2044   char log[MAX_BITS_PER_WORD];
2045 };
2046
2047 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2048 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2049                                                  int, unsigned HOST_WIDE_INT *,
2050                                                  int *, int *);
2051 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2052 /* Compute and return the best algorithm for multiplying by T.
2053    The algorithm must cost less than cost_limit
2054    If retval.cost >= COST_LIMIT, no algorithm was found and all
2055    other field of the returned struct are undefined.  */
2056
2057 static void
2058 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2059             int cost_limit)
2060 {
2061   int m;
2062   struct algorithm *alg_in, *best_alg;
2063   int cost;
2064   unsigned HOST_WIDE_INT q;
2065
2066   /* Indicate that no algorithm is yet found.  If no algorithm
2067      is found, this value will be returned and indicate failure.  */
2068   alg_out->cost = cost_limit;
2069
2070   if (cost_limit <= 0)
2071     return;
2072
2073   /* t == 1 can be done in zero cost.  */
2074   if (t == 1)
2075     {
2076       alg_out->ops = 1;
2077       alg_out->cost = 0;
2078       alg_out->op[0] = alg_m;
2079       return;
2080     }
2081
2082   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2083      fail now.  */
2084   if (t == 0)
2085     {
2086       if (zero_cost >= cost_limit)
2087         return;
2088       else
2089         {
2090           alg_out->ops = 1;
2091           alg_out->cost = zero_cost;
2092           alg_out->op[0] = alg_zero;
2093           return;
2094         }
2095     }
2096
2097   /* We'll be needing a couple extra algorithm structures now.  */
2098
2099   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2100   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2101
2102   /* If we have a group of zero bits at the low-order part of T, try
2103      multiplying by the remaining bits and then doing a shift.  */
2104
2105   if ((t & 1) == 0)
2106     {
2107       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2108       if (m < BITS_PER_WORD)
2109         {
2110           q = t >> m;
2111           cost = shift_cost[m];
2112           synth_mult (alg_in, q, cost_limit - cost);
2113
2114           cost += alg_in->cost;
2115           if (cost < cost_limit)
2116             {
2117               struct algorithm *x;
2118               x = alg_in, alg_in = best_alg, best_alg = x;
2119               best_alg->log[best_alg->ops] = m;
2120               best_alg->op[best_alg->ops] = alg_shift;
2121               cost_limit = cost;
2122             }
2123         }
2124     }
2125
2126   /* If we have an odd number, add or subtract one.  */
2127   if ((t & 1) != 0)
2128     {
2129       unsigned HOST_WIDE_INT w;
2130
2131       for (w = 1; (w & t) != 0; w <<= 1)
2132         ;
2133       /* If T was -1, then W will be zero after the loop.  This is another
2134          case where T ends with ...111.  Handling this with (T + 1) and
2135          subtract 1 produces slightly better code and results in algorithm
2136          selection much faster than treating it like the ...0111 case
2137          below.  */
2138       if (w == 0
2139           || (w > 2
2140               /* Reject the case where t is 3.
2141                  Thus we prefer addition in that case.  */
2142               && t != 3))
2143         {
2144           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2145
2146           cost = add_cost;
2147           synth_mult (alg_in, t + 1, cost_limit - cost);
2148
2149           cost += alg_in->cost;
2150           if (cost < cost_limit)
2151             {
2152               struct algorithm *x;
2153               x = alg_in, alg_in = best_alg, best_alg = x;
2154               best_alg->log[best_alg->ops] = 0;
2155               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2156               cost_limit = cost;
2157             }
2158         }
2159       else
2160         {
2161           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2162
2163           cost = add_cost;
2164           synth_mult (alg_in, t - 1, cost_limit - cost);
2165
2166           cost += alg_in->cost;
2167           if (cost < cost_limit)
2168             {
2169               struct algorithm *x;
2170               x = alg_in, alg_in = best_alg, best_alg = x;
2171               best_alg->log[best_alg->ops] = 0;
2172               best_alg->op[best_alg->ops] = alg_add_t_m2;
2173               cost_limit = cost;
2174             }
2175         }
2176     }
2177
2178   /* Look for factors of t of the form
2179      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2180      If we find such a factor, we can multiply by t using an algorithm that
2181      multiplies by q, shift the result by m and add/subtract it to itself.
2182
2183      We search for large factors first and loop down, even if large factors
2184      are less probable than small; if we find a large factor we will find a
2185      good sequence quickly, and therefore be able to prune (by decreasing
2186      COST_LIMIT) the search.  */
2187
2188   for (m = floor_log2 (t - 1); m >= 2; m--)
2189     {
2190       unsigned HOST_WIDE_INT d;
2191
2192       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2193       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2194         {
2195           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2196           synth_mult (alg_in, t / d, cost_limit - cost);
2197
2198           cost += alg_in->cost;
2199           if (cost < cost_limit)
2200             {
2201               struct algorithm *x;
2202               x = alg_in, alg_in = best_alg, best_alg = x;
2203               best_alg->log[best_alg->ops] = m;
2204               best_alg->op[best_alg->ops] = alg_add_factor;
2205               cost_limit = cost;
2206             }
2207           /* Other factors will have been taken care of in the recursion.  */
2208           break;
2209         }
2210
2211       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2212       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2213         {
2214           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2215           synth_mult (alg_in, t / d, cost_limit - cost);
2216
2217           cost += alg_in->cost;
2218           if (cost < cost_limit)
2219             {
2220               struct algorithm *x;
2221               x = alg_in, alg_in = best_alg, best_alg = x;
2222               best_alg->log[best_alg->ops] = m;
2223               best_alg->op[best_alg->ops] = alg_sub_factor;
2224               cost_limit = cost;
2225             }
2226           break;
2227         }
2228     }
2229
2230   /* Try shift-and-add (load effective address) instructions,
2231      i.e. do a*3, a*5, a*9.  */
2232   if ((t & 1) != 0)
2233     {
2234       q = t - 1;
2235       q = q & -q;
2236       m = exact_log2 (q);
2237       if (m >= 0 && m < BITS_PER_WORD)
2238         {
2239           cost = shiftadd_cost[m];
2240           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2241
2242           cost += alg_in->cost;
2243           if (cost < cost_limit)
2244             {
2245               struct algorithm *x;
2246               x = alg_in, alg_in = best_alg, best_alg = x;
2247               best_alg->log[best_alg->ops] = m;
2248               best_alg->op[best_alg->ops] = alg_add_t2_m;
2249               cost_limit = cost;
2250             }
2251         }
2252
2253       q = t + 1;
2254       q = q & -q;
2255       m = exact_log2 (q);
2256       if (m >= 0 && m < BITS_PER_WORD)
2257         {
2258           cost = shiftsub_cost[m];
2259           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2260
2261           cost += alg_in->cost;
2262           if (cost < cost_limit)
2263             {
2264               struct algorithm *x;
2265               x = alg_in, alg_in = best_alg, best_alg = x;
2266               best_alg->log[best_alg->ops] = m;
2267               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2268               cost_limit = cost;
2269             }
2270         }
2271     }
2272
2273   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2274      we have not found any algorithm.  */
2275   if (cost_limit == alg_out->cost)
2276     return;
2277
2278   /* If we are getting a too long sequence for `struct algorithm'
2279      to record, make this search fail.  */
2280   if (best_alg->ops == MAX_BITS_PER_WORD)
2281     return;
2282
2283   /* Copy the algorithm from temporary space to the space at alg_out.
2284      We avoid using structure assignment because the majority of
2285      best_alg is normally undefined, and this is a critical function.  */
2286   alg_out->ops = best_alg->ops + 1;
2287   alg_out->cost = cost_limit;
2288   memcpy (alg_out->op, best_alg->op,
2289           alg_out->ops * sizeof *alg_out->op);
2290   memcpy (alg_out->log, best_alg->log,
2291           alg_out->ops * sizeof *alg_out->log);
2292 }
2293 \f
2294 /* Perform a multiplication and return an rtx for the result.
2295    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2296    TARGET is a suggestion for where to store the result (an rtx).
2297
2298    We check specially for a constant integer as OP1.
2299    If you want this check for OP0 as well, then before calling
2300    you should swap the two operands if OP0 would be constant.  */
2301
2302 rtx
2303 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp)
2304 {
2305   rtx const_op1 = op1;
2306
2307   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2308      less than or equal in size to `unsigned int' this doesn't matter.
2309      If the mode is larger than `unsigned int', then synth_mult works only
2310      if the constant value exactly fits in an `unsigned int' without any
2311      truncation.  This means that multiplying by negative values does
2312      not work; results are off by 2^32 on a 32 bit machine.  */
2313
2314   /* If we are multiplying in DImode, it may still be a win
2315      to try to work with shifts and adds.  */
2316   if (GET_CODE (op1) == CONST_DOUBLE
2317       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2318       && HOST_BITS_PER_INT >= BITS_PER_WORD
2319       && CONST_DOUBLE_HIGH (op1) == 0)
2320     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2321   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2322            && GET_CODE (op1) == CONST_INT
2323            && INTVAL (op1) < 0)
2324     const_op1 = 0;
2325
2326   /* We used to test optimize here, on the grounds that it's better to
2327      produce a smaller program when -O is not used.
2328      But this causes such a terrible slowdown sometimes
2329      that it seems better to use synth_mult always.  */
2330
2331   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2332       && (unsignedp || ! flag_trapv))
2333     {
2334       struct algorithm alg;
2335       struct algorithm alg2;
2336       HOST_WIDE_INT val = INTVAL (op1);
2337       HOST_WIDE_INT val_so_far;
2338       rtx insn;
2339       int mult_cost;
2340       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2341
2342       /* op0 must be register to make mult_cost match the precomputed
2343          shiftadd_cost array.  */
2344       op0 = force_reg (mode, op0);
2345
2346       /* Try to do the computation three ways: multiply by the negative of OP1
2347          and then negate, do the multiplication directly, or do multiplication
2348          by OP1 - 1.  */
2349
2350       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2351       mult_cost = MIN (12 * add_cost, mult_cost);
2352
2353       synth_mult (&alg, val, mult_cost);
2354
2355       /* This works only if the inverted value actually fits in an
2356          `unsigned int' */
2357       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2358         {
2359           synth_mult (&alg2, - val,
2360                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2361           if (alg2.cost + negate_cost < alg.cost)
2362             alg = alg2, variant = negate_variant;
2363         }
2364
2365       /* This proves very useful for division-by-constant.  */
2366       synth_mult (&alg2, val - 1,
2367                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2368       if (alg2.cost + add_cost < alg.cost)
2369         alg = alg2, variant = add_variant;
2370
2371       if (alg.cost < mult_cost)
2372         {
2373           /* We found something cheaper than a multiply insn.  */
2374           int opno;
2375           rtx accum, tem;
2376           enum machine_mode nmode;
2377
2378           op0 = protect_from_queue (op0, 0);
2379
2380           /* Avoid referencing memory over and over.
2381              For speed, but also for correctness when mem is volatile.  */
2382           if (GET_CODE (op0) == MEM)
2383             op0 = force_reg (mode, op0);
2384
2385           /* ACCUM starts out either as OP0 or as a zero, depending on
2386              the first operation.  */
2387
2388           if (alg.op[0] == alg_zero)
2389             {
2390               accum = copy_to_mode_reg (mode, const0_rtx);
2391               val_so_far = 0;
2392             }
2393           else if (alg.op[0] == alg_m)
2394             {
2395               accum = copy_to_mode_reg (mode, op0);
2396               val_so_far = 1;
2397             }
2398           else
2399             abort ();
2400
2401           for (opno = 1; opno < alg.ops; opno++)
2402             {
2403               int log = alg.log[opno];
2404               int preserve = preserve_subexpressions_p ();
2405               rtx shift_subtarget = preserve ? 0 : accum;
2406               rtx add_target
2407                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2408                    && ! preserve)
2409                   ? target : 0;
2410               rtx accum_target = preserve ? 0 : accum;
2411
2412               switch (alg.op[opno])
2413                 {
2414                 case alg_shift:
2415                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2416                                         build_int_2 (log, 0), NULL_RTX, 0);
2417                   val_so_far <<= log;
2418                   break;
2419
2420                 case alg_add_t_m2:
2421                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2422                                       build_int_2 (log, 0), NULL_RTX, 0);
2423                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2424                                          add_target
2425                                          ? add_target : accum_target);
2426                   val_so_far += (HOST_WIDE_INT) 1 << log;
2427                   break;
2428
2429                 case alg_sub_t_m2:
2430                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2431                                       build_int_2 (log, 0), NULL_RTX, 0);
2432                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2433                                          add_target
2434                                          ? add_target : accum_target);
2435                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2436                   break;
2437
2438                 case alg_add_t2_m:
2439                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2440                                         build_int_2 (log, 0), shift_subtarget,
2441                                         0);
2442                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2443                                          add_target
2444                                          ? add_target : accum_target);
2445                   val_so_far = (val_so_far << log) + 1;
2446                   break;
2447
2448                 case alg_sub_t2_m:
2449                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2450                                         build_int_2 (log, 0), shift_subtarget,
2451                                         0);
2452                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2453                                          add_target
2454                                          ? add_target : accum_target);
2455                   val_so_far = (val_so_far << log) - 1;
2456                   break;
2457
2458                 case alg_add_factor:
2459                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2460                                       build_int_2 (log, 0), NULL_RTX, 0);
2461                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2462                                          add_target
2463                                          ? add_target : accum_target);
2464                   val_so_far += val_so_far << log;
2465                   break;
2466
2467                 case alg_sub_factor:
2468                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2469                                       build_int_2 (log, 0), NULL_RTX, 0);
2470                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2471                                          (add_target ? add_target
2472                                           : preserve ? 0 : tem));
2473                   val_so_far = (val_so_far << log) - val_so_far;
2474                   break;
2475
2476                 default:
2477                   abort ();
2478                 }
2479
2480               /* Write a REG_EQUAL note on the last insn so that we can cse
2481                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2482                  we've set the inner register and must properly indicate
2483                  that.  */
2484
2485               tem = op0, nmode = mode;
2486               if (GET_CODE (accum) == SUBREG)
2487                 {
2488                   nmode = GET_MODE (SUBREG_REG (accum));
2489                   tem = gen_lowpart (nmode, op0);
2490                 }
2491
2492               insn = get_last_insn ();
2493               set_unique_reg_note (insn,
2494                                    REG_EQUAL,
2495                                    gen_rtx_MULT (nmode, tem,
2496                                                  GEN_INT (val_so_far)));
2497             }
2498
2499           if (variant == negate_variant)
2500             {
2501               val_so_far = - val_so_far;
2502               accum = expand_unop (mode, neg_optab, accum, target, 0);
2503             }
2504           else if (variant == add_variant)
2505             {
2506               val_so_far = val_so_far + 1;
2507               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2508             }
2509
2510           if (val != val_so_far)
2511             abort ();
2512
2513           return accum;
2514         }
2515     }
2516
2517   /* This used to use umul_optab if unsigned, but for non-widening multiply
2518      there is no difference between signed and unsigned.  */
2519   op0 = expand_binop (mode,
2520                       ! unsignedp
2521                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2522                       ? smulv_optab : smul_optab,
2523                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2524   if (op0 == 0)
2525     abort ();
2526   return op0;
2527 }
2528 \f
2529 /* Return the smallest n such that 2**n >= X.  */
2530
2531 int
2532 ceil_log2 (unsigned HOST_WIDE_INT x)
2533 {
2534   return floor_log2 (x - 1) + 1;
2535 }
2536
2537 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2538    replace division by D, and put the least significant N bits of the result
2539    in *MULTIPLIER_PTR and return the most significant bit.
2540
2541    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2542    needed precision is in PRECISION (should be <= N).
2543
2544    PRECISION should be as small as possible so this function can choose
2545    multiplier more freely.
2546
2547    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2548    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2549
2550    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2551    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2552
2553 static
2554 unsigned HOST_WIDE_INT
2555 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2556                    unsigned HOST_WIDE_INT *multiplier_ptr,
2557                    int *post_shift_ptr, int *lgup_ptr)
2558 {
2559   HOST_WIDE_INT mhigh_hi, mlow_hi;
2560   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2561   int lgup, post_shift;
2562   int pow, pow2;
2563   unsigned HOST_WIDE_INT nl, dummy1;
2564   HOST_WIDE_INT nh, dummy2;
2565
2566   /* lgup = ceil(log2(divisor)); */
2567   lgup = ceil_log2 (d);
2568
2569   if (lgup > n)
2570     abort ();
2571
2572   pow = n + lgup;
2573   pow2 = n + lgup - precision;
2574
2575   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2576     {
2577       /* We could handle this with some effort, but this case is much better
2578          handled directly with a scc insn, so rely on caller using that.  */
2579       abort ();
2580     }
2581
2582   /* mlow = 2^(N + lgup)/d */
2583  if (pow >= HOST_BITS_PER_WIDE_INT)
2584     {
2585       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2586       nl = 0;
2587     }
2588   else
2589     {
2590       nh = 0;
2591       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2592     }
2593   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2594                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2595
2596   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2597   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2598     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2599   else
2600     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2601   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2602                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2603
2604   if (mhigh_hi && nh - d >= d)
2605     abort ();
2606   if (mhigh_hi > 1 || mlow_hi > 1)
2607     abort ();
2608   /* assert that mlow < mhigh.  */
2609   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2610     abort ();
2611
2612   /* If precision == N, then mlow, mhigh exceed 2^N
2613      (but they do not exceed 2^(N+1)).  */
2614
2615   /* Reduce to lowest terms.  */
2616   for (post_shift = lgup; post_shift > 0; post_shift--)
2617     {
2618       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2619       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2620       if (ml_lo >= mh_lo)
2621         break;
2622
2623       mlow_hi = 0;
2624       mlow_lo = ml_lo;
2625       mhigh_hi = 0;
2626       mhigh_lo = mh_lo;
2627     }
2628
2629   *post_shift_ptr = post_shift;
2630   *lgup_ptr = lgup;
2631   if (n < HOST_BITS_PER_WIDE_INT)
2632     {
2633       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2634       *multiplier_ptr = mhigh_lo & mask;
2635       return mhigh_lo >= mask;
2636     }
2637   else
2638     {
2639       *multiplier_ptr = mhigh_lo;
2640       return mhigh_hi;
2641     }
2642 }
2643
2644 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2645    congruent to 1 (mod 2**N).  */
2646
2647 static unsigned HOST_WIDE_INT
2648 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2649 {
2650   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2651
2652   /* The algorithm notes that the choice y = x satisfies
2653      x*y == 1 mod 2^3, since x is assumed odd.
2654      Each iteration doubles the number of bits of significance in y.  */
2655
2656   unsigned HOST_WIDE_INT mask;
2657   unsigned HOST_WIDE_INT y = x;
2658   int nbit = 3;
2659
2660   mask = (n == HOST_BITS_PER_WIDE_INT
2661           ? ~(unsigned HOST_WIDE_INT) 0
2662           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2663
2664   while (nbit < n)
2665     {
2666       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2667       nbit *= 2;
2668     }
2669   return y;
2670 }
2671
2672 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2673    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2674    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2675    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2676    become signed.
2677
2678    The result is put in TARGET if that is convenient.
2679
2680    MODE is the mode of operation.  */
2681
2682 rtx
2683 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2684                              rtx op1, rtx target, int unsignedp)
2685 {
2686   rtx tem;
2687   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2688
2689   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2690                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2691                       NULL_RTX, 0);
2692   tem = expand_and (mode, tem, op1, NULL_RTX);
2693   adj_operand
2694     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2695                      adj_operand);
2696
2697   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2698                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2699                       NULL_RTX, 0);
2700   tem = expand_and (mode, tem, op0, NULL_RTX);
2701   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2702                           target);
2703
2704   return target;
2705 }
2706
2707 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2708    in TARGET if that is convenient, and return where the result is.  If the
2709    operation can not be performed, 0 is returned.
2710
2711    MODE is the mode of operation and result.
2712
2713    UNSIGNEDP nonzero means unsigned multiply.
2714
2715    MAX_COST is the total allowed cost for the expanded RTL.  */
2716
2717 rtx
2718 expand_mult_highpart (enum machine_mode mode, rtx op0,
2719                       unsigned HOST_WIDE_INT cnst1, rtx target,
2720                       int unsignedp, int max_cost)
2721 {
2722   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2723   optab mul_highpart_optab;
2724   optab moptab;
2725   rtx tem;
2726   int size = GET_MODE_BITSIZE (mode);
2727   rtx op1, wide_op1;
2728
2729   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2730   if (size > HOST_BITS_PER_WIDE_INT)
2731     abort ();
2732
2733   op1 = gen_int_mode (cnst1, mode);
2734
2735   wide_op1
2736     = immed_double_const (cnst1,
2737                           (unsignedp
2738                            ? (HOST_WIDE_INT) 0
2739                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2740                           wider_mode);
2741
2742   /* expand_mult handles constant multiplication of word_mode
2743      or narrower.  It does a poor job for large modes.  */
2744   if (size < BITS_PER_WORD
2745       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2746     {
2747       /* We have to do this, since expand_binop doesn't do conversion for
2748          multiply.  Maybe change expand_binop to handle widening multiply?  */
2749       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2750
2751       /* We know that this can't have signed overflow, so pretend this is
2752          an unsigned multiply.  */
2753       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2754       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2755                           build_int_2 (size, 0), NULL_RTX, 1);
2756       return convert_modes (mode, wider_mode, tem, unsignedp);
2757     }
2758
2759   if (target == 0)
2760     target = gen_reg_rtx (mode);
2761
2762   /* Firstly, try using a multiplication insn that only generates the needed
2763      high part of the product, and in the sign flavor of unsignedp.  */
2764   if (mul_highpart_cost[(int) mode] < max_cost)
2765     {
2766       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2767       target = expand_binop (mode, mul_highpart_optab,
2768                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2769       if (target)
2770         return target;
2771     }
2772
2773   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2774      Need to adjust the result after the multiplication.  */
2775   if (size - 1 < BITS_PER_WORD
2776       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2777           < max_cost))
2778     {
2779       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2780       target = expand_binop (mode, mul_highpart_optab,
2781                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2782       if (target)
2783         /* We used the wrong signedness.  Adjust the result.  */
2784         return expand_mult_highpart_adjust (mode, target, op0,
2785                                             op1, target, unsignedp);
2786     }
2787
2788   /* Try widening multiplication.  */
2789   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2790   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2791       && mul_widen_cost[(int) wider_mode] < max_cost)
2792     {
2793       op1 = force_reg (mode, op1);
2794       goto try;
2795     }
2796
2797   /* Try widening the mode and perform a non-widening multiplication.  */
2798   moptab = smul_optab;
2799   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2800       && size - 1 < BITS_PER_WORD
2801       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2802     {
2803       op1 = wide_op1;
2804       goto try;
2805     }
2806
2807   /* Try widening multiplication of opposite signedness, and adjust.  */
2808   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2809   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2810       && size - 1 < BITS_PER_WORD
2811       && (mul_widen_cost[(int) wider_mode]
2812           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2813     {
2814       rtx regop1 = force_reg (mode, op1);
2815       tem = expand_binop (wider_mode, moptab, op0, regop1,
2816                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2817       if (tem != 0)
2818         {
2819           /* Extract the high half of the just generated product.  */
2820           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2821                               build_int_2 (size, 0), NULL_RTX, 1);
2822           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2823           /* We used the wrong signedness.  Adjust the result.  */
2824           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2825                                               target, unsignedp);
2826         }
2827     }
2828
2829   return 0;
2830
2831  try:
2832   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2833   tem = expand_binop (wider_mode, moptab, op0, op1,
2834                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2835   if (tem == 0)
2836     return 0;
2837
2838   /* Extract the high half of the just generated product.  */
2839   if (mode == word_mode)
2840     {
2841       return gen_highpart (mode, tem);
2842     }
2843   else
2844     {
2845       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2846                           build_int_2 (size, 0), NULL_RTX, 1);
2847       return convert_modes (mode, wider_mode, tem, unsignedp);
2848     }
2849 }
2850 \f
2851 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2852    if that is convenient, and returning where the result is.
2853    You may request either the quotient or the remainder as the result;
2854    specify REM_FLAG nonzero to get the remainder.
2855
2856    CODE is the expression code for which kind of division this is;
2857    it controls how rounding is done.  MODE is the machine mode to use.
2858    UNSIGNEDP nonzero means do unsigned division.  */
2859
2860 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2861    and then correct it by or'ing in missing high bits
2862    if result of ANDI is nonzero.
2863    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2864    This could optimize to a bfexts instruction.
2865    But C doesn't use these operations, so their optimizations are
2866    left for later.  */
2867 /* ??? For modulo, we don't actually need the highpart of the first product,
2868    the low part will do nicely.  And for small divisors, the second multiply
2869    can also be a low-part only multiply or even be completely left out.
2870    E.g. to calculate the remainder of a division by 3 with a 32 bit
2871    multiply, multiply with 0x55555556 and extract the upper two bits;
2872    the result is exact for inputs up to 0x1fffffff.
2873    The input range can be reduced by using cross-sum rules.
2874    For odd divisors >= 3, the following table gives right shift counts
2875    so that if a number is shifted by an integer multiple of the given
2876    amount, the remainder stays the same:
2877    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2878    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2879    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2880    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2881    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2882
2883    Cross-sum rules for even numbers can be derived by leaving as many bits
2884    to the right alone as the divisor has zeros to the right.
2885    E.g. if x is an unsigned 32 bit number:
2886    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2887    */
2888
2889 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2890
2891 rtx
2892 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
2893                rtx op0, rtx op1, rtx target, int unsignedp)
2894 {
2895   enum machine_mode compute_mode;
2896   rtx tquotient;
2897   rtx quotient = 0, remainder = 0;
2898   rtx last;
2899   int size;
2900   rtx insn, set;
2901   optab optab1, optab2;
2902   int op1_is_constant, op1_is_pow2 = 0;
2903   int max_cost, extra_cost;
2904   static HOST_WIDE_INT last_div_const = 0;
2905   static HOST_WIDE_INT ext_op1;
2906
2907   op1_is_constant = GET_CODE (op1) == CONST_INT;
2908   if (op1_is_constant)
2909     {
2910       ext_op1 = INTVAL (op1);
2911       if (unsignedp)
2912         ext_op1 &= GET_MODE_MASK (mode);
2913       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
2914                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
2915     }
2916
2917   /*
2918      This is the structure of expand_divmod:
2919
2920      First comes code to fix up the operands so we can perform the operations
2921      correctly and efficiently.
2922
2923      Second comes a switch statement with code specific for each rounding mode.
2924      For some special operands this code emits all RTL for the desired
2925      operation, for other cases, it generates only a quotient and stores it in
2926      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2927      to indicate that it has not done anything.
2928
2929      Last comes code that finishes the operation.  If QUOTIENT is set and
2930      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2931      QUOTIENT is not set, it is computed using trunc rounding.
2932
2933      We try to generate special code for division and remainder when OP1 is a
2934      constant.  If |OP1| = 2**n we can use shifts and some other fast
2935      operations.  For other values of OP1, we compute a carefully selected
2936      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2937      by m.
2938
2939      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2940      half of the product.  Different strategies for generating the product are
2941      implemented in expand_mult_highpart.
2942
2943      If what we actually want is the remainder, we generate that by another
2944      by-constant multiplication and a subtraction.  */
2945
2946   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2947      code below will malfunction if we are, so check here and handle
2948      the special case if so.  */
2949   if (op1 == const1_rtx)
2950     return rem_flag ? const0_rtx : op0;
2951
2952     /* When dividing by -1, we could get an overflow.
2953      negv_optab can handle overflows.  */
2954   if (! unsignedp && op1 == constm1_rtx)
2955     {
2956       if (rem_flag)
2957         return const0_rtx;
2958       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
2959                           ? negv_optab : neg_optab, op0, target, 0);
2960     }
2961
2962   if (target
2963       /* Don't use the function value register as a target
2964          since we have to read it as well as write it,
2965          and function-inlining gets confused by this.  */
2966       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2967           /* Don't clobber an operand while doing a multi-step calculation.  */
2968           || ((rem_flag || op1_is_constant)
2969               && (reg_mentioned_p (target, op0)
2970                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2971           || reg_mentioned_p (target, op1)
2972           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2973     target = 0;
2974
2975   /* Get the mode in which to perform this computation.  Normally it will
2976      be MODE, but sometimes we can't do the desired operation in MODE.
2977      If so, pick a wider mode in which we can do the operation.  Convert
2978      to that mode at the start to avoid repeated conversions.
2979
2980      First see what operations we need.  These depend on the expression
2981      we are evaluating.  (We assume that divxx3 insns exist under the
2982      same conditions that modxx3 insns and that these insns don't normally
2983      fail.  If these assumptions are not correct, we may generate less
2984      efficient code in some cases.)
2985
2986      Then see if we find a mode in which we can open-code that operation
2987      (either a division, modulus, or shift).  Finally, check for the smallest
2988      mode for which we can do the operation with a library call.  */
2989
2990   /* We might want to refine this now that we have division-by-constant
2991      optimization.  Since expand_mult_highpart tries so many variants, it is
2992      not straightforward to generalize this.  Maybe we should make an array
2993      of possible modes in init_expmed?  Save this for GCC 2.7.  */
2994
2995   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
2996             ? (unsignedp ? lshr_optab : ashr_optab)
2997             : (unsignedp ? udiv_optab : sdiv_optab));
2998   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
2999             ? optab1
3000             : (unsignedp ? udivmod_optab : sdivmod_optab));
3001
3002   for (compute_mode = mode; compute_mode != VOIDmode;
3003        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3004     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3005         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3006       break;
3007
3008   if (compute_mode == VOIDmode)
3009     for (compute_mode = mode; compute_mode != VOIDmode;
3010          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3011       if (optab1->handlers[(int) compute_mode].libfunc
3012           || optab2->handlers[(int) compute_mode].libfunc)
3013         break;
3014
3015   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3016      in expand_binop.  */
3017   if (compute_mode == VOIDmode)
3018     compute_mode = mode;
3019
3020   if (target && GET_MODE (target) == compute_mode)
3021     tquotient = target;
3022   else
3023     tquotient = gen_reg_rtx (compute_mode);
3024
3025   size = GET_MODE_BITSIZE (compute_mode);
3026 #if 0
3027   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3028      (mode), and thereby get better code when OP1 is a constant.  Do that
3029      later.  It will require going over all usages of SIZE below.  */
3030   size = GET_MODE_BITSIZE (mode);
3031 #endif
3032
3033   /* Only deduct something for a REM if the last divide done was
3034      for a different constant.   Then set the constant of the last
3035      divide.  */
3036   max_cost = div_cost[(int) compute_mode]
3037     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3038                       && INTVAL (op1) == last_div_const)
3039        ? mul_cost[(int) compute_mode] + add_cost : 0);
3040
3041   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3042
3043   /* Now convert to the best mode to use.  */
3044   if (compute_mode != mode)
3045     {
3046       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3047       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3048
3049       /* convert_modes may have placed op1 into a register, so we
3050          must recompute the following.  */
3051       op1_is_constant = GET_CODE (op1) == CONST_INT;
3052       op1_is_pow2 = (op1_is_constant
3053                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3054                           || (! unsignedp
3055                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3056     }
3057
3058   /* If one of the operands is a volatile MEM, copy it into a register.  */
3059
3060   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3061     op0 = force_reg (compute_mode, op0);
3062   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3063     op1 = force_reg (compute_mode, op1);
3064
3065   /* If we need the remainder or if OP1 is constant, we need to
3066      put OP0 in a register in case it has any queued subexpressions.  */
3067   if (rem_flag || op1_is_constant)
3068     op0 = force_reg (compute_mode, op0);
3069
3070   last = get_last_insn ();
3071
3072   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3073   if (unsignedp)
3074     {
3075       if (code == FLOOR_DIV_EXPR)
3076         code = TRUNC_DIV_EXPR;
3077       if (code == FLOOR_MOD_EXPR)
3078         code = TRUNC_MOD_EXPR;
3079       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3080         code = TRUNC_DIV_EXPR;
3081     }
3082
3083   if (op1 != const0_rtx)
3084     switch (code)
3085       {
3086       case TRUNC_MOD_EXPR:
3087       case TRUNC_DIV_EXPR:
3088         if (op1_is_constant)
3089           {
3090             if (unsignedp)
3091               {
3092                 unsigned HOST_WIDE_INT mh, ml;
3093                 int pre_shift, post_shift;
3094                 int dummy;
3095                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3096                                             & GET_MODE_MASK (compute_mode));
3097
3098                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3099                   {
3100                     pre_shift = floor_log2 (d);
3101                     if (rem_flag)
3102                       {
3103                         remainder
3104                           = expand_binop (compute_mode, and_optab, op0,
3105                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3106                                           remainder, 1,
3107                                           OPTAB_LIB_WIDEN);
3108                         if (remainder)
3109                           return gen_lowpart (mode, remainder);
3110                       }
3111                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3112                                              build_int_2 (pre_shift, 0),
3113                                              tquotient, 1);
3114                   }
3115                 else if (size <= HOST_BITS_PER_WIDE_INT)
3116                   {
3117                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3118                       {
3119                         /* Most significant bit of divisor is set; emit an scc
3120                            insn.  */
3121                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3122                                                     compute_mode, 1, 1);
3123                         if (quotient == 0)
3124                           goto fail1;
3125                       }
3126                     else
3127                       {
3128                         /* Find a suitable multiplier and right shift count
3129                            instead of multiplying with D.  */
3130
3131                         mh = choose_multiplier (d, size, size,
3132                                                 &ml, &post_shift, &dummy);
3133
3134                         /* If the suggested multiplier is more than SIZE bits,
3135                            we can do better for even divisors, using an
3136                            initial right shift.  */
3137                         if (mh != 0 && (d & 1) == 0)
3138                           {
3139                             pre_shift = floor_log2 (d & -d);
3140                             mh = choose_multiplier (d >> pre_shift, size,
3141                                                     size - pre_shift,
3142                                                     &ml, &post_shift, &dummy);
3143                             if (mh)
3144                               abort ();
3145                           }
3146                         else
3147                           pre_shift = 0;
3148
3149                         if (mh != 0)
3150                           {
3151                             rtx t1, t2, t3, t4;
3152
3153                             if (post_shift - 1 >= BITS_PER_WORD)
3154                               goto fail1;
3155
3156                             extra_cost = (shift_cost[post_shift - 1]
3157                                           + shift_cost[1] + 2 * add_cost);
3158                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3159                                                        NULL_RTX, 1,
3160                                                        max_cost - extra_cost);
3161                             if (t1 == 0)
3162                               goto fail1;
3163                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3164                                                                op0, t1),
3165                                                 NULL_RTX);
3166                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3167                                                build_int_2 (1, 0), NULL_RTX,1);
3168                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3169                                                               t1, t3),
3170                                                 NULL_RTX);
3171                             quotient
3172                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3173                                               build_int_2 (post_shift - 1, 0),
3174                                               tquotient, 1);
3175                           }
3176                         else
3177                           {
3178                             rtx t1, t2;
3179
3180                             if (pre_shift >= BITS_PER_WORD
3181                                 || post_shift >= BITS_PER_WORD)
3182                               goto fail1;
3183
3184                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3185                                                build_int_2 (pre_shift, 0),
3186                                                NULL_RTX, 1);
3187                             extra_cost = (shift_cost[pre_shift]
3188                                           + shift_cost[post_shift]);
3189                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3190                                                        NULL_RTX, 1,
3191                                                        max_cost - extra_cost);
3192                             if (t2 == 0)
3193                               goto fail1;
3194                             quotient
3195                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3196                                               build_int_2 (post_shift, 0),
3197                                               tquotient, 1);
3198                           }
3199                       }
3200                   }
3201                 else            /* Too wide mode to use tricky code */
3202                   break;
3203
3204                 insn = get_last_insn ();
3205                 if (insn != last
3206                     && (set = single_set (insn)) != 0
3207                     && SET_DEST (set) == quotient)
3208                   set_unique_reg_note (insn,
3209                                        REG_EQUAL,
3210                                        gen_rtx_UDIV (compute_mode, op0, op1));
3211               }
3212             else                /* TRUNC_DIV, signed */
3213               {
3214                 unsigned HOST_WIDE_INT ml;
3215                 int lgup, post_shift;
3216                 HOST_WIDE_INT d = INTVAL (op1);
3217                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3218
3219                 /* n rem d = n rem -d */
3220                 if (rem_flag && d < 0)
3221                   {
3222                     d = abs_d;
3223                     op1 = gen_int_mode (abs_d, compute_mode);
3224                   }
3225
3226                 if (d == 1)
3227                   quotient = op0;
3228                 else if (d == -1)
3229                   quotient = expand_unop (compute_mode, neg_optab, op0,
3230                                           tquotient, 0);
3231                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3232                   {
3233                     /* This case is not handled correctly below.  */
3234                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3235                                                 compute_mode, 1, 1);
3236                     if (quotient == 0)
3237                       goto fail1;
3238                   }
3239                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3240                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3241                          /* ??? The cheap metric is computed only for
3242                             word_mode.  If this operation is wider, this may
3243                             not be so.  Assume true if the optab has an
3244                             expander for this mode.  */
3245                          && (((rem_flag ? smod_optab : sdiv_optab)
3246                               ->handlers[(int) compute_mode].insn_code
3247                               != CODE_FOR_nothing)
3248                              || (sdivmod_optab->handlers[(int) compute_mode]
3249                                  .insn_code != CODE_FOR_nothing)))
3250                   ;
3251                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3252                   {
3253                     lgup = floor_log2 (abs_d);
3254                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3255                       {
3256                         rtx label = gen_label_rtx ();
3257                         rtx t1;
3258
3259                         t1 = copy_to_mode_reg (compute_mode, op0);
3260                         do_cmp_and_jump (t1, const0_rtx, GE,
3261                                          compute_mode, label);
3262                         expand_inc (t1, gen_int_mode (abs_d - 1,
3263                                                       compute_mode));
3264                         emit_label (label);
3265                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3266                                                  build_int_2 (lgup, 0),
3267                                                  tquotient, 0);
3268                       }
3269                     else
3270                       {
3271                         rtx t1, t2, t3;
3272                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3273                                            build_int_2 (size - 1, 0),
3274                                            NULL_RTX, 0);
3275                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3276                                            build_int_2 (size - lgup, 0),
3277                                            NULL_RTX, 1);
3278                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3279                                                           op0, t2),
3280                                             NULL_RTX);
3281                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3282                                                  build_int_2 (lgup, 0),
3283                                                  tquotient, 0);
3284                       }
3285
3286                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3287                        the quotient.  */
3288                     if (d < 0)
3289                       {
3290                         insn = get_last_insn ();
3291                         if (insn != last
3292                             && (set = single_set (insn)) != 0
3293                             && SET_DEST (set) == quotient
3294                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3295                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3296                           set_unique_reg_note (insn,
3297                                                REG_EQUAL,
3298                                                gen_rtx_DIV (compute_mode,
3299                                                             op0,
3300                                                             GEN_INT
3301                                                             (trunc_int_for_mode
3302                                                              (abs_d,
3303                                                               compute_mode))));
3304
3305                         quotient = expand_unop (compute_mode, neg_optab,
3306                                                 quotient, quotient, 0);
3307                       }
3308                   }
3309                 else if (size <= HOST_BITS_PER_WIDE_INT)
3310                   {
3311                     choose_multiplier (abs_d, size, size - 1,
3312                                        &ml, &post_shift, &lgup);
3313                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3314                       {
3315                         rtx t1, t2, t3;
3316
3317                         if (post_shift >= BITS_PER_WORD
3318                             || size - 1 >= BITS_PER_WORD)
3319                           goto fail1;
3320
3321                         extra_cost = (shift_cost[post_shift]
3322                                       + shift_cost[size - 1] + add_cost);
3323                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3324                                                    NULL_RTX, 0,
3325                                                    max_cost - extra_cost);
3326                         if (t1 == 0)
3327                           goto fail1;
3328                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3329                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3330                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3331                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3332                         if (d < 0)
3333                           quotient
3334                             = force_operand (gen_rtx_MINUS (compute_mode,
3335                                                             t3, t2),
3336                                              tquotient);
3337                         else
3338                           quotient
3339                             = force_operand (gen_rtx_MINUS (compute_mode,
3340                                                             t2, t3),
3341                                              tquotient);
3342                       }
3343                     else
3344                       {
3345                         rtx t1, t2, t3, t4;
3346
3347                         if (post_shift >= BITS_PER_WORD
3348                             || size - 1 >= BITS_PER_WORD)
3349                           goto fail1;
3350
3351                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3352                         extra_cost = (shift_cost[post_shift]
3353                                       + shift_cost[size - 1] + 2 * add_cost);
3354                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3355                                                    NULL_RTX, 0,
3356                                                    max_cost - extra_cost);
3357                         if (t1 == 0)
3358                           goto fail1;
3359                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3360                                                           t1, op0),
3361                                             NULL_RTX);
3362                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3363                                            build_int_2 (post_shift, 0),
3364                                            NULL_RTX, 0);
3365                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3366                                            build_int_2 (size - 1, 0),
3367                                            NULL_RTX, 0);
3368                         if (d < 0)
3369                           quotient
3370                             = force_operand (gen_rtx_MINUS (compute_mode,
3371                                                             t4, t3),
3372                                              tquotient);
3373                         else
3374                           quotient
3375                             = force_operand (gen_rtx_MINUS (compute_mode,
3376                                                             t3, t4),
3377                                              tquotient);
3378                       }
3379                   }
3380                 else            /* Too wide mode to use tricky code */
3381                   break;
3382
3383                 insn = get_last_insn ();
3384                 if (insn != last
3385                     && (set = single_set (insn)) != 0
3386                     && SET_DEST (set) == quotient)
3387                   set_unique_reg_note (insn,
3388                                        REG_EQUAL,
3389                                        gen_rtx_DIV (compute_mode, op0, op1));
3390               }
3391             break;
3392           }
3393       fail1:
3394         delete_insns_since (last);
3395         break;
3396
3397       case FLOOR_DIV_EXPR:
3398       case FLOOR_MOD_EXPR:
3399       /* We will come here only for signed operations.  */
3400         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3401           {
3402             unsigned HOST_WIDE_INT mh, ml;
3403             int pre_shift, lgup, post_shift;
3404             HOST_WIDE_INT d = INTVAL (op1);
3405
3406             if (d > 0)
3407               {
3408                 /* We could just as easily deal with negative constants here,
3409                    but it does not seem worth the trouble for GCC 2.6.  */
3410                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3411                   {
3412                     pre_shift = floor_log2 (d);
3413                     if (rem_flag)
3414                       {
3415                         remainder = expand_binop (compute_mode, and_optab, op0,
3416                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3417                                                   remainder, 0, OPTAB_LIB_WIDEN);
3418                         if (remainder)
3419                           return gen_lowpart (mode, remainder);
3420                       }
3421                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3422                                              build_int_2 (pre_shift, 0),
3423                                              tquotient, 0);
3424                   }
3425                 else
3426                   {
3427                     rtx t1, t2, t3, t4;
3428
3429                     mh = choose_multiplier (d, size, size - 1,
3430                                             &ml, &post_shift, &lgup);
3431                     if (mh)
3432                       abort ();
3433
3434                     if (post_shift < BITS_PER_WORD
3435                         && size - 1 < BITS_PER_WORD)
3436                       {
3437                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3438                                            build_int_2 (size - 1, 0),
3439                                            NULL_RTX, 0);
3440                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3441                                            NULL_RTX, 0, OPTAB_WIDEN);
3442                         extra_cost = (shift_cost[post_shift]
3443                                       + shift_cost[size - 1] + 2 * add_cost);
3444                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3445                                                    NULL_RTX, 1,
3446                                                    max_cost - extra_cost);
3447                         if (t3 != 0)
3448                           {
3449                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3450                                                build_int_2 (post_shift, 0),
3451                                                NULL_RTX, 1);
3452                             quotient = expand_binop (compute_mode, xor_optab,
3453                                                      t4, t1, tquotient, 0,
3454                                                      OPTAB_WIDEN);
3455                           }
3456                       }
3457                   }
3458               }
3459             else
3460               {
3461                 rtx nsign, t1, t2, t3, t4;
3462                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3463                                                   op0, constm1_rtx), NULL_RTX);
3464                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3465                                    0, OPTAB_WIDEN);
3466                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3467                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3468                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3469                                     NULL_RTX);
3470                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3471                                     NULL_RTX, 0);
3472                 if (t4)
3473                   {
3474                     rtx t5;
3475                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3476                                       NULL_RTX, 0);
3477                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3478                                                             t4, t5),
3479                                               tquotient);
3480                   }
3481               }
3482           }
3483
3484         if (quotient != 0)
3485           break;
3486         delete_insns_since (last);
3487
3488         /* Try using an instruction that produces both the quotient and
3489            remainder, using truncation.  We can easily compensate the quotient
3490            or remainder to get floor rounding, once we have the remainder.
3491            Notice that we compute also the final remainder value here,
3492            and return the result right away.  */
3493         if (target == 0 || GET_MODE (target) != compute_mode)
3494           target = gen_reg_rtx (compute_mode);
3495
3496         if (rem_flag)
3497           {
3498             remainder
3499               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3500             quotient = gen_reg_rtx (compute_mode);
3501           }
3502         else
3503           {
3504             quotient
3505               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3506             remainder = gen_reg_rtx (compute_mode);
3507           }
3508
3509         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3510                                  quotient, remainder, 0))
3511           {
3512             /* This could be computed with a branch-less sequence.
3513                Save that for later.  */
3514             rtx tem;
3515             rtx label = gen_label_rtx ();
3516             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3517             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3518                                 NULL_RTX, 0, OPTAB_WIDEN);
3519             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3520             expand_dec (quotient, const1_rtx);
3521             expand_inc (remainder, op1);
3522             emit_label (label);
3523             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3524           }
3525
3526         /* No luck with division elimination or divmod.  Have to do it
3527            by conditionally adjusting op0 *and* the result.  */
3528         {
3529           rtx label1, label2, label3, label4, label5;
3530           rtx adjusted_op0;
3531           rtx tem;
3532
3533           quotient = gen_reg_rtx (compute_mode);
3534           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3535           label1 = gen_label_rtx ();
3536           label2 = gen_label_rtx ();
3537           label3 = gen_label_rtx ();
3538           label4 = gen_label_rtx ();
3539           label5 = gen_label_rtx ();
3540           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3541           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3542           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3543                               quotient, 0, OPTAB_LIB_WIDEN);
3544           if (tem != quotient)
3545             emit_move_insn (quotient, tem);
3546           emit_jump_insn (gen_jump (label5));
3547           emit_barrier ();
3548           emit_label (label1);
3549           expand_inc (adjusted_op0, const1_rtx);
3550           emit_jump_insn (gen_jump (label4));
3551           emit_barrier ();
3552           emit_label (label2);
3553           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3554           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3555                               quotient, 0, OPTAB_LIB_WIDEN);
3556           if (tem != quotient)
3557             emit_move_insn (quotient, tem);
3558           emit_jump_insn (gen_jump (label5));
3559           emit_barrier ();
3560           emit_label (label3);
3561           expand_dec (adjusted_op0, const1_rtx);
3562           emit_label (label4);
3563           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3564                               quotient, 0, OPTAB_LIB_WIDEN);
3565           if (tem != quotient)
3566             emit_move_insn (quotient, tem);
3567           expand_dec (quotient, const1_rtx);
3568           emit_label (label5);
3569         }
3570         break;
3571
3572       case CEIL_DIV_EXPR:
3573       case CEIL_MOD_EXPR:
3574         if (unsignedp)
3575           {
3576             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3577               {
3578                 rtx t1, t2, t3;
3579                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3580                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3581                                    build_int_2 (floor_log2 (d), 0),
3582                                    tquotient, 1);
3583                 t2 = expand_binop (compute_mode, and_optab, op0,
3584                                    GEN_INT (d - 1),
3585                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3586                 t3 = gen_reg_rtx (compute_mode);
3587                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3588                                       compute_mode, 1, 1);
3589                 if (t3 == 0)
3590                   {
3591                     rtx lab;
3592                     lab = gen_label_rtx ();
3593                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3594                     expand_inc (t1, const1_rtx);
3595                     emit_label (lab);
3596                     quotient = t1;
3597                   }
3598                 else
3599                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3600                                                           t1, t3),
3601                                             tquotient);
3602                 break;
3603               }
3604
3605             /* Try using an instruction that produces both the quotient and
3606                remainder, using truncation.  We can easily compensate the
3607                quotient or remainder to get ceiling rounding, once we have the
3608                remainder.  Notice that we compute also the final remainder
3609                value here, and return the result right away.  */
3610             if (target == 0 || GET_MODE (target) != compute_mode)
3611               target = gen_reg_rtx (compute_mode);
3612
3613             if (rem_flag)
3614               {
3615                 remainder = (GET_CODE (target) == REG
3616                              ? target : gen_reg_rtx (compute_mode));
3617                 quotient = gen_reg_rtx (compute_mode);
3618               }
3619             else
3620               {
3621                 quotient = (GET_CODE (target) == REG
3622                             ? target : gen_reg_rtx (compute_mode));
3623                 remainder = gen_reg_rtx (compute_mode);
3624               }
3625
3626             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3627                                      remainder, 1))
3628               {
3629                 /* This could be computed with a branch-less sequence.
3630                    Save that for later.  */
3631                 rtx label = gen_label_rtx ();
3632                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3633                                  compute_mode, label);
3634                 expand_inc (quotient, const1_rtx);
3635                 expand_dec (remainder, op1);
3636                 emit_label (label);
3637                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3638               }
3639
3640             /* No luck with division elimination or divmod.  Have to do it
3641                by conditionally adjusting op0 *and* the result.  */
3642             {
3643               rtx label1, label2;
3644               rtx adjusted_op0, tem;
3645
3646               quotient = gen_reg_rtx (compute_mode);
3647               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3648               label1 = gen_label_rtx ();
3649               label2 = gen_label_rtx ();
3650               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3651                                compute_mode, label1);
3652               emit_move_insn  (quotient, const0_rtx);
3653               emit_jump_insn (gen_jump (label2));
3654               emit_barrier ();
3655               emit_label (label1);
3656               expand_dec (adjusted_op0, const1_rtx);
3657               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3658                                   quotient, 1, OPTAB_LIB_WIDEN);
3659               if (tem != quotient)
3660                 emit_move_insn (quotient, tem);
3661               expand_inc (quotient, const1_rtx);
3662               emit_label (label2);
3663             }
3664           }
3665         else /* signed */
3666           {
3667             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3668                 && INTVAL (op1) >= 0)
3669               {
3670                 /* This is extremely similar to the code for the unsigned case
3671                    above.  For 2.7 we should merge these variants, but for
3672                    2.6.1 I don't want to touch the code for unsigned since that
3673                    get used in C.  The signed case will only be used by other
3674                    languages (Ada).  */
3675
3676                 rtx t1, t2, t3;
3677                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3678                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3679                                    build_int_2 (floor_log2 (d), 0),
3680                                    tquotient, 0);
3681                 t2 = expand_binop (compute_mode, and_optab, op0,
3682                                    GEN_INT (d - 1),
3683                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684                 t3 = gen_reg_rtx (compute_mode);
3685                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3686                                       compute_mode, 1, 1);
3687                 if (t3 == 0)
3688                   {
3689                     rtx lab;
3690                     lab = gen_label_rtx ();
3691                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3692                     expand_inc (t1, const1_rtx);
3693                     emit_label (lab);
3694                     quotient = t1;
3695                   }
3696                 else
3697                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3698                                                           t1, t3),
3699                                             tquotient);
3700                 break;
3701               }
3702
3703             /* Try using an instruction that produces both the quotient and
3704                remainder, using truncation.  We can easily compensate the
3705                quotient or remainder to get ceiling rounding, once we have the
3706                remainder.  Notice that we compute also the final remainder
3707                value here, and return the result right away.  */
3708             if (target == 0 || GET_MODE (target) != compute_mode)
3709               target = gen_reg_rtx (compute_mode);
3710             if (rem_flag)
3711               {
3712                 remainder= (GET_CODE (target) == REG
3713                             ? target : gen_reg_rtx (compute_mode));
3714                 quotient = gen_reg_rtx (compute_mode);
3715               }
3716             else
3717               {
3718                 quotient = (GET_CODE (target) == REG
3719                             ? target : gen_reg_rtx (compute_mode));
3720                 remainder = gen_reg_rtx (compute_mode);
3721               }
3722
3723             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3724                                      remainder, 0))
3725               {
3726                 /* This could be computed with a branch-less sequence.
3727                    Save that for later.  */
3728                 rtx tem;
3729                 rtx label = gen_label_rtx ();
3730                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3731                                  compute_mode, label);
3732                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3733                                     NULL_RTX, 0, OPTAB_WIDEN);
3734                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3735                 expand_inc (quotient, const1_rtx);
3736                 expand_dec (remainder, op1);
3737                 emit_label (label);
3738                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3739               }
3740
3741             /* No luck with division elimination or divmod.  Have to do it
3742                by conditionally adjusting op0 *and* the result.  */
3743             {
3744               rtx label1, label2, label3, label4, label5;
3745               rtx adjusted_op0;
3746               rtx tem;
3747
3748               quotient = gen_reg_rtx (compute_mode);
3749               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3750               label1 = gen_label_rtx ();
3751               label2 = gen_label_rtx ();
3752               label3 = gen_label_rtx ();
3753               label4 = gen_label_rtx ();
3754               label5 = gen_label_rtx ();
3755               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3756               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3757                                compute_mode, label1);
3758               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3759                                   quotient, 0, OPTAB_LIB_WIDEN);
3760               if (tem != quotient)
3761                 emit_move_insn (quotient, tem);
3762               emit_jump_insn (gen_jump (label5));
3763               emit_barrier ();
3764               emit_label (label1);
3765               expand_dec (adjusted_op0, const1_rtx);
3766               emit_jump_insn (gen_jump (label4));
3767               emit_barrier ();
3768               emit_label (label2);
3769               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3770                                compute_mode, label3);
3771               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3772                                   quotient, 0, OPTAB_LIB_WIDEN);
3773               if (tem != quotient)
3774                 emit_move_insn (quotient, tem);
3775               emit_jump_insn (gen_jump (label5));
3776               emit_barrier ();
3777               emit_label (label3);
3778               expand_inc (adjusted_op0, const1_rtx);
3779               emit_label (label4);
3780               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3781                                   quotient, 0, OPTAB_LIB_WIDEN);
3782               if (tem != quotient)
3783                 emit_move_insn (quotient, tem);
3784               expand_inc (quotient, const1_rtx);
3785               emit_label (label5);
3786             }
3787           }
3788         break;
3789
3790       case EXACT_DIV_EXPR:
3791         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3792           {
3793             HOST_WIDE_INT d = INTVAL (op1);
3794             unsigned HOST_WIDE_INT ml;
3795             int pre_shift;
3796             rtx t1;
3797
3798             pre_shift = floor_log2 (d & -d);
3799             ml = invert_mod2n (d >> pre_shift, size);
3800             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3801                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3802             quotient = expand_mult (compute_mode, t1,
3803                                     gen_int_mode (ml, compute_mode),
3804                                     NULL_RTX, 0);
3805
3806             insn = get_last_insn ();
3807             set_unique_reg_note (insn,
3808                                  REG_EQUAL,
3809                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3810                                                  compute_mode,
3811                                                  op0, op1));
3812           }
3813         break;
3814
3815       case ROUND_DIV_EXPR:
3816       case ROUND_MOD_EXPR:
3817         if (unsignedp)
3818           {
3819             rtx tem;
3820             rtx label;
3821             label = gen_label_rtx ();
3822             quotient = gen_reg_rtx (compute_mode);
3823             remainder = gen_reg_rtx (compute_mode);
3824             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3825               {
3826                 rtx tem;
3827                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3828                                          quotient, 1, OPTAB_LIB_WIDEN);
3829                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3830                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3831                                           remainder, 1, OPTAB_LIB_WIDEN);
3832               }
3833             tem = plus_constant (op1, -1);
3834             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3835                                 build_int_2 (1, 0), NULL_RTX, 1);
3836             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3837             expand_inc (quotient, const1_rtx);
3838             expand_dec (remainder, op1);
3839             emit_label (label);
3840           }
3841         else
3842           {
3843             rtx abs_rem, abs_op1, tem, mask;
3844             rtx label;
3845             label = gen_label_rtx ();
3846             quotient = gen_reg_rtx (compute_mode);
3847             remainder = gen_reg_rtx (compute_mode);
3848             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3849               {
3850                 rtx tem;
3851                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3852                                          quotient, 0, OPTAB_LIB_WIDEN);
3853                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3854                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3855                                           remainder, 0, OPTAB_LIB_WIDEN);
3856               }
3857             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3858             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3859             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3860                                 build_int_2 (1, 0), NULL_RTX, 1);
3861             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3862             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3863                                 NULL_RTX, 0, OPTAB_WIDEN);
3864             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3865                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3866             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3867                                 NULL_RTX, 0, OPTAB_WIDEN);
3868             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3869                                 NULL_RTX, 0, OPTAB_WIDEN);
3870             expand_inc (quotient, tem);
3871             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3872                                 NULL_RTX, 0, OPTAB_WIDEN);
3873             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3874                                 NULL_RTX, 0, OPTAB_WIDEN);
3875             expand_dec (remainder, tem);
3876             emit_label (label);
3877           }
3878         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3879
3880       default:
3881         abort ();
3882       }
3883
3884   if (quotient == 0)
3885     {
3886       if (target && GET_MODE (target) != compute_mode)
3887         target = 0;
3888
3889       if (rem_flag)
3890         {
3891           /* Try to produce the remainder without producing the quotient.
3892              If we seem to have a divmod pattern that does not require widening,
3893              don't try widening here.  We should really have a WIDEN argument
3894              to expand_twoval_binop, since what we'd really like to do here is
3895              1) try a mod insn in compute_mode
3896              2) try a divmod insn in compute_mode
3897              3) try a div insn in compute_mode and multiply-subtract to get
3898                 remainder
3899              4) try the same things with widening allowed.  */
3900           remainder
3901             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3902                                  op0, op1, target,
3903                                  unsignedp,
3904                                  ((optab2->handlers[(int) compute_mode].insn_code
3905                                    != CODE_FOR_nothing)
3906                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3907           if (remainder == 0)
3908             {
3909               /* No luck there.  Can we do remainder and divide at once
3910                  without a library call?  */
3911               remainder = gen_reg_rtx (compute_mode);
3912               if (! expand_twoval_binop ((unsignedp
3913                                           ? udivmod_optab
3914                                           : sdivmod_optab),
3915                                          op0, op1,
3916                                          NULL_RTX, remainder, unsignedp))
3917                 remainder = 0;
3918             }
3919
3920           if (remainder)
3921             return gen_lowpart (mode, remainder);
3922         }
3923
3924       /* Produce the quotient.  Try a quotient insn, but not a library call.
3925          If we have a divmod in this mode, use it in preference to widening
3926          the div (for this test we assume it will not fail). Note that optab2
3927          is set to the one of the two optabs that the call below will use.  */
3928       quotient
3929         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3930                              op0, op1, rem_flag ? NULL_RTX : target,
3931                              unsignedp,
3932                              ((optab2->handlers[(int) compute_mode].insn_code
3933                                != CODE_FOR_nothing)
3934                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3935
3936       if (quotient == 0)
3937         {
3938           /* No luck there.  Try a quotient-and-remainder insn,
3939              keeping the quotient alone.  */
3940           quotient = gen_reg_rtx (compute_mode);
3941           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3942                                      op0, op1,
3943                                      quotient, NULL_RTX, unsignedp))
3944             {
3945               quotient = 0;
3946               if (! rem_flag)
3947                 /* Still no luck.  If we are not computing the remainder,
3948                    use a library call for the quotient.  */
3949                 quotient = sign_expand_binop (compute_mode,
3950                                               udiv_optab, sdiv_optab,
3951                                               op0, op1, target,
3952                                               unsignedp, OPTAB_LIB_WIDEN);
3953             }
3954         }
3955     }
3956
3957   if (rem_flag)
3958     {
3959       if (target && GET_MODE (target) != compute_mode)
3960         target = 0;
3961
3962       if (quotient == 0)
3963         /* No divide instruction either.  Use library for remainder.  */
3964         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3965                                        op0, op1, target,
3966                                        unsignedp, OPTAB_LIB_WIDEN);
3967       else
3968         {
3969           /* We divided.  Now finish doing X - Y * (X / Y).  */
3970           remainder = expand_mult (compute_mode, quotient, op1,
3971                                    NULL_RTX, unsignedp);
3972           remainder = expand_binop (compute_mode, sub_optab, op0,
3973                                     remainder, target, unsignedp,
3974                                     OPTAB_LIB_WIDEN);
3975         }
3976     }
3977
3978   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3979 }
3980 \f
3981 /* Return a tree node with data type TYPE, describing the value of X.
3982    Usually this is an RTL_EXPR, if there is no obvious better choice.
3983    X may be an expression, however we only support those expressions
3984    generated by loop.c.  */
3985
3986 tree
3987 make_tree (tree type, rtx x)
3988 {
3989   tree t;
3990
3991   switch (GET_CODE (x))
3992     {
3993     case CONST_INT:
3994       t = build_int_2 (INTVAL (x),
3995                        (TREE_UNSIGNED (type)
3996                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3997                        || INTVAL (x) >= 0 ? 0 : -1);
3998       TREE_TYPE (t) = type;
3999       return t;
4000
4001     case CONST_DOUBLE:
4002       if (GET_MODE (x) == VOIDmode)
4003         {
4004           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4005           TREE_TYPE (t) = type;
4006         }
4007       else
4008         {
4009           REAL_VALUE_TYPE d;
4010
4011           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4012           t = build_real (type, d);
4013         }
4014
4015       return t;
4016
4017     case CONST_VECTOR:
4018       {
4019         int i, units;
4020         rtx elt;
4021         tree t = NULL_TREE;
4022
4023         units = CONST_VECTOR_NUNITS (x);
4024
4025         /* Build a tree with vector elements.  */
4026         for (i = units - 1; i >= 0; --i)
4027           {
4028             elt = CONST_VECTOR_ELT (x, i);
4029             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4030           }
4031
4032         return build_vector (type, t);
4033       }
4034
4035     case PLUS:
4036       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4037                           make_tree (type, XEXP (x, 1))));
4038
4039     case MINUS:
4040       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4041                           make_tree (type, XEXP (x, 1))));
4042
4043     case NEG:
4044       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4045
4046     case MULT:
4047       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4048                           make_tree (type, XEXP (x, 1))));
4049
4050     case ASHIFT:
4051       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4052                           make_tree (type, XEXP (x, 1))));
4053
4054     case LSHIFTRT:
4055       t = (*lang_hooks.types.unsigned_type) (type);
4056       return fold (convert (type,
4057                             build (RSHIFT_EXPR, t,
4058                                    make_tree (t, XEXP (x, 0)),
4059                                    make_tree (type, XEXP (x, 1)))));
4060
4061     case ASHIFTRT:
4062       t = (*lang_hooks.types.signed_type) (type);
4063       return fold (convert (type,
4064                             build (RSHIFT_EXPR, t,
4065                                    make_tree (t, XEXP (x, 0)),
4066                                    make_tree (type, XEXP (x, 1)))));
4067
4068     case DIV:
4069       if (TREE_CODE (type) != REAL_TYPE)
4070         t = (*lang_hooks.types.signed_type) (type);
4071       else
4072         t = type;
4073
4074       return fold (convert (type,
4075                             build (TRUNC_DIV_EXPR, t,
4076                                    make_tree (t, XEXP (x, 0)),
4077                                    make_tree (t, XEXP (x, 1)))));
4078     case UDIV:
4079       t = (*lang_hooks.types.unsigned_type) (type);
4080       return fold (convert (type,
4081                             build (TRUNC_DIV_EXPR, t,
4082                                    make_tree (t, XEXP (x, 0)),
4083                                    make_tree (t, XEXP (x, 1)))));
4084
4085     case SIGN_EXTEND:
4086     case ZERO_EXTEND:
4087       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4088                                              GET_CODE (x) == ZERO_EXTEND);
4089       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4090
4091    default:
4092       t = make_node (RTL_EXPR);
4093       TREE_TYPE (t) = type;
4094
4095 #ifdef POINTERS_EXTEND_UNSIGNED
4096       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4097          ptr_mode.  So convert.  */
4098       if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4099         x = convert_memory_address (TYPE_MODE (type), x);
4100 #endif
4101
4102       RTL_EXPR_RTL (t) = x;
4103       /* There are no insns to be output
4104          when this rtl_expr is used.  */
4105       RTL_EXPR_SEQUENCE (t) = 0;
4106       return t;
4107     }
4108 }
4109
4110 /* Check whether the multiplication X * MULT + ADD overflows.
4111    X, MULT and ADD must be CONST_*.
4112    MODE is the machine mode for the computation.
4113    X and MULT must have mode MODE.  ADD may have a different mode.
4114    So can X (defaults to same as MODE).
4115    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4116
4117 bool
4118 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4119 {
4120   tree type, mult_type, add_type, result;
4121
4122   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4123
4124   /* In order to get a proper overflow indication from an unsigned
4125      type, we have to pretend that it's a sizetype.  */
4126   mult_type = type;
4127   if (unsignedp)
4128     {
4129       mult_type = copy_node (type);
4130       TYPE_IS_SIZETYPE (mult_type) = 1;
4131     }
4132
4133   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4134               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4135
4136   result = fold (build (PLUS_EXPR, mult_type,
4137                         fold (build (MULT_EXPR, mult_type,
4138                                      make_tree (mult_type, x),
4139                                      make_tree (mult_type, mult))),
4140                         make_tree (add_type, add)));
4141
4142   return TREE_CONSTANT_OVERFLOW (result);
4143 }
4144
4145 /* Return an rtx representing the value of X * MULT + ADD.
4146    TARGET is a suggestion for where to store the result (an rtx).
4147    MODE is the machine mode for the computation.
4148    X and MULT must have mode MODE.  ADD may have a different mode.
4149    So can X (defaults to same as MODE).
4150    UNSIGNEDP is nonzero to do unsigned multiplication.
4151    This may emit insns.  */
4152
4153 rtx
4154 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4155                  int unsignedp)
4156 {
4157   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4158   tree add_type = (GET_MODE (add) == VOIDmode
4159                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4160                                                               unsignedp));
4161   tree result =  fold (build (PLUS_EXPR, type,
4162                               fold (build (MULT_EXPR, type,
4163                                            make_tree (type, x),
4164                                            make_tree (type, mult))),
4165                               make_tree (add_type, add)));
4166
4167   return expand_expr (result, target, VOIDmode, 0);
4168 }
4169 \f
4170 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4171    and returning TARGET.
4172
4173    If TARGET is 0, a pseudo-register or constant is returned.  */
4174
4175 rtx
4176 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4177 {
4178   rtx tem = 0;
4179
4180   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4181     tem = simplify_binary_operation (AND, mode, op0, op1);
4182   if (tem == 0)
4183     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4184
4185   if (target == 0)
4186     target = tem;
4187   else if (tem != target)
4188     emit_move_insn (target, tem);
4189   return target;
4190 }
4191 \f
4192 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4193    and storing in TARGET.  Normally return TARGET.
4194    Return 0 if that cannot be done.
4195
4196    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4197    it is VOIDmode, they cannot both be CONST_INT.
4198
4199    UNSIGNEDP is for the case where we have to widen the operands
4200    to perform the operation.  It says to use zero-extension.
4201
4202    NORMALIZEP is 1 if we should convert the result to be either zero
4203    or one.  Normalize is -1 if we should convert the result to be
4204    either zero or -1.  If NORMALIZEP is zero, the result will be left
4205    "raw" out of the scc insn.  */
4206
4207 rtx
4208 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4209                  enum machine_mode mode, int unsignedp, int normalizep)
4210 {
4211   rtx subtarget;
4212   enum insn_code icode;
4213   enum machine_mode compare_mode;
4214   enum machine_mode target_mode = GET_MODE (target);
4215   rtx tem;
4216   rtx last = get_last_insn ();
4217   rtx pattern, comparison;
4218
4219   /* ??? Ok to do this and then fail? */
4220   op0 = protect_from_queue (op0, 0);
4221   op1 = protect_from_queue (op1, 0);
4222
4223   if (unsignedp)
4224     code = unsigned_condition (code);
4225
4226   /* If one operand is constant, make it the second one.  Only do this
4227      if the other operand is not constant as well.  */
4228
4229   if (swap_commutative_operands_p (op0, op1))
4230     {
4231       tem = op0;
4232       op0 = op1;
4233       op1 = tem;
4234       code = swap_condition (code);
4235     }
4236
4237   if (mode == VOIDmode)
4238     mode = GET_MODE (op0);
4239
4240   /* For some comparisons with 1 and -1, we can convert this to
4241      comparisons with zero.  This will often produce more opportunities for
4242      store-flag insns.  */
4243
4244   switch (code)
4245     {
4246     case LT:
4247       if (op1 == const1_rtx)
4248         op1 = const0_rtx, code = LE;
4249       break;
4250     case LE:
4251       if (op1 == constm1_rtx)
4252         op1 = const0_rtx, code = LT;
4253       break;
4254     case GE:
4255       if (op1 == const1_rtx)
4256         op1 = const0_rtx, code = GT;
4257       break;
4258     case GT:
4259       if (op1 == constm1_rtx)
4260         op1 = const0_rtx, code = GE;
4261       break;
4262     case GEU:
4263       if (op1 == const1_rtx)
4264         op1 = const0_rtx, code = NE;
4265       break;
4266     case LTU:
4267       if (op1 == const1_rtx)
4268         op1 = const0_rtx, code = EQ;
4269       break;
4270     default:
4271       break;
4272     }
4273
4274   /* If we are comparing a double-word integer with zero, we can convert
4275      the comparison into one involving a single word.  */
4276   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4277       && GET_MODE_CLASS (mode) == MODE_INT
4278       && op1 == const0_rtx
4279       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4280     {
4281       if (code == EQ || code == NE)
4282         {
4283           rtx op00, op01, op0both;
4284
4285           /* Do a logical OR of the two words and compare the result.  */
4286           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4287           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4288           op0both = expand_binop (word_mode, ior_optab, op00, op01,
4289                                   NULL_RTX, unsignedp, OPTAB_DIRECT);
4290           if (op0both != 0)
4291             return emit_store_flag (target, code, op0both, op1, word_mode,
4292                                     unsignedp, normalizep);
4293         }
4294       else if (code == LT || code == GE)
4295         {
4296           rtx op0h;
4297
4298           /* If testing the sign bit, can just test on high word.  */
4299           op0h = simplify_gen_subreg (word_mode, op0, mode,
4300                                       subreg_highpart_offset (word_mode, mode));
4301           return emit_store_flag (target, code, op0h, op1, word_mode,
4302                                   unsignedp, normalizep);
4303         }
4304     }
4305
4306   /* From now on, we won't change CODE, so set ICODE now.  */
4307   icode = setcc_gen_code[(int) code];
4308
4309   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4310      complement of A (for GE) and shifting the sign bit to the low bit.  */
4311   if (op1 == const0_rtx && (code == LT || code == GE)
4312       && GET_MODE_CLASS (mode) == MODE_INT
4313       && (normalizep || STORE_FLAG_VALUE == 1
4314           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4315               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4316                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4317     {
4318       subtarget = target;
4319
4320       /* If the result is to be wider than OP0, it is best to convert it
4321          first.  If it is to be narrower, it is *incorrect* to convert it
4322          first.  */
4323       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4324         {
4325           op0 = protect_from_queue (op0, 0);
4326           op0 = convert_modes (target_mode, mode, op0, 0);
4327           mode = target_mode;
4328         }
4329
4330       if (target_mode != mode)
4331         subtarget = 0;
4332
4333       if (code == GE)
4334         op0 = expand_unop (mode, one_cmpl_optab, op0,
4335                            ((STORE_FLAG_VALUE == 1 || normalizep)
4336                             ? 0 : subtarget), 0);
4337
4338       if (STORE_FLAG_VALUE == 1 || normalizep)
4339         /* If we are supposed to produce a 0/1 value, we want to do
4340            a logical shift from the sign bit to the low-order bit; for
4341            a -1/0 value, we do an arithmetic shift.  */
4342         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4343                             size_int (GET_MODE_BITSIZE (mode) - 1),
4344                             subtarget, normalizep != -1);
4345
4346       if (mode != target_mode)
4347         op0 = convert_modes (target_mode, mode, op0, 0);
4348
4349       return op0;
4350     }
4351
4352   if (icode != CODE_FOR_nothing)
4353     {
4354       insn_operand_predicate_fn pred;
4355
4356       /* We think we may be able to do this with a scc insn.  Emit the
4357          comparison and then the scc insn.
4358
4359          compare_from_rtx may call emit_queue, which would be deleted below
4360          if the scc insn fails.  So call it ourselves before setting LAST.
4361          Likewise for do_pending_stack_adjust.  */
4362
4363       emit_queue ();
4364       do_pending_stack_adjust ();
4365       last = get_last_insn ();
4366
4367       comparison
4368         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4369       if (GET_CODE (comparison) == CONST_INT)
4370         return (comparison == const0_rtx ? const0_rtx
4371                 : normalizep == 1 ? const1_rtx
4372                 : normalizep == -1 ? constm1_rtx
4373                 : const_true_rtx);
4374
4375       /* The code of COMPARISON may not match CODE if compare_from_rtx
4376          decided to swap its operands and reverse the original code.
4377
4378          We know that compare_from_rtx returns either a CONST_INT or
4379          a new comparison code, so it is safe to just extract the
4380          code from COMPARISON.  */
4381       code = GET_CODE (comparison);
4382
4383       /* Get a reference to the target in the proper mode for this insn.  */
4384       compare_mode = insn_data[(int) icode].operand[0].mode;
4385       subtarget = target;
4386       pred = insn_data[(int) icode].operand[0].predicate;
4387       if (preserve_subexpressions_p ()
4388           || ! (*pred) (subtarget, compare_mode))
4389         subtarget = gen_reg_rtx (compare_mode);
4390
4391       pattern = GEN_FCN (icode) (subtarget);
4392       if (pattern)
4393         {
4394           emit_insn (pattern);
4395
4396           /* If we are converting to a wider mode, first convert to
4397              TARGET_MODE, then normalize.  This produces better combining
4398              opportunities on machines that have a SIGN_EXTRACT when we are
4399              testing a single bit.  This mostly benefits the 68k.
4400
4401              If STORE_FLAG_VALUE does not have the sign bit set when
4402              interpreted in COMPARE_MODE, we can do this conversion as
4403              unsigned, which is usually more efficient.  */
4404           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4405             {
4406               convert_move (target, subtarget,
4407                             (GET_MODE_BITSIZE (compare_mode)
4408                              <= HOST_BITS_PER_WIDE_INT)
4409                             && 0 == (STORE_FLAG_VALUE
4410                                      & ((HOST_WIDE_INT) 1
4411                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4412               op0 = target;
4413               compare_mode = target_mode;
4414             }
4415           else
4416             op0 = subtarget;
4417
4418           /* If we want to keep subexpressions around, don't reuse our
4419              last target.  */
4420
4421           if (preserve_subexpressions_p ())
4422             subtarget = 0;
4423
4424           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4425              we don't have to do anything.  */
4426           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4427             ;
4428           /* STORE_FLAG_VALUE might be the most negative number, so write
4429              the comparison this way to avoid a compiler-time warning.  */
4430           else if (- normalizep == STORE_FLAG_VALUE)
4431             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4432
4433           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4434              makes it hard to use a value of just the sign bit due to
4435              ANSI integer constant typing rules.  */
4436           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4437                    && (STORE_FLAG_VALUE
4438                        & ((HOST_WIDE_INT) 1
4439                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4440             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4441                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4442                                 subtarget, normalizep == 1);
4443           else if (STORE_FLAG_VALUE & 1)
4444             {
4445               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4446               if (normalizep == -1)
4447                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4448             }
4449           else
4450             abort ();
4451
4452           /* If we were converting to a smaller mode, do the
4453              conversion now.  */
4454           if (target_mode != compare_mode)
4455             {
4456               convert_move (target, op0, 0);
4457               return target;
4458             }
4459           else
4460             return op0;
4461         }
4462     }
4463
4464   delete_insns_since (last);
4465
4466   /* If expensive optimizations, use different pseudo registers for each
4467      insn, instead of reusing the same pseudo.  This leads to better CSE,
4468      but slows down the compiler, since there are more pseudos */
4469   subtarget = (!flag_expensive_optimizations
4470                && (target_mode == mode)) ? target : NULL_RTX;
4471
4472   /* If we reached here, we can't do this with a scc insn.  However, there
4473      are some comparisons that can be done directly.  For example, if
4474      this is an equality comparison of integers, we can try to exclusive-or
4475      (or subtract) the two operands and use a recursive call to try the
4476      comparison with zero.  Don't do any of these cases if branches are
4477      very cheap.  */
4478
4479   if (BRANCH_COST > 0
4480       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4481       && op1 != const0_rtx)
4482     {
4483       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4484                           OPTAB_WIDEN);
4485
4486       if (tem == 0)
4487         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4488                             OPTAB_WIDEN);
4489       if (tem != 0)
4490         tem = emit_store_flag (target, code, tem, const0_rtx,
4491                                mode, unsignedp, normalizep);
4492       if (tem == 0)
4493         delete_insns_since (last);
4494       return tem;
4495     }
4496
4497   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4498      the constant zero.  Reject all other comparisons at this point.  Only
4499      do LE and GT if branches are expensive since they are expensive on
4500      2-operand machines.  */
4501
4502   if (BRANCH_COST == 0
4503       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4504       || (code != EQ && code != NE
4505           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4506     return 0;
4507
4508   /* See what we need to return.  We can only return a 1, -1, or the
4509      sign bit.  */
4510
4511   if (normalizep == 0)
4512     {
4513       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4514         normalizep = STORE_FLAG_VALUE;
4515
4516       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4517                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4518                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4519         ;
4520       else
4521         return 0;
4522     }
4523
4524   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4525      do the necessary operation below.  */
4526
4527   tem = 0;
4528
4529   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4530      the sign bit set.  */
4531
4532   if (code == LE)
4533     {
4534       /* This is destructive, so SUBTARGET can't be OP0.  */
4535       if (rtx_equal_p (subtarget, op0))
4536         subtarget = 0;
4537
4538       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4539                           OPTAB_WIDEN);
4540       if (tem)
4541         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4542                             OPTAB_WIDEN);
4543     }
4544
4545   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4546      number of bits in the mode of OP0, minus one.  */
4547
4548   if (code == GT)
4549     {
4550       if (rtx_equal_p (subtarget, op0))
4551         subtarget = 0;
4552
4553       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4554                           size_int (GET_MODE_BITSIZE (mode) - 1),
4555                           subtarget, 0);
4556       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4557                           OPTAB_WIDEN);
4558     }
4559
4560   if (code == EQ || code == NE)
4561     {
4562       /* For EQ or NE, one way to do the comparison is to apply an operation
4563          that converts the operand into a positive number if it is nonzero
4564          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4565          for NE we negate.  This puts the result in the sign bit.  Then we
4566          normalize with a shift, if needed.
4567
4568          Two operations that can do the above actions are ABS and FFS, so try
4569          them.  If that doesn't work, and MODE is smaller than a full word,
4570          we can use zero-extension to the wider mode (an unsigned conversion)
4571          as the operation.  */
4572
4573       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4574          that is compensated by the subsequent overflow when subtracting
4575          one / negating.  */
4576
4577       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4578         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4579       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4580         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4581       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4582         {
4583           op0 = protect_from_queue (op0, 0);
4584           tem = convert_modes (word_mode, mode, op0, 1);
4585           mode = word_mode;
4586         }
4587
4588       if (tem != 0)
4589         {
4590           if (code == EQ)
4591             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4592                                 0, OPTAB_WIDEN);
4593           else
4594             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4595         }
4596
4597       /* If we couldn't do it that way, for NE we can "or" the two's complement
4598          of the value with itself.  For EQ, we take the one's complement of
4599          that "or", which is an extra insn, so we only handle EQ if branches
4600          are expensive.  */
4601
4602       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4603         {
4604           if (rtx_equal_p (subtarget, op0))
4605             subtarget = 0;
4606
4607           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4608           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4609                               OPTAB_WIDEN);
4610
4611           if (tem && code == EQ)
4612             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4613         }
4614     }
4615
4616   if (tem && normalizep)
4617     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4618                         size_int (GET_MODE_BITSIZE (mode) - 1),
4619                         subtarget, normalizep == 1);
4620
4621   if (tem)
4622     {
4623       if (GET_MODE (tem) != target_mode)
4624         {
4625           convert_move (target, tem, 0);
4626           tem = target;
4627         }
4628       else if (!subtarget)
4629         {
4630           emit_move_insn (target, tem);
4631           tem = target;
4632         }
4633     }
4634   else
4635     delete_insns_since (last);
4636
4637   return tem;
4638 }
4639
4640 /* Like emit_store_flag, but always succeeds.  */
4641
4642 rtx
4643 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4644                        enum machine_mode mode, int unsignedp, int normalizep)
4645 {
4646   rtx tem, label;
4647
4648   /* First see if emit_store_flag can do the job.  */
4649   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4650   if (tem != 0)
4651     return tem;
4652
4653   if (normalizep == 0)
4654     normalizep = 1;
4655
4656   /* If this failed, we have to do this with set/compare/jump/set code.  */
4657
4658   if (GET_CODE (target) != REG
4659       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4660     target = gen_reg_rtx (GET_MODE (target));
4661
4662   emit_move_insn (target, const1_rtx);
4663   label = gen_label_rtx ();
4664   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4665                            NULL_RTX, label);
4666
4667   emit_move_insn (target, const0_rtx);
4668   emit_label (label);
4669
4670   return target;
4671 }
4672 \f
4673 /* Perform possibly multi-word comparison and conditional jump to LABEL
4674    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4675
4676    The algorithm is based on the code in expr.c:do_jump.
4677
4678    Note that this does not perform a general comparison.  Only variants
4679    generated within expmed.c are correctly handled, others abort (but could
4680    be handled if needed).  */
4681
4682 static void
4683 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
4684                  rtx label)
4685 {
4686   /* If this mode is an integer too wide to compare properly,
4687      compare word by word.  Rely on cse to optimize constant cases.  */
4688
4689   if (GET_MODE_CLASS (mode) == MODE_INT
4690       && ! can_compare_p (op, mode, ccp_jump))
4691     {
4692       rtx label2 = gen_label_rtx ();
4693
4694       switch (op)
4695         {
4696         case LTU:
4697           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4698           break;
4699
4700         case LEU:
4701           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4702           break;
4703
4704         case LT:
4705           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4706           break;
4707
4708         case GT:
4709           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4710           break;
4711
4712         case GE:
4713           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4714           break;
4715
4716           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4717              that's the only equality operations we do */
4718         case EQ:
4719           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4720             abort ();
4721           do_jump_by_parts_equality_rtx (arg1, label2, label);
4722           break;
4723
4724         case NE:
4725           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4726             abort ();
4727           do_jump_by_parts_equality_rtx (arg1, label, label2);
4728           break;
4729
4730         default:
4731           abort ();
4732         }
4733
4734       emit_label (label2);
4735     }
4736   else
4737     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4738 }