gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54
  55 /* Nonzero means divides or modulus operations are relatively cheap for
  56    powers of two, so don't use branches; emit the operation instead.
  57    Usually, this will mean that the MD file will emit non-branch
  58    sequences.  */
  59
  60 static int sdiv_pow2_cheap, smod_pow2_cheap;
  61
  62 #ifndef SLOW_UNALIGNED_ACCESS
  63 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  64 #endif
  65
  66 /* For compilers that support multiple targets with different word sizes,
  67    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  68    is the H8/300(H) compiler.  */
  69
  70 #ifndef MAX_BITS_PER_WORD
  71 #define MAX_BITS_PER_WORD BITS_PER_WORD
  72 #endif
  73
  74 /* Reduce conditional compilation elsewhere.  */
  75 #ifndef HAVE_insv
  76 #define HAVE_insv       0
  77 #define CODE_FOR_insv   CODE_FOR_nothing
  78 #define gen_insv(a,b,c,d) NULL_RTX
  79 #endif
  80 #ifndef HAVE_extv
  81 #define HAVE_extv       0
  82 #define CODE_FOR_extv   CODE_FOR_nothing
  83 #define gen_extv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extzv
  86 #define HAVE_extzv      0
  87 #define CODE_FOR_extzv  CODE_FOR_nothing
  88 #define gen_extzv(a,b,c,d) NULL_RTX
  89 #endif
  90
  91 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  92    shift count and some by mode.  */
  93 static int add_cost, negate_cost, zero_cost;
  94 static int shift_cost[MAX_BITS_PER_WORD];
  95 static int shiftadd_cost[MAX_BITS_PER_WORD];
  96 static int shiftsub_cost[MAX_BITS_PER_WORD];
  97 static int mul_cost[NUM_MACHINE_MODES];
  98 static int div_cost[NUM_MACHINE_MODES];
  99 static int mul_widen_cost[NUM_MACHINE_MODES];
 100 static int mul_highpart_cost[NUM_MACHINE_MODES];
 101
 102 void
 103 init_expmed (void)
 104 {
 105   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 106   int dummy;
 107   int m;
 108   enum machine_mode mode, wider_mode;
 109
 110   start_sequence ();
 111
 112   /* This is "some random pseudo register" for purposes of calling recog
 113      to see what insns exist.  */
 114   reg = gen_rtx_REG (word_mode, 10000);
 115
 116   zero_cost = rtx_cost (const0_rtx, 0);
 117   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 118
 119   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                                        gen_rtx_ASHIFT (word_mode, reg,
 121                                                        const0_rtx)));
 122
 123   shiftadd_insn
 124     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 125                               gen_rtx_PLUS (word_mode,
 126                                             gen_rtx_MULT (word_mode,
 127                                                           reg, const0_rtx),
 128                                             reg)));
 129
 130   shiftsub_insn
 131     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 132                               gen_rtx_MINUS (word_mode,
 133                                              gen_rtx_MULT (word_mode,
 134                                                            reg, const0_rtx),
 135                                              reg)));
 136
 137   init_recog ();
 138
 139   shift_cost[0] = 0;
 140   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 146
 147       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 148       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 149         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 150
 151       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 152       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 153         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 154
 155       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 156       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 157         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 158     }
 159
 160   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 161
 162   sdiv_pow2_cheap
 163     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 164        <= 2 * add_cost);
 165   smod_pow2_cheap
 166     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168
 169   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 170        mode != VOIDmode;
 171        mode = GET_MODE_WIDER_MODE (mode))
 172     {
 173       reg = gen_rtx_REG (mode, 10000);
 174       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 175       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 176       wider_mode = GET_MODE_WIDER_MODE (mode);
 177       if (wider_mode != VOIDmode)
 178         {
 179           mul_widen_cost[(int) wider_mode]
 180             = rtx_cost (gen_rtx_MULT (wider_mode,
 181                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 182                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 183                         SET);
 184           mul_highpart_cost[(int) mode]
 185             = rtx_cost (gen_rtx_TRUNCATE
 186                         (mode,
 187                          gen_rtx_LSHIFTRT (wider_mode,
 188                                            gen_rtx_MULT (wider_mode,
 189                                                          gen_rtx_ZERO_EXTEND
 190                                                          (wider_mode, reg),
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg)),
 193                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 194                         SET);
 195         }
 196     }
 197
 198   end_sequence ();
 199 }
 200
 201 /* Return an rtx representing minus the value of X.
 202    MODE is the intended mode of the result,
 203    useful if X is a CONST_INT.  */
 204
 205 rtx
 206 negate_rtx (enum machine_mode mode, rtx x)
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215
 216 /* Report on the availability of insv/extv/extzv and the desired mode
 217    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 218    is false; else the mode of the specified operand.  If OPNO is -1,
 219    all the caller cares about is whether the insn is available.  */
 220 enum machine_mode
 221 mode_for_extraction (enum extraction_pattern pattern, int opno)
 222 {
 223   const struct insn_data *data;
 224
 225   switch (pattern)
 226     {
 227     case EP_insv:
 228       if (HAVE_insv)
 229         {
 230           data = &insn_data[CODE_FOR_insv];
 231           break;
 232         }
 233       return MAX_MACHINE_MODE;
 234
 235     case EP_extv:
 236       if (HAVE_extv)
 237         {
 238           data = &insn_data[CODE_FOR_extv];
 239           break;
 240         }
 241       return MAX_MACHINE_MODE;
 242
 243     case EP_extzv:
 244       if (HAVE_extzv)
 245         {
 246           data = &insn_data[CODE_FOR_extzv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     default:
 252       abort ();
 253     }
 254
 255   if (opno == -1)
 256     return VOIDmode;
 257
 258   /* Everyone who uses this function used to follow it with
 259      if (result == VOIDmode) result = word_mode; */
 260   if (data->operand[opno].mode == VOIDmode)
 261     return word_mode;
 262   return data->operand[opno].mode;
 263 }
 264
 265 \f
 266 /* Generate code to store value from rtx VALUE
 267    into a bit-field within structure STR_RTX
 268    containing BITSIZE bits starting at bit BITNUM.
 269    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 270    ALIGN is the alignment that STR_RTX is known to have.
 271    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 272
 273 /* ??? Note that there are two different ideas here for how
 274    to determine the size to count bits within, for a register.
 275    One is BITS_PER_WORD, and the other is the size of operand 3
 276    of the insv pattern.
 277
 278    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 279    else, we use the mode of operand 3.  */
 280
 281 rtx
 282 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 283                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 284                  rtx value, HOST_WIDE_INT total_size)
 285 {
 286   unsigned int unit
 287     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 288   unsigned HOST_WIDE_INT offset = bitnum / unit;
 289   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 290   rtx op0 = str_rtx;
 291   int byte_offset;
 292
 293   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 294
 295   /* Discount the part of the structure before the desired byte.
 296      We need to know how many bytes are safe to reference after it.  */
 297   if (total_size >= 0)
 298     total_size -= (bitpos / BIGGEST_ALIGNMENT
 299                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 300
 301   while (GET_CODE (op0) == SUBREG)
 302     {
 303       /* The following line once was done only if WORDS_BIG_ENDIAN,
 304          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 305          meaningful at a much higher level; when structures are copied
 306          between memory and regs, the higher-numbered regs
 307          always get higher addresses.  */
 308       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 309       /* We used to adjust BITPOS here, but now we do the whole adjustment
 310          right after the loop.  */
 311       op0 = SUBREG_REG (op0);
 312     }
 313
 314   value = protect_from_queue (value, 0);
 315
 316   if (flag_force_mem)
 317     {
 318       int old_generating_concat_p = generating_concat_p;
 319       generating_concat_p = 0;
 320       value = force_not_mem (value);
 321       generating_concat_p = old_generating_concat_p;
 322     }
 323
 324   /* If the target is a register, overwriting the entire object, or storing
 325      a full-word or multi-word field can be done with just a SUBREG.
 326
 327      If the target is memory, storing any naturally aligned field can be
 328      done with a simple store.  For targets that support fast unaligned
 329      memory, any naturally sized, unit aligned field can be done directly.  */
 330
 331   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 332                 + (offset * UNITS_PER_WORD);
 333
 334   if (bitpos == 0
 335       && bitsize == GET_MODE_BITSIZE (fieldmode)
 336       && (GET_CODE (op0) != MEM
 337           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 338              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 339              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 340           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 341              || (offset * BITS_PER_UNIT % bitsize == 0
 342                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 343     {
 344       if (GET_MODE (op0) != fieldmode)
 345         {
 346           if (GET_CODE (op0) == SUBREG)
 347             {
 348               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 349                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 350                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 351                 op0 = SUBREG_REG (op0);
 352               else
 353                 /* Else we've got some float mode source being extracted into
 354                    a different float mode destination -- this combination of
 355                    subregs results in Severe Tire Damage.  */
 356                 abort ();
 357             }
 358           if (GET_CODE (op0) == REG)
 359             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 360           else
 361             op0 = adjust_address (op0, fieldmode, offset);
 362         }
 363       emit_move_insn (op0, value);
 364       return value;
 365     }
 366
 367   /* Make sure we are playing with integral modes.  Pun with subregs
 368      if we aren't.  This must come after the entire register case above,
 369      since that case is valid for any mode.  The following cases are only
 370      valid for integral modes.  */
 371   {
 372     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 373     if (imode != GET_MODE (op0))
 374       {
 375         if (GET_CODE (op0) == MEM)
 376           op0 = adjust_address (op0, imode, 0);
 377         else if (imode != BLKmode)
 378           op0 = gen_lowpart (imode, op0);
 379         else
 380           abort ();
 381       }
 382   }
 383
 384   /* We may be accessing data outside the field, which means
 385      we can alias adjacent data.  */
 386   if (GET_CODE (op0) == MEM)
 387     {
 388       op0 = shallow_copy_rtx (op0);
 389       set_mem_alias_set (op0, 0);
 390       set_mem_expr (op0, 0);
 391     }
 392
 393   /* If OP0 is a register, BITPOS must count within a word.
 394      But as we have it, it counts within whatever size OP0 now has.
 395      On a bigendian machine, these are not the same, so convert.  */
 396   if (BYTES_BIG_ENDIAN
 397       && GET_CODE (op0) != MEM
 398       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 399     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 400
 401   /* Storing an lsb-aligned field in a register
 402      can be done with a movestrict instruction.  */
 403
 404   if (GET_CODE (op0) != MEM
 405       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 406       && bitsize == GET_MODE_BITSIZE (fieldmode)
 407       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 408           != CODE_FOR_nothing))
 409     {
 410       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 411
 412       /* Get appropriate low part of the value being stored.  */
 413       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 414         value = gen_lowpart (fieldmode, value);
 415       else if (!(GET_CODE (value) == SYMBOL_REF
 416                  || GET_CODE (value) == LABEL_REF
 417                  || GET_CODE (value) == CONST))
 418         value = convert_to_mode (fieldmode, value, 0);
 419
 420       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 421         value = copy_to_mode_reg (fieldmode, value);
 422
 423       if (GET_CODE (op0) == SUBREG)
 424         {
 425           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 426               || GET_MODE_CLASS (fieldmode) == MODE_INT
 427               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 428             op0 = SUBREG_REG (op0);
 429           else
 430             /* Else we've got some float mode source being extracted into
 431                a different float mode destination -- this combination of
 432                subregs results in Severe Tire Damage.  */
 433             abort ();
 434         }
 435
 436       emit_insn (GEN_FCN (icode)
 437                  (gen_rtx_SUBREG (fieldmode, op0,
 438                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 439                                   + (offset * UNITS_PER_WORD)),
 440                                   value));
 441
 442       return value;
 443     }
 444
 445   /* Handle fields bigger than a word.  */
 446
 447   if (bitsize > BITS_PER_WORD)
 448     {
 449       /* Here we transfer the words of the field
 450          in the order least significant first.
 451          This is because the most significant word is the one which may
 452          be less than full.
 453          However, only do that if the value is not BLKmode.  */
 454
 455       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 456       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 457       unsigned int i;
 458
 459       /* This is the mode we must force value to, so that there will be enough
 460          subwords to extract.  Note that fieldmode will often (always?) be
 461          VOIDmode, because that is what store_field uses to indicate that this
 462          is a bit field, but passing VOIDmode to operand_subword_force will
 463          result in an abort.  */
 464       fieldmode = GET_MODE (value);
 465       if (fieldmode == VOIDmode)
 466         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 467
 468       for (i = 0; i < nwords; i++)
 469         {
 470           /* If I is 0, use the low-order word in both field and target;
 471              if I is 1, use the next to lowest word; and so on.  */
 472           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 473           unsigned int bit_offset = (backwards
 474                                      ? MAX ((int) bitsize - ((int) i + 1)
 475                                             * BITS_PER_WORD,
 476                                             0)
 477                                      : (int) i * BITS_PER_WORD);
 478
 479           store_bit_field (op0, MIN (BITS_PER_WORD,
 480                                      bitsize - i * BITS_PER_WORD),
 481                            bitnum + bit_offset, word_mode,
 482                            operand_subword_force (value, wordnum, fieldmode),
 483                            total_size);
 484         }
 485       return value;
 486     }
 487
 488   /* From here on we can assume that the field to be stored in is
 489      a full-word (whatever type that is), since it is shorter than a word.  */
 490
 491   /* OFFSET is the number of words or bytes (UNIT says which)
 492      from STR_RTX to the first word or byte containing part of the field.  */
 493
 494   if (GET_CODE (op0) != MEM)
 495     {
 496       if (offset != 0
 497           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 498         {
 499           if (GET_CODE (op0) != REG)
 500             {
 501               /* Since this is a destination (lvalue), we can't copy it to a
 502                  pseudo.  We can trivially remove a SUBREG that does not
 503                  change the size of the operand.  Such a SUBREG may have been
 504                  added above.  Otherwise, abort.  */
 505               if (GET_CODE (op0) == SUBREG
 506                   && (GET_MODE_SIZE (GET_MODE (op0))
 507                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 508                 op0 = SUBREG_REG (op0);
 509               else
 510                 abort ();
 511             }
 512           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 513                                 op0, (offset * UNITS_PER_WORD));
 514         }
 515       offset = 0;
 516     }
 517   else
 518     op0 = protect_from_queue (op0, 1);
 519
 520   /* If VALUE is a floating-point mode, access it as an integer of the
 521      corresponding size.  This can occur on a machine with 64 bit registers
 522      that uses SFmode for float.  This can also occur for unaligned float
 523      structure fields.  */
 524   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 525       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 526     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 527                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 528                          value);
 529
 530   /* Now OFFSET is nonzero only if OP0 is memory
 531      and is therefore always measured in bytes.  */
 532
 533   if (HAVE_insv
 534       && GET_MODE (value) != BLKmode
 535       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 536       /* Ensure insv's size is wide enough for this field.  */
 537       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 538       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 539             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 540     {
 541       int xbitpos = bitpos;
 542       rtx value1;
 543       rtx xop0 = op0;
 544       rtx last = get_last_insn ();
 545       rtx pat;
 546       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 547       int save_volatile_ok = volatile_ok;
 548
 549       volatile_ok = 1;
 550
 551       /* If this machine's insv can only insert into a register, copy OP0
 552          into a register and save it back later.  */
 553       /* This used to check flag_force_mem, but that was a serious
 554          de-optimization now that flag_force_mem is enabled by -O2.  */
 555       if (GET_CODE (op0) == MEM
 556           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 557                 (op0, VOIDmode)))
 558         {
 559           rtx tempreg;
 560           enum machine_mode bestmode;
 561
 562           /* Get the mode to use for inserting into this field.  If OP0 is
 563              BLKmode, get the smallest mode consistent with the alignment. If
 564              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 565              mode. Otherwise, use the smallest mode containing the field.  */
 566
 567           if (GET_MODE (op0) == BLKmode
 568               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 569             bestmode
 570               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 571                                MEM_VOLATILE_P (op0));
 572           else
 573             bestmode = GET_MODE (op0);
 574
 575           if (bestmode == VOIDmode
 576               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 577                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 578             goto insv_loses;
 579
 580           /* Adjust address to point to the containing unit of that mode.
 581              Compute offset as multiple of this unit, counting in bytes.  */
 582           unit = GET_MODE_BITSIZE (bestmode);
 583           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 584           bitpos = bitnum % unit;
 585           op0 = adjust_address (op0, bestmode,  offset);
 586
 587           /* Fetch that unit, store the bitfield in it, then store
 588              the unit.  */
 589           tempreg = copy_to_reg (op0);
 590           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 591                            total_size);
 592           emit_move_insn (op0, tempreg);
 593           return value;
 594         }
 595       volatile_ok = save_volatile_ok;
 596
 597       /* Add OFFSET into OP0's address.  */
 598       if (GET_CODE (xop0) == MEM)
 599         xop0 = adjust_address (xop0, byte_mode, offset);
 600
 601       /* If xop0 is a register, we need it in MAXMODE
 602          to make it acceptable to the format of insv.  */
 603       if (GET_CODE (xop0) == SUBREG)
 604         /* We can't just change the mode, because this might clobber op0,
 605            and we will need the original value of op0 if insv fails.  */
 606         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 607       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 608         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 609
 610       /* On big-endian machines, we count bits from the most significant.
 611          If the bit field insn does not, we must invert.  */
 612
 613       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 614         xbitpos = unit - bitsize - xbitpos;
 615
 616       /* We have been counting XBITPOS within UNIT.
 617          Count instead within the size of the register.  */
 618       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 619         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 620
 621       unit = GET_MODE_BITSIZE (maxmode);
 622
 623       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 624       value1 = value;
 625       if (GET_MODE (value) != maxmode)
 626         {
 627           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 628             {
 629               /* Optimization: Don't bother really extending VALUE
 630                  if it has all the bits we will actually use.  However,
 631                  if we must narrow it, be sure we do it correctly.  */
 632
 633               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 634                 {
 635                   rtx tmp;
 636
 637                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 638                   if (! tmp)
 639                     tmp = simplify_gen_subreg (maxmode,
 640                                                force_reg (GET_MODE (value),
 641                                                           value1),
 642                                                GET_MODE (value), 0);
 643                   value1 = tmp;
 644                 }
 645               else
 646                 value1 = gen_lowpart (maxmode, value1);
 647             }
 648           else if (GET_CODE (value) == CONST_INT)
 649             value1 = gen_int_mode (INTVAL (value), maxmode);
 650           else if (!CONSTANT_P (value))
 651             /* Parse phase is supposed to make VALUE's data type
 652                match that of the component reference, which is a type
 653                at least as wide as the field; so VALUE should have
 654                a mode that corresponds to that type.  */
 655             abort ();
 656         }
 657
 658       /* If this machine's insv insists on a register,
 659          get VALUE1 into a register.  */
 660       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 661              (value1, maxmode)))
 662         value1 = force_reg (maxmode, value1);
 663
 664       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 665       if (pat)
 666         emit_insn (pat);
 667       else
 668         {
 669           delete_insns_since (last);
 670           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 671         }
 672     }
 673   else
 674     insv_loses:
 675     /* Insv is not available; store using shifts and boolean ops.  */
 676     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 677   return value;
 678 }
 679 \f
 680 /* Use shifts and boolean operations to store VALUE
 681    into a bit field of width BITSIZE
 682    in a memory location specified by OP0 except offset by OFFSET bytes.
 683      (OFFSET must be 0 if OP0 is a register.)
 684    The field starts at position BITPOS within the byte.
 685     (If OP0 is a register, it may be a full word or a narrower mode,
 686      but BITPOS still counts within a full word,
 687      which is significant on bigendian machines.)
 688
 689    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 690
 691 static void
 692 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 693                        unsigned HOST_WIDE_INT bitsize,
 694                        unsigned HOST_WIDE_INT bitpos, rtx value)
 695 {
 696   enum machine_mode mode;
 697   unsigned int total_bits = BITS_PER_WORD;
 698   rtx subtarget, temp;
 699   int all_zero = 0;
 700   int all_one = 0;
 701
 702   /* There is a case not handled here:
 703      a structure with a known alignment of just a halfword
 704      and a field split across two aligned halfwords within the structure.
 705      Or likewise a structure with a known alignment of just a byte
 706      and a field split across two bytes.
 707      Such cases are not supposed to be able to occur.  */
 708
 709   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 710     {
 711       if (offset != 0)
 712         abort ();
 713       /* Special treatment for a bit field split across two registers.  */
 714       if (bitsize + bitpos > BITS_PER_WORD)
 715         {
 716           store_split_bit_field (op0, bitsize, bitpos, value);
 717           return;
 718         }
 719     }
 720   else
 721     {
 722       /* Get the proper mode to use for this field.  We want a mode that
 723          includes the entire field.  If such a mode would be larger than
 724          a word, we won't be doing the extraction the normal way.
 725          We don't want a mode bigger than the destination.  */
 726
 727       mode = GET_MODE (op0);
 728       if (GET_MODE_BITSIZE (mode) == 0
 729           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 730         mode = word_mode;
 731       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 732                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 733
 734       if (mode == VOIDmode)
 735         {
 736           /* The only way this should occur is if the field spans word
 737              boundaries.  */
 738           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 739                                  value);
 740           return;
 741         }
 742
 743       total_bits = GET_MODE_BITSIZE (mode);
 744
 745       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 746          be in the range 0 to total_bits-1, and put any excess bytes in
 747          OFFSET.  */
 748       if (bitpos >= total_bits)
 749         {
 750           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 751           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 752                      * BITS_PER_UNIT);
 753         }
 754
 755       /* Get ref to an aligned byte, halfword, or word containing the field.
 756          Adjust BITPOS to be position within a word,
 757          and OFFSET to be the offset of that word.
 758          Then alter OP0 to refer to that word.  */
 759       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 760       offset -= (offset % (total_bits / BITS_PER_UNIT));
 761       op0 = adjust_address (op0, mode, offset);
 762     }
 763
 764   mode = GET_MODE (op0);
 765
 766   /* Now MODE is either some integral mode for a MEM as OP0,
 767      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 768      The bit field is contained entirely within OP0.
 769      BITPOS is the starting bit number within OP0.
 770      (OP0's mode may actually be narrower than MODE.)  */
 771
 772   if (BYTES_BIG_ENDIAN)
 773       /* BITPOS is the distance between our msb
 774          and that of the containing datum.
 775          Convert it to the distance from the lsb.  */
 776       bitpos = total_bits - bitsize - bitpos;
 777
 778   /* Now BITPOS is always the distance between our lsb
 779      and that of OP0.  */
 780
 781   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 782      we must first convert its mode to MODE.  */
 783
 784   if (GET_CODE (value) == CONST_INT)
 785     {
 786       HOST_WIDE_INT v = INTVAL (value);
 787
 788       if (bitsize < HOST_BITS_PER_WIDE_INT)
 789         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 790
 791       if (v == 0)
 792         all_zero = 1;
 793       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 794                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 795                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 796         all_one = 1;
 797
 798       value = lshift_value (mode, value, bitpos, bitsize);
 799     }
 800   else
 801     {
 802       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 803                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 804
 805       if (GET_MODE (value) != mode)
 806         {
 807           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 808               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 809             value = gen_lowpart (mode, value);
 810           else
 811             value = convert_to_mode (mode, value, 1);
 812         }
 813
 814       if (must_and)
 815         value = expand_binop (mode, and_optab, value,
 816                               mask_rtx (mode, 0, bitsize, 0),
 817                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 818       if (bitpos > 0)
 819         value = expand_shift (LSHIFT_EXPR, mode, value,
 820                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 821     }
 822
 823   /* Now clear the chosen bits in OP0,
 824      except that if VALUE is -1 we need not bother.  */
 825
 826   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 827
 828   if (! all_one)
 829     {
 830       temp = expand_binop (mode, and_optab, op0,
 831                            mask_rtx (mode, bitpos, bitsize, 1),
 832                            subtarget, 1, OPTAB_LIB_WIDEN);
 833       subtarget = temp;
 834     }
 835   else
 836     temp = op0;
 837
 838   /* Now logical-or VALUE into OP0, unless it is zero.  */
 839
 840   if (! all_zero)
 841     temp = expand_binop (mode, ior_optab, temp, value,
 842                          subtarget, 1, OPTAB_LIB_WIDEN);
 843   if (op0 != temp)
 844     emit_move_insn (op0, temp);
 845 }
 846 \f
 847 /* Store a bit field that is split across multiple accessible memory objects.
 848
 849    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 850    BITSIZE is the field width; BITPOS the position of its first bit
 851    (within the word).
 852    VALUE is the value to store.
 853
 854    This does not yet handle fields wider than BITS_PER_WORD.  */
 855
 856 static void
 857 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 858                        unsigned HOST_WIDE_INT bitpos, rtx value)
 859 {
 860   unsigned int unit;
 861   unsigned int bitsdone = 0;
 862
 863   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 864      much at a time.  */
 865   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 866     unit = BITS_PER_WORD;
 867   else
 868     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 869
 870   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 871      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 872      that VALUE might be a floating-point constant.  */
 873   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 874     {
 875       rtx word = gen_lowpart_common (word_mode, value);
 876
 877       if (word && (value != word))
 878         value = word;
 879       else
 880         value = gen_lowpart_common (word_mode,
 881                                     force_reg (GET_MODE (value) != VOIDmode
 882                                                ? GET_MODE (value)
 883                                                : word_mode, value));
 884     }
 885   else if (GET_CODE (value) == ADDRESSOF)
 886     value = copy_to_reg (value);
 887
 888   while (bitsdone < bitsize)
 889     {
 890       unsigned HOST_WIDE_INT thissize;
 891       rtx part, word;
 892       unsigned HOST_WIDE_INT thispos;
 893       unsigned HOST_WIDE_INT offset;
 894
 895       offset = (bitpos + bitsdone) / unit;
 896       thispos = (bitpos + bitsdone) % unit;
 897
 898       /* THISSIZE must not overrun a word boundary.  Otherwise,
 899          store_fixed_bit_field will call us again, and we will mutually
 900          recurse forever.  */
 901       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 902       thissize = MIN (thissize, unit - thispos);
 903
 904       if (BYTES_BIG_ENDIAN)
 905         {
 906           int total_bits;
 907
 908           /* We must do an endian conversion exactly the same way as it is
 909              done in extract_bit_field, so that the two calls to
 910              extract_fixed_bit_field will have comparable arguments.  */
 911           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 912             total_bits = BITS_PER_WORD;
 913           else
 914             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 915
 916           /* Fetch successively less significant portions.  */
 917           if (GET_CODE (value) == CONST_INT)
 918             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 919                              >> (bitsize - bitsdone - thissize))
 920                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 921           else
 922             /* The args are chosen so that the last part includes the
 923                lsb.  Give extract_bit_field the value it needs (with
 924                endianness compensation) to fetch the piece we want.  */
 925             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 926                                             total_bits - bitsize + bitsdone,
 927                                             NULL_RTX, 1);
 928         }
 929       else
 930         {
 931           /* Fetch successively more significant portions.  */
 932           if (GET_CODE (value) == CONST_INT)
 933             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 934                              >> bitsdone)
 935                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 936           else
 937             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 938                                             bitsdone, NULL_RTX, 1);
 939         }
 940
 941       /* If OP0 is a register, then handle OFFSET here.
 942
 943          When handling multiword bitfields, extract_bit_field may pass
 944          down a word_mode SUBREG of a larger REG for a bitfield that actually
 945          crosses a word boundary.  Thus, for a SUBREG, we must find
 946          the current word starting from the base register.  */
 947       if (GET_CODE (op0) == SUBREG)
 948         {
 949           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 950           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 951                                         GET_MODE (SUBREG_REG (op0)));
 952           offset = 0;
 953         }
 954       else if (GET_CODE (op0) == REG)
 955         {
 956           word = operand_subword_force (op0, offset, GET_MODE (op0));
 957           offset = 0;
 958         }
 959       else
 960         word = op0;
 961
 962       /* OFFSET is in UNITs, and UNIT is in bits.
 963          store_fixed_bit_field wants offset in bytes.  */
 964       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 965                              thispos, part);
 966       bitsdone += thissize;
 967     }
 968 }
 969 \f
 970 /* Generate code to extract a byte-field from STR_RTX
 971    containing BITSIZE bits, starting at BITNUM,
 972    and put it in TARGET if possible (if TARGET is nonzero).
 973    Regardless of TARGET, we return the rtx for where the value is placed.
 974    It may be a QUEUED.
 975
 976    STR_RTX is the structure containing the byte (a REG or MEM).
 977    UNSIGNEDP is nonzero if this is an unsigned bit field.
 978    MODE is the natural mode of the field value once extracted.
 979    TMODE is the mode the caller would like the value to have;
 980    but the value may be returned with type MODE instead.
 981
 982    TOTAL_SIZE is the size in bytes of the containing structure,
 983    or -1 if varying.
 984
 985    If a TARGET is specified and we can store in it at no extra cost,
 986    we do so, and return TARGET.
 987    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 988    if they are equally easy.  */
 989
 990 rtx
 991 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 992                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
 993                    enum machine_mode mode, enum machine_mode tmode,
 994                    HOST_WIDE_INT total_size)
 995 {
 996   unsigned int unit
 997     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 998   unsigned HOST_WIDE_INT offset = bitnum / unit;
 999   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1000   rtx op0 = str_rtx;
1001   rtx spec_target = target;
1002   rtx spec_target_subreg = 0;
1003   enum machine_mode int_mode;
1004   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1005   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1006   enum machine_mode mode1;
1007   int byte_offset;
1008
1009   /* Discount the part of the structure before the desired byte.
1010      We need to know how many bytes are safe to reference after it.  */
1011   if (total_size >= 0)
1012     total_size -= (bitpos / BIGGEST_ALIGNMENT
1013                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1014
1015   if (tmode == VOIDmode)
1016     tmode = mode;
1017
1018   while (GET_CODE (op0) == SUBREG)
1019     {
1020       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1021       if (bitpos > unit)
1022         {
1023           offset += (bitpos / unit);
1024           bitpos %= unit;
1025         }
1026       op0 = SUBREG_REG (op0);
1027     }
1028
1029   if (GET_CODE (op0) == REG
1030       && mode == GET_MODE (op0)
1031       && bitnum == 0
1032       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1033     {
1034       /* We're trying to extract a full register from itself.  */
1035       return op0;
1036     }
1037
1038   /* Make sure we are playing with integral modes.  Pun with subregs
1039      if we aren't.  */
1040   {
1041     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1042     if (imode != GET_MODE (op0))
1043       {
1044         if (GET_CODE (op0) == MEM)
1045           op0 = adjust_address (op0, imode, 0);
1046         else if (imode != BLKmode)
1047           op0 = gen_lowpart (imode, op0);
1048         else
1049           abort ();
1050       }
1051   }
1052
1053   /* We may be accessing data outside the field, which means
1054      we can alias adjacent data.  */
1055   if (GET_CODE (op0) == MEM)
1056     {
1057       op0 = shallow_copy_rtx (op0);
1058       set_mem_alias_set (op0, 0);
1059       set_mem_expr (op0, 0);
1060     }
1061
1062   /* Extraction of a full-word or multi-word value from a structure
1063      in a register or aligned memory can be done with just a SUBREG.
1064      A subword value in the least significant part of a register
1065      can also be extracted with a SUBREG.  For this, we need the
1066      byte offset of the value in op0.  */
1067
1068   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1069
1070   /* If OP0 is a register, BITPOS must count within a word.
1071      But as we have it, it counts within whatever size OP0 now has.
1072      On a bigendian machine, these are not the same, so convert.  */
1073   if (BYTES_BIG_ENDIAN
1074       && GET_CODE (op0) != MEM
1075       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1076     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1077
1078   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1079      If that's wrong, the solution is to test for it and set TARGET to 0
1080      if needed.  */
1081
1082   mode1  = (VECTOR_MODE_P (tmode)
1083             ? mode
1084             : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1085
1086   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1087         && bitpos % BITS_PER_WORD == 0)
1088        || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1089            /* ??? The big endian test here is wrong.  This is correct
1090               if the value is in a register, and if mode_for_size is not
1091               the same mode as op0.  This causes us to get unnecessarily
1092               inefficient code from the Thumb port when -mbig-endian.  */
1093            && (BYTES_BIG_ENDIAN
1094                ? bitpos + bitsize == BITS_PER_WORD
1095                : bitpos == 0)))
1096       && ((GET_CODE (op0) != MEM
1097            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1098                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1099            && GET_MODE_SIZE (mode1) != 0
1100            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1101           || (GET_CODE (op0) == MEM
1102               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1103                   || (offset * BITS_PER_UNIT % bitsize == 0
1104                       && MEM_ALIGN (op0) % bitsize == 0)))))
1105     {
1106       if (mode1 != GET_MODE (op0))
1107         {
1108           if (GET_CODE (op0) == SUBREG)
1109             {
1110               if (GET_MODE (SUBREG_REG (op0)) == mode1
1111                   || GET_MODE_CLASS (mode1) == MODE_INT
1112                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1113                 op0 = SUBREG_REG (op0);
1114               else
1115                 /* Else we've got some float mode source being extracted into
1116                    a different float mode destination -- this combination of
1117                    subregs results in Severe Tire Damage.  */
1118                 goto no_subreg_mode_swap;
1119             }
1120           if (GET_CODE (op0) == REG)
1121             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1122           else
1123             op0 = adjust_address (op0, mode1, offset);
1124         }
1125       if (mode1 != mode)
1126         return convert_to_mode (tmode, op0, unsignedp);
1127       return op0;
1128     }
1129  no_subreg_mode_swap:
1130
1131   /* Handle fields bigger than a word.  */
1132
1133   if (bitsize > BITS_PER_WORD)
1134     {
1135       /* Here we transfer the words of the field
1136          in the order least significant first.
1137          This is because the most significant word is the one which may
1138          be less than full.  */
1139
1140       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1141       unsigned int i;
1142
1143       if (target == 0 || GET_CODE (target) != REG)
1144         target = gen_reg_rtx (mode);
1145
1146       /* Indicate for flow that the entire target reg is being set.  */
1147       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1148
1149       for (i = 0; i < nwords; i++)
1150         {
1151           /* If I is 0, use the low-order word in both field and target;
1152              if I is 1, use the next to lowest word; and so on.  */
1153           /* Word number in TARGET to use.  */
1154           unsigned int wordnum
1155             = (WORDS_BIG_ENDIAN
1156                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1157                : i);
1158           /* Offset from start of field in OP0.  */
1159           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1160                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1161                                                 * (int) BITS_PER_WORD))
1162                                      : (int) i * BITS_PER_WORD);
1163           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1164           rtx result_part
1165             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1166                                            bitsize - i * BITS_PER_WORD),
1167                                  bitnum + bit_offset, 1, target_part, mode,
1168                                  word_mode, total_size);
1169
1170           if (target_part == 0)
1171             abort ();
1172
1173           if (result_part != target_part)
1174             emit_move_insn (target_part, result_part);
1175         }
1176
1177       if (unsignedp)
1178         {
1179           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1180              need to be zero'd out.  */
1181           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1182             {
1183               unsigned int i, total_words;
1184
1185               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1186               for (i = nwords; i < total_words; i++)
1187                 emit_move_insn
1188                   (operand_subword (target,
1189                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1190                                     1, VOIDmode),
1191                    const0_rtx);
1192             }
1193           return target;
1194         }
1195
1196       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1197       target = expand_shift (LSHIFT_EXPR, mode, target,
1198                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1199                              NULL_RTX, 0);
1200       return expand_shift (RSHIFT_EXPR, mode, target,
1201                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1202                            NULL_RTX, 0);
1203     }
1204
1205   /* From here on we know the desired field is smaller than a word.  */
1206
1207   /* Check if there is a correspondingly-sized integer field, so we can
1208      safely extract it as one size of integer, if necessary; then
1209      truncate or extend to the size that is wanted; then use SUBREGs or
1210      convert_to_mode to get one of the modes we really wanted.  */
1211
1212   int_mode = int_mode_for_mode (tmode);
1213   if (int_mode == BLKmode)
1214     int_mode = int_mode_for_mode (mode);
1215   if (int_mode == BLKmode)
1216     abort ();    /* Should probably push op0 out to memory and then
1217                     do a load.  */
1218
1219   /* OFFSET is the number of words or bytes (UNIT says which)
1220      from STR_RTX to the first word or byte containing part of the field.  */
1221
1222   if (GET_CODE (op0) != MEM)
1223     {
1224       if (offset != 0
1225           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1226         {
1227           if (GET_CODE (op0) != REG)
1228             op0 = copy_to_reg (op0);
1229           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1230                                 op0, (offset * UNITS_PER_WORD));
1231         }
1232       offset = 0;
1233     }
1234   else
1235     op0 = protect_from_queue (str_rtx, 1);
1236
1237   /* Now OFFSET is nonzero only for memory operands.  */
1238
1239   if (unsignedp)
1240     {
1241       if (HAVE_extzv
1242           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1243           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1244                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1245         {
1246           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1247           rtx bitsize_rtx, bitpos_rtx;
1248           rtx last = get_last_insn ();
1249           rtx xop0 = op0;
1250           rtx xtarget = target;
1251           rtx xspec_target = spec_target;
1252           rtx xspec_target_subreg = spec_target_subreg;
1253           rtx pat;
1254           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1255
1256           if (GET_CODE (xop0) == MEM)
1257             {
1258               int save_volatile_ok = volatile_ok;
1259               volatile_ok = 1;
1260
1261               /* Is the memory operand acceptable?  */
1262               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1263                      (xop0, GET_MODE (xop0))))
1264                 {
1265                   /* No, load into a reg and extract from there.  */
1266                   enum machine_mode bestmode;
1267
1268                   /* Get the mode to use for inserting into this field.  If
1269                      OP0 is BLKmode, get the smallest mode consistent with the
1270                      alignment. If OP0 is a non-BLKmode object that is no
1271                      wider than MAXMODE, use its mode. Otherwise, use the
1272                      smallest mode containing the field.  */
1273
1274                   if (GET_MODE (xop0) == BLKmode
1275                       || (GET_MODE_SIZE (GET_MODE (op0))
1276                           > GET_MODE_SIZE (maxmode)))
1277                     bestmode = get_best_mode (bitsize, bitnum,
1278                                               MEM_ALIGN (xop0), maxmode,
1279                                               MEM_VOLATILE_P (xop0));
1280                   else
1281                     bestmode = GET_MODE (xop0);
1282
1283                   if (bestmode == VOIDmode
1284                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1285                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1286                     goto extzv_loses;
1287
1288                   /* Compute offset as multiple of this unit,
1289                      counting in bytes.  */
1290                   unit = GET_MODE_BITSIZE (bestmode);
1291                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1292                   xbitpos = bitnum % unit;
1293                   xop0 = adjust_address (xop0, bestmode, xoffset);
1294
1295                   /* Fetch it to a register in that size.  */
1296                   xop0 = force_reg (bestmode, xop0);
1297
1298                   /* XBITPOS counts within UNIT, which is what is expected.  */
1299                 }
1300               else
1301                 /* Get ref to first byte containing part of the field.  */
1302                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1303
1304               volatile_ok = save_volatile_ok;
1305             }
1306
1307           /* If op0 is a register, we need it in MAXMODE (which is usually
1308              SImode). to make it acceptable to the format of extzv.  */
1309           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1310             goto extzv_loses;
1311           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1312             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1313
1314           /* On big-endian machines, we count bits from the most significant.
1315              If the bit field insn does not, we must invert.  */
1316           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1317             xbitpos = unit - bitsize - xbitpos;
1318
1319           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1320           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1321             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1322
1323           unit = GET_MODE_BITSIZE (maxmode);
1324
1325           if (xtarget == 0
1326               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1327             xtarget = xspec_target = gen_reg_rtx (tmode);
1328
1329           if (GET_MODE (xtarget) != maxmode)
1330             {
1331               if (GET_CODE (xtarget) == REG)
1332                 {
1333                   int wider = (GET_MODE_SIZE (maxmode)
1334                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1335                   xtarget = gen_lowpart (maxmode, xtarget);
1336                   if (wider)
1337                     xspec_target_subreg = xtarget;
1338                 }
1339               else
1340                 xtarget = gen_reg_rtx (maxmode);
1341             }
1342
1343           /* If this machine's extzv insists on a register target,
1344              make sure we have one.  */
1345           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1346                  (xtarget, maxmode)))
1347             xtarget = gen_reg_rtx (maxmode);
1348
1349           bitsize_rtx = GEN_INT (bitsize);
1350           bitpos_rtx = GEN_INT (xbitpos);
1351
1352           pat = gen_extzv (protect_from_queue (xtarget, 1),
1353                            xop0, bitsize_rtx, bitpos_rtx);
1354           if (pat)
1355             {
1356               emit_insn (pat);
1357               target = xtarget;
1358               spec_target = xspec_target;
1359               spec_target_subreg = xspec_target_subreg;
1360             }
1361           else
1362             {
1363               delete_insns_since (last);
1364               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1365                                                 bitpos, target, 1);
1366             }
1367         }
1368       else
1369       extzv_loses:
1370         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1371                                           bitpos, target, 1);
1372     }
1373   else
1374     {
1375       if (HAVE_extv
1376           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1377           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1378                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1379         {
1380           int xbitpos = bitpos, xoffset = offset;
1381           rtx bitsize_rtx, bitpos_rtx;
1382           rtx last = get_last_insn ();
1383           rtx xop0 = op0, xtarget = target;
1384           rtx xspec_target = spec_target;
1385           rtx xspec_target_subreg = spec_target_subreg;
1386           rtx pat;
1387           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1388
1389           if (GET_CODE (xop0) == MEM)
1390             {
1391               /* Is the memory operand acceptable?  */
1392               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1393                      (xop0, GET_MODE (xop0))))
1394                 {
1395                   /* No, load into a reg and extract from there.  */
1396                   enum machine_mode bestmode;
1397
1398                   /* Get the mode to use for inserting into this field.  If
1399                      OP0 is BLKmode, get the smallest mode consistent with the
1400                      alignment. If OP0 is a non-BLKmode object that is no
1401                      wider than MAXMODE, use its mode. Otherwise, use the
1402                      smallest mode containing the field.  */
1403
1404                   if (GET_MODE (xop0) == BLKmode
1405                       || (GET_MODE_SIZE (GET_MODE (op0))
1406                           > GET_MODE_SIZE (maxmode)))
1407                     bestmode = get_best_mode (bitsize, bitnum,
1408                                               MEM_ALIGN (xop0), maxmode,
1409                                               MEM_VOLATILE_P (xop0));
1410                   else
1411                     bestmode = GET_MODE (xop0);
1412
1413                   if (bestmode == VOIDmode
1414                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1415                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1416                     goto extv_loses;
1417
1418                   /* Compute offset as multiple of this unit,
1419                      counting in bytes.  */
1420                   unit = GET_MODE_BITSIZE (bestmode);
1421                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1422                   xbitpos = bitnum % unit;
1423                   xop0 = adjust_address (xop0, bestmode, xoffset);
1424
1425                   /* Fetch it to a register in that size.  */
1426                   xop0 = force_reg (bestmode, xop0);
1427
1428                   /* XBITPOS counts within UNIT, which is what is expected.  */
1429                 }
1430               else
1431                 /* Get ref to first byte containing part of the field.  */
1432                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1433             }
1434
1435           /* If op0 is a register, we need it in MAXMODE (which is usually
1436              SImode) to make it acceptable to the format of extv.  */
1437           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1438             goto extv_loses;
1439           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1440             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1441
1442           /* On big-endian machines, we count bits from the most significant.
1443              If the bit field insn does not, we must invert.  */
1444           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1445             xbitpos = unit - bitsize - xbitpos;
1446
1447           /* XBITPOS counts within a size of UNIT.
1448              Adjust to count within a size of MAXMODE.  */
1449           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1450             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1451
1452           unit = GET_MODE_BITSIZE (maxmode);
1453
1454           if (xtarget == 0
1455               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1456             xtarget = xspec_target = gen_reg_rtx (tmode);
1457
1458           if (GET_MODE (xtarget) != maxmode)
1459             {
1460               if (GET_CODE (xtarget) == REG)
1461                 {
1462                   int wider = (GET_MODE_SIZE (maxmode)
1463                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1464                   xtarget = gen_lowpart (maxmode, xtarget);
1465                   if (wider)
1466                     xspec_target_subreg = xtarget;
1467                 }
1468               else
1469                 xtarget = gen_reg_rtx (maxmode);
1470             }
1471
1472           /* If this machine's extv insists on a register target,
1473              make sure we have one.  */
1474           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1475                  (xtarget, maxmode)))
1476             xtarget = gen_reg_rtx (maxmode);
1477
1478           bitsize_rtx = GEN_INT (bitsize);
1479           bitpos_rtx = GEN_INT (xbitpos);
1480
1481           pat = gen_extv (protect_from_queue (xtarget, 1),
1482                           xop0, bitsize_rtx, bitpos_rtx);
1483           if (pat)
1484             {
1485               emit_insn (pat);
1486               target = xtarget;
1487               spec_target = xspec_target;
1488               spec_target_subreg = xspec_target_subreg;
1489             }
1490           else
1491             {
1492               delete_insns_since (last);
1493               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1494                                                 bitpos, target, 0);
1495             }
1496         }
1497       else
1498       extv_loses:
1499         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1500                                           bitpos, target, 0);
1501     }
1502   if (target == spec_target)
1503     return target;
1504   if (target == spec_target_subreg)
1505     return spec_target;
1506   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1507     {
1508       /* If the target mode is floating-point, first convert to the
1509          integer mode of that size and then access it as a floating-point
1510          value via a SUBREG.  */
1511       if (GET_MODE_CLASS (tmode) != MODE_INT
1512           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1513         {
1514           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1515                                                    MODE_INT, 0),
1516                                     target, unsignedp);
1517           return gen_lowpart (tmode, target);
1518         }
1519       else
1520         return convert_to_mode (tmode, target, unsignedp);
1521     }
1522   return target;
1523 }
1524 \f
1525 /* Extract a bit field using shifts and boolean operations
1526    Returns an rtx to represent the value.
1527    OP0 addresses a register (word) or memory (byte).
1528    BITPOS says which bit within the word or byte the bit field starts in.
1529    OFFSET says how many bytes farther the bit field starts;
1530     it is 0 if OP0 is a register.
1531    BITSIZE says how many bits long the bit field is.
1532     (If OP0 is a register, it may be narrower than a full word,
1533      but BITPOS still counts within a full word,
1534      which is significant on bigendian machines.)
1535
1536    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1537    If TARGET is nonzero, attempts to store the value there
1538    and return TARGET, but this is not guaranteed.
1539    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1540
1541 static rtx
1542 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1543                          unsigned HOST_WIDE_INT offset,
1544                          unsigned HOST_WIDE_INT bitsize,
1545                          unsigned HOST_WIDE_INT bitpos, rtx target,
1546                          int unsignedp)
1547 {
1548   unsigned int total_bits = BITS_PER_WORD;
1549   enum machine_mode mode;
1550
1551   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1552     {
1553       /* Special treatment for a bit field split across two registers.  */
1554       if (bitsize + bitpos > BITS_PER_WORD)
1555         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1556     }
1557   else
1558     {
1559       /* Get the proper mode to use for this field.  We want a mode that
1560          includes the entire field.  If such a mode would be larger than
1561          a word, we won't be doing the extraction the normal way.  */
1562
1563       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1564                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1565
1566       if (mode == VOIDmode)
1567         /* The only way this should occur is if the field spans word
1568            boundaries.  */
1569         return extract_split_bit_field (op0, bitsize,
1570                                         bitpos + offset * BITS_PER_UNIT,
1571                                         unsignedp);
1572
1573       total_bits = GET_MODE_BITSIZE (mode);
1574
1575       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1576          be in the range 0 to total_bits-1, and put any excess bytes in
1577          OFFSET.  */
1578       if (bitpos >= total_bits)
1579         {
1580           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1581           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1582                      * BITS_PER_UNIT);
1583         }
1584
1585       /* Get ref to an aligned byte, halfword, or word containing the field.
1586          Adjust BITPOS to be position within a word,
1587          and OFFSET to be the offset of that word.
1588          Then alter OP0 to refer to that word.  */
1589       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1590       offset -= (offset % (total_bits / BITS_PER_UNIT));
1591       op0 = adjust_address (op0, mode, offset);
1592     }
1593
1594   mode = GET_MODE (op0);
1595
1596   if (BYTES_BIG_ENDIAN)
1597     /* BITPOS is the distance between our msb and that of OP0.
1598        Convert it to the distance from the lsb.  */
1599     bitpos = total_bits - bitsize - bitpos;
1600
1601   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1602      We have reduced the big-endian case to the little-endian case.  */
1603
1604   if (unsignedp)
1605     {
1606       if (bitpos)
1607         {
1608           /* If the field does not already start at the lsb,
1609              shift it so it does.  */
1610           tree amount = build_int_2 (bitpos, 0);
1611           /* Maybe propagate the target for the shift.  */
1612           /* But not if we will return it--could confuse integrate.c.  */
1613           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1614                            && !REG_FUNCTION_VALUE_P (target)
1615                            ? target : 0);
1616           if (tmode != mode) subtarget = 0;
1617           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1618         }
1619       /* Convert the value to the desired mode.  */
1620       if (mode != tmode)
1621         op0 = convert_to_mode (tmode, op0, 1);
1622
1623       /* Unless the msb of the field used to be the msb when we shifted,
1624          mask out the upper bits.  */
1625
1626       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1627         return expand_binop (GET_MODE (op0), and_optab, op0,
1628                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1629                              target, 1, OPTAB_LIB_WIDEN);
1630       return op0;
1631     }
1632
1633   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1634      then arithmetic-shift its lsb to the lsb of the word.  */
1635   op0 = force_reg (mode, op0);
1636   if (mode != tmode)
1637     target = 0;
1638
1639   /* Find the narrowest integer mode that contains the field.  */
1640
1641   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1642        mode = GET_MODE_WIDER_MODE (mode))
1643     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1644       {
1645         op0 = convert_to_mode (mode, op0, 0);
1646         break;
1647       }
1648
1649   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1650     {
1651       tree amount
1652         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1653       /* Maybe propagate the target for the shift.  */
1654       /* But not if we will return the result--could confuse integrate.c.  */
1655       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1656                        && ! REG_FUNCTION_VALUE_P (target)
1657                        ? target : 0);
1658       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1659     }
1660
1661   return expand_shift (RSHIFT_EXPR, mode, op0,
1662                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1663                        target, 0);
1664 }
1665 \f
1666 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1667    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1668    complement of that if COMPLEMENT.  The mask is truncated if
1669    necessary to the width of mode MODE.  The mask is zero-extended if
1670    BITSIZE+BITPOS is too small for MODE.  */
1671
1672 static rtx
1673 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1674 {
1675   HOST_WIDE_INT masklow, maskhigh;
1676
1677   if (bitsize == 0)
1678     masklow = 0;
1679   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1680     masklow = (HOST_WIDE_INT) -1 << bitpos;
1681   else
1682     masklow = 0;
1683
1684   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1685     masklow &= ((unsigned HOST_WIDE_INT) -1
1686                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1687
1688   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1689     maskhigh = -1;
1690   else
1691     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1692
1693   if (bitsize == 0)
1694     maskhigh = 0;
1695   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1696     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1697                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1698   else
1699     maskhigh = 0;
1700
1701   if (complement)
1702     {
1703       maskhigh = ~maskhigh;
1704       masklow = ~masklow;
1705     }
1706
1707   return immed_double_const (masklow, maskhigh, mode);
1708 }
1709
1710 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1711    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1712
1713 static rtx
1714 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1715 {
1716   unsigned HOST_WIDE_INT v = INTVAL (value);
1717   HOST_WIDE_INT low, high;
1718
1719   if (bitsize < HOST_BITS_PER_WIDE_INT)
1720     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1721
1722   if (bitpos < HOST_BITS_PER_WIDE_INT)
1723     {
1724       low = v << bitpos;
1725       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1726     }
1727   else
1728     {
1729       low = 0;
1730       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1731     }
1732
1733   return immed_double_const (low, high, mode);
1734 }
1735 \f
1736 /* Extract a bit field that is split across two words
1737    and return an RTX for the result.
1738
1739    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1740    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1741    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1742
1743 static rtx
1744 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1745                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1746 {
1747   unsigned int unit;
1748   unsigned int bitsdone = 0;
1749   rtx result = NULL_RTX;
1750   int first = 1;
1751
1752   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1753      much at a time.  */
1754   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1755     unit = BITS_PER_WORD;
1756   else
1757     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1758
1759   while (bitsdone < bitsize)
1760     {
1761       unsigned HOST_WIDE_INT thissize;
1762       rtx part, word;
1763       unsigned HOST_WIDE_INT thispos;
1764       unsigned HOST_WIDE_INT offset;
1765
1766       offset = (bitpos + bitsdone) / unit;
1767       thispos = (bitpos + bitsdone) % unit;
1768
1769       /* THISSIZE must not overrun a word boundary.  Otherwise,
1770          extract_fixed_bit_field will call us again, and we will mutually
1771          recurse forever.  */
1772       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1773       thissize = MIN (thissize, unit - thispos);
1774
1775       /* If OP0 is a register, then handle OFFSET here.
1776
1777          When handling multiword bitfields, extract_bit_field may pass
1778          down a word_mode SUBREG of a larger REG for a bitfield that actually
1779          crosses a word boundary.  Thus, for a SUBREG, we must find
1780          the current word starting from the base register.  */
1781       if (GET_CODE (op0) == SUBREG)
1782         {
1783           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1784           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1785                                         GET_MODE (SUBREG_REG (op0)));
1786           offset = 0;
1787         }
1788       else if (GET_CODE (op0) == REG)
1789         {
1790           word = operand_subword_force (op0, offset, GET_MODE (op0));
1791           offset = 0;
1792         }
1793       else
1794         word = op0;
1795
1796       /* Extract the parts in bit-counting order,
1797          whose meaning is determined by BYTES_PER_UNIT.
1798          OFFSET is in UNITs, and UNIT is in bits.
1799          extract_fixed_bit_field wants offset in bytes.  */
1800       part = extract_fixed_bit_field (word_mode, word,
1801                                       offset * unit / BITS_PER_UNIT,
1802                                       thissize, thispos, 0, 1);
1803       bitsdone += thissize;
1804
1805       /* Shift this part into place for the result.  */
1806       if (BYTES_BIG_ENDIAN)
1807         {
1808           if (bitsize != bitsdone)
1809             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1810                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1811         }
1812       else
1813         {
1814           if (bitsdone != thissize)
1815             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1816                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1817         }
1818
1819       if (first)
1820         result = part;
1821       else
1822         /* Combine the parts with bitwise or.  This works
1823            because we extracted each part as an unsigned bit field.  */
1824         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1825                                OPTAB_LIB_WIDEN);
1826
1827       first = 0;
1828     }
1829
1830   /* Unsigned bit field: we are done.  */
1831   if (unsignedp)
1832     return result;
1833   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1834   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1835                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1836                          NULL_RTX, 0);
1837   return expand_shift (RSHIFT_EXPR, word_mode, result,
1838                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1839 }
1840 \f
1841 /* Add INC into TARGET.  */
1842
1843 void
1844 expand_inc (rtx target, rtx inc)
1845 {
1846   rtx value = expand_binop (GET_MODE (target), add_optab,
1847                             target, inc,
1848                             target, 0, OPTAB_LIB_WIDEN);
1849   if (value != target)
1850     emit_move_insn (target, value);
1851 }
1852
1853 /* Subtract DEC from TARGET.  */
1854
1855 void
1856 expand_dec (rtx target, rtx dec)
1857 {
1858   rtx value = expand_binop (GET_MODE (target), sub_optab,
1859                             target, dec,
1860                             target, 0, OPTAB_LIB_WIDEN);
1861   if (value != target)
1862     emit_move_insn (target, value);
1863 }
1864 \f
1865 /* Output a shift instruction for expression code CODE,
1866    with SHIFTED being the rtx for the value to shift,
1867    and AMOUNT the tree for the amount to shift by.
1868    Store the result in the rtx TARGET, if that is convenient.
1869    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1870    Return the rtx for where the value is.  */
1871
1872 rtx
1873 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1874               tree amount, rtx target, int unsignedp)
1875 {
1876   rtx op1, temp = 0;
1877   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1878   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1879   int try;
1880
1881   /* Previously detected shift-counts computed by NEGATE_EXPR
1882      and shifted in the other direction; but that does not work
1883      on all machines.  */
1884
1885   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1886
1887 #ifdef SHIFT_COUNT_TRUNCATED
1888   if (SHIFT_COUNT_TRUNCATED)
1889     {
1890       if (GET_CODE (op1) == CONST_INT
1891           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1892               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1893         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1894                        % GET_MODE_BITSIZE (mode));
1895       else if (GET_CODE (op1) == SUBREG
1896                && subreg_lowpart_p (op1))
1897         op1 = SUBREG_REG (op1);
1898     }
1899 #endif
1900
1901   if (op1 == const0_rtx)
1902     return shifted;
1903
1904   for (try = 0; temp == 0 && try < 3; try++)
1905     {
1906       enum optab_methods methods;
1907
1908       if (try == 0)
1909         methods = OPTAB_DIRECT;
1910       else if (try == 1)
1911         methods = OPTAB_WIDEN;
1912       else
1913         methods = OPTAB_LIB_WIDEN;
1914
1915       if (rotate)
1916         {
1917           /* Widening does not work for rotation.  */
1918           if (methods == OPTAB_WIDEN)
1919             continue;
1920           else if (methods == OPTAB_LIB_WIDEN)
1921             {
1922               /* If we have been unable to open-code this by a rotation,
1923                  do it as the IOR of two shifts.  I.e., to rotate A
1924                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1925                  where C is the bitsize of A.
1926
1927                  It is theoretically possible that the target machine might
1928                  not be able to perform either shift and hence we would
1929                  be making two libcalls rather than just the one for the
1930                  shift (similarly if IOR could not be done).  We will allow
1931                  this extremely unlikely lossage to avoid complicating the
1932                  code below.  */
1933
1934               rtx subtarget = target == shifted ? 0 : target;
1935               rtx temp1;
1936               tree type = TREE_TYPE (amount);
1937               tree new_amount = make_tree (type, op1);
1938               tree other_amount
1939                 = fold (build (MINUS_EXPR, type,
1940                                convert (type,
1941                                         build_int_2 (GET_MODE_BITSIZE (mode),
1942                                                      0)),
1943                                amount));
1944
1945               shifted = force_reg (mode, shifted);
1946
1947               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1948                                    mode, shifted, new_amount, subtarget, 1);
1949               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1950                                     mode, shifted, other_amount, 0, 1);
1951               return expand_binop (mode, ior_optab, temp, temp1, target,
1952                                    unsignedp, methods);
1953             }
1954
1955           temp = expand_binop (mode,
1956                                left ? rotl_optab : rotr_optab,
1957                                shifted, op1, target, unsignedp, methods);
1958
1959           /* If we don't have the rotate, but we are rotating by a constant
1960              that is in range, try a rotate in the opposite direction.  */
1961
1962           if (temp == 0 && GET_CODE (op1) == CONST_INT
1963               && INTVAL (op1) > 0
1964               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1965             temp = expand_binop (mode,
1966                                  left ? rotr_optab : rotl_optab,
1967                                  shifted,
1968                                  GEN_INT (GET_MODE_BITSIZE (mode)
1969                                           - INTVAL (op1)),
1970                                  target, unsignedp, methods);
1971         }
1972       else if (unsignedp)
1973         temp = expand_binop (mode,
1974                              left ? ashl_optab : lshr_optab,
1975                              shifted, op1, target, unsignedp, methods);
1976
1977       /* Do arithmetic shifts.
1978          Also, if we are going to widen the operand, we can just as well
1979          use an arithmetic right-shift instead of a logical one.  */
1980       if (temp == 0 && ! rotate
1981           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1982         {
1983           enum optab_methods methods1 = methods;
1984
1985           /* If trying to widen a log shift to an arithmetic shift,
1986              don't accept an arithmetic shift of the same size.  */
1987           if (unsignedp)
1988             methods1 = OPTAB_MUST_WIDEN;
1989
1990           /* Arithmetic shift */
1991
1992           temp = expand_binop (mode,
1993                                left ? ashl_optab : ashr_optab,
1994                                shifted, op1, target, unsignedp, methods1);
1995         }
1996
1997       /* We used to try extzv here for logical right shifts, but that was
1998          only useful for one machine, the VAX, and caused poor code
1999          generation there for lshrdi3, so the code was deleted and a
2000          define_expand for lshrsi3 was added to vax.md.  */
2001     }
2002
2003   if (temp == 0)
2004     abort ();
2005   return temp;
2006 }
2007 \f
2008 enum alg_code { alg_zero, alg_m, alg_shift,
2009                   alg_add_t_m2, alg_sub_t_m2,
2010                   alg_add_factor, alg_sub_factor,
2011                   alg_add_t2_m, alg_sub_t2_m,
2012                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2013
2014 /* This structure records a sequence of operations.
2015    `ops' is the number of operations recorded.
2016    `cost' is their total cost.
2017    The operations are stored in `op' and the corresponding
2018    logarithms of the integer coefficients in `log'.
2019
2020    These are the operations:
2021    alg_zero             total := 0;
2022    alg_m                total := multiplicand;
2023    alg_shift            total := total * coeff
2024    alg_add_t_m2         total := total + multiplicand * coeff;
2025    alg_sub_t_m2         total := total - multiplicand * coeff;
2026    alg_add_factor       total := total * coeff + total;
2027    alg_sub_factor       total := total * coeff - total;
2028    alg_add_t2_m         total := total * coeff + multiplicand;
2029    alg_sub_t2_m         total := total * coeff - multiplicand;
2030
2031    The first operand must be either alg_zero or alg_m.  */
2032
2033 struct algorithm
2034 {
2035   short cost;
2036   short ops;
2037   /* The size of the OP and LOG fields are not directly related to the
2038      word size, but the worst-case algorithms will be if we have few
2039      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2040      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2041      in total wordsize operations.  */
2042   enum alg_code op[MAX_BITS_PER_WORD];
2043   char log[MAX_BITS_PER_WORD];
2044 };
2045
2046 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2047 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2048                                                  int, unsigned HOST_WIDE_INT *,
2049                                                  int *, int *);
2050 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2051 /* Compute and return the best algorithm for multiplying by T.
2052    The algorithm must cost less than cost_limit
2053    If retval.cost >= COST_LIMIT, no algorithm was found and all
2054    other field of the returned struct are undefined.  */
2055
2056 static void
2057 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2058             int cost_limit)
2059 {
2060   int m;
2061   struct algorithm *alg_in, *best_alg;
2062   int cost;
2063   unsigned HOST_WIDE_INT q;
2064
2065   /* Indicate that no algorithm is yet found.  If no algorithm
2066      is found, this value will be returned and indicate failure.  */
2067   alg_out->cost = cost_limit;
2068
2069   if (cost_limit <= 0)
2070     return;
2071
2072   /* t == 1 can be done in zero cost.  */
2073   if (t == 1)
2074     {
2075       alg_out->ops = 1;
2076       alg_out->cost = 0;
2077       alg_out->op[0] = alg_m;
2078       return;
2079     }
2080
2081   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2082      fail now.  */
2083   if (t == 0)
2084     {
2085       if (zero_cost >= cost_limit)
2086         return;
2087       else
2088         {
2089           alg_out->ops = 1;
2090           alg_out->cost = zero_cost;
2091           alg_out->op[0] = alg_zero;
2092           return;
2093         }
2094     }
2095
2096   /* We'll be needing a couple extra algorithm structures now.  */
2097
2098   alg_in = alloca (sizeof (struct algorithm));
2099   best_alg = alloca (sizeof (struct algorithm));
2100
2101   /* If we have a group of zero bits at the low-order part of T, try
2102      multiplying by the remaining bits and then doing a shift.  */
2103
2104   if ((t & 1) == 0)
2105     {
2106       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2107       if (m < BITS_PER_WORD)
2108         {
2109           q = t >> m;
2110           cost = shift_cost[m];
2111           synth_mult (alg_in, q, cost_limit - cost);
2112
2113           cost += alg_in->cost;
2114           if (cost < cost_limit)
2115             {
2116               struct algorithm *x;
2117               x = alg_in, alg_in = best_alg, best_alg = x;
2118               best_alg->log[best_alg->ops] = m;
2119               best_alg->op[best_alg->ops] = alg_shift;
2120               cost_limit = cost;
2121             }
2122         }
2123     }
2124
2125   /* If we have an odd number, add or subtract one.  */
2126   if ((t & 1) != 0)
2127     {
2128       unsigned HOST_WIDE_INT w;
2129
2130       for (w = 1; (w & t) != 0; w <<= 1)
2131         ;
2132       /* If T was -1, then W will be zero after the loop.  This is another
2133          case where T ends with ...111.  Handling this with (T + 1) and
2134          subtract 1 produces slightly better code and results in algorithm
2135          selection much faster than treating it like the ...0111 case
2136          below.  */
2137       if (w == 0
2138           || (w > 2
2139               /* Reject the case where t is 3.
2140                  Thus we prefer addition in that case.  */
2141               && t != 3))
2142         {
2143           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2144
2145           cost = add_cost;
2146           synth_mult (alg_in, t + 1, cost_limit - cost);
2147
2148           cost += alg_in->cost;
2149           if (cost < cost_limit)
2150             {
2151               struct algorithm *x;
2152               x = alg_in, alg_in = best_alg, best_alg = x;
2153               best_alg->log[best_alg->ops] = 0;
2154               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2155               cost_limit = cost;
2156             }
2157         }
2158       else
2159         {
2160           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2161
2162           cost = add_cost;
2163           synth_mult (alg_in, t - 1, cost_limit - cost);
2164
2165           cost += alg_in->cost;
2166           if (cost < cost_limit)
2167             {
2168               struct algorithm *x;
2169               x = alg_in, alg_in = best_alg, best_alg = x;
2170               best_alg->log[best_alg->ops] = 0;
2171               best_alg->op[best_alg->ops] = alg_add_t_m2;
2172               cost_limit = cost;
2173             }
2174         }
2175     }
2176
2177   /* Look for factors of t of the form
2178      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2179      If we find such a factor, we can multiply by t using an algorithm that
2180      multiplies by q, shift the result by m and add/subtract it to itself.
2181
2182      We search for large factors first and loop down, even if large factors
2183      are less probable than small; if we find a large factor we will find a
2184      good sequence quickly, and therefore be able to prune (by decreasing
2185      COST_LIMIT) the search.  */
2186
2187   for (m = floor_log2 (t - 1); m >= 2; m--)
2188     {
2189       unsigned HOST_WIDE_INT d;
2190
2191       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2192       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2193         {
2194           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2195           synth_mult (alg_in, t / d, cost_limit - cost);
2196
2197           cost += alg_in->cost;
2198           if (cost < cost_limit)
2199             {
2200               struct algorithm *x;
2201               x = alg_in, alg_in = best_alg, best_alg = x;
2202               best_alg->log[best_alg->ops] = m;
2203               best_alg->op[best_alg->ops] = alg_add_factor;
2204               cost_limit = cost;
2205             }
2206           /* Other factors will have been taken care of in the recursion.  */
2207           break;
2208         }
2209
2210       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2211       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2212         {
2213           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2214           synth_mult (alg_in, t / d, cost_limit - cost);
2215
2216           cost += alg_in->cost;
2217           if (cost < cost_limit)
2218             {
2219               struct algorithm *x;
2220               x = alg_in, alg_in = best_alg, best_alg = x;
2221               best_alg->log[best_alg->ops] = m;
2222               best_alg->op[best_alg->ops] = alg_sub_factor;
2223               cost_limit = cost;
2224             }
2225           break;
2226         }
2227     }
2228
2229   /* Try shift-and-add (load effective address) instructions,
2230      i.e. do a*3, a*5, a*9.  */
2231   if ((t & 1) != 0)
2232     {
2233       q = t - 1;
2234       q = q & -q;
2235       m = exact_log2 (q);
2236       if (m >= 0 && m < BITS_PER_WORD)
2237         {
2238           cost = shiftadd_cost[m];
2239           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2240
2241           cost += alg_in->cost;
2242           if (cost < cost_limit)
2243             {
2244               struct algorithm *x;
2245               x = alg_in, alg_in = best_alg, best_alg = x;
2246               best_alg->log[best_alg->ops] = m;
2247               best_alg->op[best_alg->ops] = alg_add_t2_m;
2248               cost_limit = cost;
2249             }
2250         }
2251
2252       q = t + 1;
2253       q = q & -q;
2254       m = exact_log2 (q);
2255       if (m >= 0 && m < BITS_PER_WORD)
2256         {
2257           cost = shiftsub_cost[m];
2258           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2259
2260           cost += alg_in->cost;
2261           if (cost < cost_limit)
2262             {
2263               struct algorithm *x;
2264               x = alg_in, alg_in = best_alg, best_alg = x;
2265               best_alg->log[best_alg->ops] = m;
2266               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2267               cost_limit = cost;
2268             }
2269         }
2270     }
2271
2272   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2273      we have not found any algorithm.  */
2274   if (cost_limit == alg_out->cost)
2275     return;
2276
2277   /* If we are getting a too long sequence for `struct algorithm'
2278      to record, make this search fail.  */
2279   if (best_alg->ops == MAX_BITS_PER_WORD)
2280     return;
2281
2282   /* Copy the algorithm from temporary space to the space at alg_out.
2283      We avoid using structure assignment because the majority of
2284      best_alg is normally undefined, and this is a critical function.  */
2285   alg_out->ops = best_alg->ops + 1;
2286   alg_out->cost = cost_limit;
2287   memcpy (alg_out->op, best_alg->op,
2288           alg_out->ops * sizeof *alg_out->op);
2289   memcpy (alg_out->log, best_alg->log,
2290           alg_out->ops * sizeof *alg_out->log);
2291 }
2292 \f
2293 /* Perform a multiplication and return an rtx for the result.
2294    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2295    TARGET is a suggestion for where to store the result (an rtx).
2296
2297    We check specially for a constant integer as OP1.
2298    If you want this check for OP0 as well, then before calling
2299    you should swap the two operands if OP0 would be constant.  */
2300
2301 rtx
2302 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2303              int unsignedp)
2304 {
2305   rtx const_op1 = op1;
2306
2307   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2308      less than or equal in size to `unsigned int' this doesn't matter.
2309      If the mode is larger than `unsigned int', then synth_mult works only
2310      if the constant value exactly fits in an `unsigned int' without any
2311      truncation.  This means that multiplying by negative values does
2312      not work; results are off by 2^32 on a 32 bit machine.  */
2313
2314   /* If we are multiplying in DImode, it may still be a win
2315      to try to work with shifts and adds.  */
2316   if (GET_CODE (op1) == CONST_DOUBLE
2317       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2318       && HOST_BITS_PER_INT >= BITS_PER_WORD
2319       && CONST_DOUBLE_HIGH (op1) == 0)
2320     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2321   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2322            && GET_CODE (op1) == CONST_INT
2323            && INTVAL (op1) < 0)
2324     const_op1 = 0;
2325
2326   /* We used to test optimize here, on the grounds that it's better to
2327      produce a smaller program when -O is not used.
2328      But this causes such a terrible slowdown sometimes
2329      that it seems better to use synth_mult always.  */
2330
2331   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2332       && (unsignedp || ! flag_trapv))
2333     {
2334       struct algorithm alg;
2335       struct algorithm alg2;
2336       HOST_WIDE_INT val = INTVAL (op1);
2337       HOST_WIDE_INT val_so_far;
2338       rtx insn;
2339       int mult_cost;
2340       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2341
2342       /* op0 must be register to make mult_cost match the precomputed
2343          shiftadd_cost array.  */
2344       op0 = force_reg (mode, op0);
2345
2346       /* Try to do the computation three ways: multiply by the negative of OP1
2347          and then negate, do the multiplication directly, or do multiplication
2348          by OP1 - 1.  */
2349
2350       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2351       mult_cost = MIN (12 * add_cost, mult_cost);
2352
2353       synth_mult (&alg, val, mult_cost);
2354
2355       /* This works only if the inverted value actually fits in an
2356          `unsigned int' */
2357       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2358         {
2359           synth_mult (&alg2, - val,
2360                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2361           if (alg2.cost + negate_cost < alg.cost)
2362             alg = alg2, variant = negate_variant;
2363         }
2364
2365       /* This proves very useful for division-by-constant.  */
2366       synth_mult (&alg2, val - 1,
2367                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2368       if (alg2.cost + add_cost < alg.cost)
2369         alg = alg2, variant = add_variant;
2370
2371       if (alg.cost < mult_cost)
2372         {
2373           /* We found something cheaper than a multiply insn.  */
2374           int opno;
2375           rtx accum, tem;
2376           enum machine_mode nmode;
2377
2378           op0 = protect_from_queue (op0, 0);
2379
2380           /* Avoid referencing memory over and over.
2381              For speed, but also for correctness when mem is volatile.  */
2382           if (GET_CODE (op0) == MEM)
2383             op0 = force_reg (mode, op0);
2384
2385           /* ACCUM starts out either as OP0 or as a zero, depending on
2386              the first operation.  */
2387
2388           if (alg.op[0] == alg_zero)
2389             {
2390               accum = copy_to_mode_reg (mode, const0_rtx);
2391               val_so_far = 0;
2392             }
2393           else if (alg.op[0] == alg_m)
2394             {
2395               accum = copy_to_mode_reg (mode, op0);
2396               val_so_far = 1;
2397             }
2398           else
2399             abort ();
2400
2401           for (opno = 1; opno < alg.ops; opno++)
2402             {
2403               int log = alg.log[opno];
2404               int preserve = preserve_subexpressions_p ();
2405               rtx shift_subtarget = preserve ? 0 : accum;
2406               rtx add_target
2407                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2408                    && ! preserve)
2409                   ? target : 0;
2410               rtx accum_target = preserve ? 0 : accum;
2411
2412               switch (alg.op[opno])
2413                 {
2414                 case alg_shift:
2415                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2416                                         build_int_2 (log, 0), NULL_RTX, 0);
2417                   val_so_far <<= log;
2418                   break;
2419
2420                 case alg_add_t_m2:
2421                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2422                                       build_int_2 (log, 0), NULL_RTX, 0);
2423                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2424                                          add_target
2425                                          ? add_target : accum_target);
2426                   val_so_far += (HOST_WIDE_INT) 1 << log;
2427                   break;
2428
2429                 case alg_sub_t_m2:
2430                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2431                                       build_int_2 (log, 0), NULL_RTX, 0);
2432                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2433                                          add_target
2434                                          ? add_target : accum_target);
2435                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2436                   break;
2437
2438                 case alg_add_t2_m:
2439                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2440                                         build_int_2 (log, 0), shift_subtarget,
2441                                         0);
2442                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2443                                          add_target
2444                                          ? add_target : accum_target);
2445                   val_so_far = (val_so_far << log) + 1;
2446                   break;
2447
2448                 case alg_sub_t2_m:
2449                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2450                                         build_int_2 (log, 0), shift_subtarget,
2451                                         0);
2452                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2453                                          add_target
2454                                          ? add_target : accum_target);
2455                   val_so_far = (val_so_far << log) - 1;
2456                   break;
2457
2458                 case alg_add_factor:
2459                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2460                                       build_int_2 (log, 0), NULL_RTX, 0);
2461                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2462                                          add_target
2463                                          ? add_target : accum_target);
2464                   val_so_far += val_so_far << log;
2465                   break;
2466
2467                 case alg_sub_factor:
2468                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2469                                       build_int_2 (log, 0), NULL_RTX, 0);
2470                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2471                                          (add_target ? add_target
2472                                           : preserve ? 0 : tem));
2473                   val_so_far = (val_so_far << log) - val_so_far;
2474                   break;
2475
2476                 default:
2477                   abort ();
2478                 }
2479
2480               /* Write a REG_EQUAL note on the last insn so that we can cse
2481                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2482                  we've set the inner register and must properly indicate
2483                  that.  */
2484
2485               tem = op0, nmode = mode;
2486               if (GET_CODE (accum) == SUBREG)
2487                 {
2488                   nmode = GET_MODE (SUBREG_REG (accum));
2489                   tem = gen_lowpart (nmode, op0);
2490                 }
2491
2492               insn = get_last_insn ();
2493               set_unique_reg_note (insn,
2494                                    REG_EQUAL,
2495                                    gen_rtx_MULT (nmode, tem,
2496                                                  GEN_INT (val_so_far)));
2497             }
2498
2499           if (variant == negate_variant)
2500             {
2501               val_so_far = - val_so_far;
2502               accum = expand_unop (mode, neg_optab, accum, target, 0);
2503             }
2504           else if (variant == add_variant)
2505             {
2506               val_so_far = val_so_far + 1;
2507               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2508             }
2509
2510           if (val != val_so_far)
2511             abort ();
2512
2513           return accum;
2514         }
2515     }
2516
2517   if (GET_CODE (op0) == CONST_DOUBLE)
2518     {
2519       rtx temp = op0;
2520       op0 = op1;
2521       op1 = temp;
2522     }
2523
2524   /* Expand x*2.0 as x+x.  */
2525   if (GET_CODE (op1) == CONST_DOUBLE
2526       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2527     {
2528       REAL_VALUE_TYPE d;
2529       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2530
2531       if (REAL_VALUES_EQUAL (d, dconst2))
2532         {
2533           op0 = force_reg (GET_MODE (op0), op0);
2534           return expand_binop (mode, add_optab, op0, op0,
2535                                target, unsignedp, OPTAB_LIB_WIDEN);
2536         }
2537     }
2538
2539   /* This used to use umul_optab if unsigned, but for non-widening multiply
2540      there is no difference between signed and unsigned.  */
2541   op0 = expand_binop (mode,
2542                       ! unsignedp
2543                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2544                       ? smulv_optab : smul_optab,
2545                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2546   if (op0 == 0)
2547     abort ();
2548   return op0;
2549 }
2550 \f
2551 /* Return the smallest n such that 2**n >= X.  */
2552
2553 int
2554 ceil_log2 (unsigned HOST_WIDE_INT x)
2555 {
2556   return floor_log2 (x - 1) + 1;
2557 }
2558
2559 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2560    replace division by D, and put the least significant N bits of the result
2561    in *MULTIPLIER_PTR and return the most significant bit.
2562
2563    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2564    needed precision is in PRECISION (should be <= N).
2565
2566    PRECISION should be as small as possible so this function can choose
2567    multiplier more freely.
2568
2569    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2570    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2571
2572    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2573    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2574
2575 static
2576 unsigned HOST_WIDE_INT
2577 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2578                    unsigned HOST_WIDE_INT *multiplier_ptr,
2579                    int *post_shift_ptr, int *lgup_ptr)
2580 {
2581   HOST_WIDE_INT mhigh_hi, mlow_hi;
2582   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2583   int lgup, post_shift;
2584   int pow, pow2;
2585   unsigned HOST_WIDE_INT nl, dummy1;
2586   HOST_WIDE_INT nh, dummy2;
2587
2588   /* lgup = ceil(log2(divisor)); */
2589   lgup = ceil_log2 (d);
2590
2591   if (lgup > n)
2592     abort ();
2593
2594   pow = n + lgup;
2595   pow2 = n + lgup - precision;
2596
2597   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2598     {
2599       /* We could handle this with some effort, but this case is much better
2600          handled directly with a scc insn, so rely on caller using that.  */
2601       abort ();
2602     }
2603
2604   /* mlow = 2^(N + lgup)/d */
2605  if (pow >= HOST_BITS_PER_WIDE_INT)
2606     {
2607       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2608       nl = 0;
2609     }
2610   else
2611     {
2612       nh = 0;
2613       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2614     }
2615   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2616                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2617
2618   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2619   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2620     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2621   else
2622     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2623   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2624                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2625
2626   if (mhigh_hi && nh - d >= d)
2627     abort ();
2628   if (mhigh_hi > 1 || mlow_hi > 1)
2629     abort ();
2630   /* Assert that mlow < mhigh.  */
2631   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2632     abort ();
2633
2634   /* If precision == N, then mlow, mhigh exceed 2^N
2635      (but they do not exceed 2^(N+1)).  */
2636
2637   /* Reduce to lowest terms.  */
2638   for (post_shift = lgup; post_shift > 0; post_shift--)
2639     {
2640       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2641       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2642       if (ml_lo >= mh_lo)
2643         break;
2644
2645       mlow_hi = 0;
2646       mlow_lo = ml_lo;
2647       mhigh_hi = 0;
2648       mhigh_lo = mh_lo;
2649     }
2650
2651   *post_shift_ptr = post_shift;
2652   *lgup_ptr = lgup;
2653   if (n < HOST_BITS_PER_WIDE_INT)
2654     {
2655       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2656       *multiplier_ptr = mhigh_lo & mask;
2657       return mhigh_lo >= mask;
2658     }
2659   else
2660     {
2661       *multiplier_ptr = mhigh_lo;
2662       return mhigh_hi;
2663     }
2664 }
2665
2666 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2667    congruent to 1 (mod 2**N).  */
2668
2669 static unsigned HOST_WIDE_INT
2670 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2671 {
2672   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2673
2674   /* The algorithm notes that the choice y = x satisfies
2675      x*y == 1 mod 2^3, since x is assumed odd.
2676      Each iteration doubles the number of bits of significance in y.  */
2677
2678   unsigned HOST_WIDE_INT mask;
2679   unsigned HOST_WIDE_INT y = x;
2680   int nbit = 3;
2681
2682   mask = (n == HOST_BITS_PER_WIDE_INT
2683           ? ~(unsigned HOST_WIDE_INT) 0
2684           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2685
2686   while (nbit < n)
2687     {
2688       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2689       nbit *= 2;
2690     }
2691   return y;
2692 }
2693
2694 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2695    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2696    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2697    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2698    become signed.
2699
2700    The result is put in TARGET if that is convenient.
2701
2702    MODE is the mode of operation.  */
2703
2704 rtx
2705 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2706                              rtx op1, rtx target, int unsignedp)
2707 {
2708   rtx tem;
2709   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2710
2711   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2712                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2713                       NULL_RTX, 0);
2714   tem = expand_and (mode, tem, op1, NULL_RTX);
2715   adj_operand
2716     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2717                      adj_operand);
2718
2719   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2720                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2721                       NULL_RTX, 0);
2722   tem = expand_and (mode, tem, op0, NULL_RTX);
2723   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2724                           target);
2725
2726   return target;
2727 }
2728
2729 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2730    in TARGET if that is convenient, and return where the result is.  If the
2731    operation can not be performed, 0 is returned.
2732
2733    MODE is the mode of operation and result.
2734
2735    UNSIGNEDP nonzero means unsigned multiply.
2736
2737    MAX_COST is the total allowed cost for the expanded RTL.  */
2738
2739 rtx
2740 expand_mult_highpart (enum machine_mode mode, rtx op0,
2741                       unsigned HOST_WIDE_INT cnst1, rtx target,
2742                       int unsignedp, int max_cost)
2743 {
2744   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2745   optab mul_highpart_optab;
2746   optab moptab;
2747   rtx tem;
2748   int size = GET_MODE_BITSIZE (mode);
2749   rtx op1, wide_op1;
2750
2751   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2752   if (size > HOST_BITS_PER_WIDE_INT)
2753     abort ();
2754
2755   op1 = gen_int_mode (cnst1, mode);
2756
2757   wide_op1
2758     = immed_double_const (cnst1,
2759                           (unsignedp
2760                            ? (HOST_WIDE_INT) 0
2761                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2762                           wider_mode);
2763
2764   /* expand_mult handles constant multiplication of word_mode
2765      or narrower.  It does a poor job for large modes.  */
2766   if (size < BITS_PER_WORD
2767       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2768     {
2769       /* We have to do this, since expand_binop doesn't do conversion for
2770          multiply.  Maybe change expand_binop to handle widening multiply?  */
2771       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2772
2773       /* We know that this can't have signed overflow, so pretend this is
2774          an unsigned multiply.  */
2775       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2776       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2777                           build_int_2 (size, 0), NULL_RTX, 1);
2778       return convert_modes (mode, wider_mode, tem, unsignedp);
2779     }
2780
2781   if (target == 0)
2782     target = gen_reg_rtx (mode);
2783
2784   /* Firstly, try using a multiplication insn that only generates the needed
2785      high part of the product, and in the sign flavor of unsignedp.  */
2786   if (mul_highpart_cost[(int) mode] < max_cost)
2787     {
2788       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2789       target = expand_binop (mode, mul_highpart_optab,
2790                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2791       if (target)
2792         return target;
2793     }
2794
2795   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2796      Need to adjust the result after the multiplication.  */
2797   if (size - 1 < BITS_PER_WORD
2798       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2799           < max_cost))
2800     {
2801       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2802       target = expand_binop (mode, mul_highpart_optab,
2803                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2804       if (target)
2805         /* We used the wrong signedness.  Adjust the result.  */
2806         return expand_mult_highpart_adjust (mode, target, op0,
2807                                             op1, target, unsignedp);
2808     }
2809
2810   /* Try widening multiplication.  */
2811   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2812   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2813       && mul_widen_cost[(int) wider_mode] < max_cost)
2814     {
2815       op1 = force_reg (mode, op1);
2816       goto try;
2817     }
2818
2819   /* Try widening the mode and perform a non-widening multiplication.  */
2820   moptab = smul_optab;
2821   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2822       && size - 1 < BITS_PER_WORD
2823       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2824     {
2825       op1 = wide_op1;
2826       goto try;
2827     }
2828
2829   /* Try widening multiplication of opposite signedness, and adjust.  */
2830   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2831   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2832       && size - 1 < BITS_PER_WORD
2833       && (mul_widen_cost[(int) wider_mode]
2834           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2835     {
2836       rtx regop1 = force_reg (mode, op1);
2837       tem = expand_binop (wider_mode, moptab, op0, regop1,
2838                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2839       if (tem != 0)
2840         {
2841           /* Extract the high half of the just generated product.  */
2842           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2843                               build_int_2 (size, 0), NULL_RTX, 1);
2844           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2845           /* We used the wrong signedness.  Adjust the result.  */
2846           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2847                                               target, unsignedp);
2848         }
2849     }
2850
2851   return 0;
2852
2853  try:
2854   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2855   tem = expand_binop (wider_mode, moptab, op0, op1,
2856                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2857   if (tem == 0)
2858     return 0;
2859
2860   /* Extract the high half of the just generated product.  */
2861   if (mode == word_mode)
2862     {
2863       return gen_highpart (mode, tem);
2864     }
2865   else
2866     {
2867       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2868                           build_int_2 (size, 0), NULL_RTX, 1);
2869       return convert_modes (mode, wider_mode, tem, unsignedp);
2870     }
2871 }
2872 \f
2873 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2874    if that is convenient, and returning where the result is.
2875    You may request either the quotient or the remainder as the result;
2876    specify REM_FLAG nonzero to get the remainder.
2877
2878    CODE is the expression code for which kind of division this is;
2879    it controls how rounding is done.  MODE is the machine mode to use.
2880    UNSIGNEDP nonzero means do unsigned division.  */
2881
2882 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2883    and then correct it by or'ing in missing high bits
2884    if result of ANDI is nonzero.
2885    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2886    This could optimize to a bfexts instruction.
2887    But C doesn't use these operations, so their optimizations are
2888    left for later.  */
2889 /* ??? For modulo, we don't actually need the highpart of the first product,
2890    the low part will do nicely.  And for small divisors, the second multiply
2891    can also be a low-part only multiply or even be completely left out.
2892    E.g. to calculate the remainder of a division by 3 with a 32 bit
2893    multiply, multiply with 0x55555556 and extract the upper two bits;
2894    the result is exact for inputs up to 0x1fffffff.
2895    The input range can be reduced by using cross-sum rules.
2896    For odd divisors >= 3, the following table gives right shift counts
2897    so that if a number is shifted by an integer multiple of the given
2898    amount, the remainder stays the same:
2899    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2900    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2901    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2902    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2903    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2904
2905    Cross-sum rules for even numbers can be derived by leaving as many bits
2906    to the right alone as the divisor has zeros to the right.
2907    E.g. if x is an unsigned 32 bit number:
2908    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2909    */
2910
2911 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2912
2913 rtx
2914 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
2915                rtx op0, rtx op1, rtx target, int unsignedp)
2916 {
2917   enum machine_mode compute_mode;
2918   rtx tquotient;
2919   rtx quotient = 0, remainder = 0;
2920   rtx last;
2921   int size;
2922   rtx insn, set;
2923   optab optab1, optab2;
2924   int op1_is_constant, op1_is_pow2 = 0;
2925   int max_cost, extra_cost;
2926   static HOST_WIDE_INT last_div_const = 0;
2927   static HOST_WIDE_INT ext_op1;
2928
2929   op1_is_constant = GET_CODE (op1) == CONST_INT;
2930   if (op1_is_constant)
2931     {
2932       ext_op1 = INTVAL (op1);
2933       if (unsignedp)
2934         ext_op1 &= GET_MODE_MASK (mode);
2935       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
2936                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
2937     }
2938
2939   /*
2940      This is the structure of expand_divmod:
2941
2942      First comes code to fix up the operands so we can perform the operations
2943      correctly and efficiently.
2944
2945      Second comes a switch statement with code specific for each rounding mode.
2946      For some special operands this code emits all RTL for the desired
2947      operation, for other cases, it generates only a quotient and stores it in
2948      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2949      to indicate that it has not done anything.
2950
2951      Last comes code that finishes the operation.  If QUOTIENT is set and
2952      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2953      QUOTIENT is not set, it is computed using trunc rounding.
2954
2955      We try to generate special code for division and remainder when OP1 is a
2956      constant.  If |OP1| = 2**n we can use shifts and some other fast
2957      operations.  For other values of OP1, we compute a carefully selected
2958      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2959      by m.
2960
2961      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2962      half of the product.  Different strategies for generating the product are
2963      implemented in expand_mult_highpart.
2964
2965      If what we actually want is the remainder, we generate that by another
2966      by-constant multiplication and a subtraction.  */
2967
2968   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2969      code below will malfunction if we are, so check here and handle
2970      the special case if so.  */
2971   if (op1 == const1_rtx)
2972     return rem_flag ? const0_rtx : op0;
2973
2974     /* When dividing by -1, we could get an overflow.
2975      negv_optab can handle overflows.  */
2976   if (! unsignedp && op1 == constm1_rtx)
2977     {
2978       if (rem_flag)
2979         return const0_rtx;
2980       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
2981                           ? negv_optab : neg_optab, op0, target, 0);
2982     }
2983
2984   if (target
2985       /* Don't use the function value register as a target
2986          since we have to read it as well as write it,
2987          and function-inlining gets confused by this.  */
2988       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2989           /* Don't clobber an operand while doing a multi-step calculation.  */
2990           || ((rem_flag || op1_is_constant)
2991               && (reg_mentioned_p (target, op0)
2992                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2993           || reg_mentioned_p (target, op1)
2994           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2995     target = 0;
2996
2997   /* Get the mode in which to perform this computation.  Normally it will
2998      be MODE, but sometimes we can't do the desired operation in MODE.
2999      If so, pick a wider mode in which we can do the operation.  Convert
3000      to that mode at the start to avoid repeated conversions.
3001
3002      First see what operations we need.  These depend on the expression
3003      we are evaluating.  (We assume that divxx3 insns exist under the
3004      same conditions that modxx3 insns and that these insns don't normally
3005      fail.  If these assumptions are not correct, we may generate less
3006      efficient code in some cases.)
3007
3008      Then see if we find a mode in which we can open-code that operation
3009      (either a division, modulus, or shift).  Finally, check for the smallest
3010      mode for which we can do the operation with a library call.  */
3011
3012   /* We might want to refine this now that we have division-by-constant
3013      optimization.  Since expand_mult_highpart tries so many variants, it is
3014      not straightforward to generalize this.  Maybe we should make an array
3015      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3016
3017   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3018             ? (unsignedp ? lshr_optab : ashr_optab)
3019             : (unsignedp ? udiv_optab : sdiv_optab));
3020   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3021             ? optab1
3022             : (unsignedp ? udivmod_optab : sdivmod_optab));
3023
3024   for (compute_mode = mode; compute_mode != VOIDmode;
3025        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3026     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3027         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3028       break;
3029
3030   if (compute_mode == VOIDmode)
3031     for (compute_mode = mode; compute_mode != VOIDmode;
3032          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3033       if (optab1->handlers[(int) compute_mode].libfunc
3034           || optab2->handlers[(int) compute_mode].libfunc)
3035         break;
3036
3037   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3038      in expand_binop.  */
3039   if (compute_mode == VOIDmode)
3040     compute_mode = mode;
3041
3042   if (target && GET_MODE (target) == compute_mode)
3043     tquotient = target;
3044   else
3045     tquotient = gen_reg_rtx (compute_mode);
3046
3047   size = GET_MODE_BITSIZE (compute_mode);
3048 #if 0
3049   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3050      (mode), and thereby get better code when OP1 is a constant.  Do that
3051      later.  It will require going over all usages of SIZE below.  */
3052   size = GET_MODE_BITSIZE (mode);
3053 #endif
3054
3055   /* Only deduct something for a REM if the last divide done was
3056      for a different constant.   Then set the constant of the last
3057      divide.  */
3058   max_cost = div_cost[(int) compute_mode]
3059     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3060                       && INTVAL (op1) == last_div_const)
3061        ? mul_cost[(int) compute_mode] + add_cost : 0);
3062
3063   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3064
3065   /* Now convert to the best mode to use.  */
3066   if (compute_mode != mode)
3067     {
3068       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3069       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3070
3071       /* convert_modes may have placed op1 into a register, so we
3072          must recompute the following.  */
3073       op1_is_constant = GET_CODE (op1) == CONST_INT;
3074       op1_is_pow2 = (op1_is_constant
3075                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3076                           || (! unsignedp
3077                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3078     }
3079
3080   /* If one of the operands is a volatile MEM, copy it into a register.  */
3081
3082   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3083     op0 = force_reg (compute_mode, op0);
3084   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3085     op1 = force_reg (compute_mode, op1);
3086
3087   /* If we need the remainder or if OP1 is constant, we need to
3088      put OP0 in a register in case it has any queued subexpressions.  */
3089   if (rem_flag || op1_is_constant)
3090     op0 = force_reg (compute_mode, op0);
3091
3092   last = get_last_insn ();
3093
3094   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3095   if (unsignedp)
3096     {
3097       if (code == FLOOR_DIV_EXPR)
3098         code = TRUNC_DIV_EXPR;
3099       if (code == FLOOR_MOD_EXPR)
3100         code = TRUNC_MOD_EXPR;
3101       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3102         code = TRUNC_DIV_EXPR;
3103     }
3104
3105   if (op1 != const0_rtx)
3106     switch (code)
3107       {
3108       case TRUNC_MOD_EXPR:
3109       case TRUNC_DIV_EXPR:
3110         if (op1_is_constant)
3111           {
3112             if (unsignedp)
3113               {
3114                 unsigned HOST_WIDE_INT mh, ml;
3115                 int pre_shift, post_shift;
3116                 int dummy;
3117                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3118                                             & GET_MODE_MASK (compute_mode));
3119
3120                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3121                   {
3122                     pre_shift = floor_log2 (d);
3123                     if (rem_flag)
3124                       {
3125                         remainder
3126                           = expand_binop (compute_mode, and_optab, op0,
3127                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3128                                           remainder, 1,
3129                                           OPTAB_LIB_WIDEN);
3130                         if (remainder)
3131                           return gen_lowpart (mode, remainder);
3132                       }
3133                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3134                                              build_int_2 (pre_shift, 0),
3135                                              tquotient, 1);
3136                   }
3137                 else if (size <= HOST_BITS_PER_WIDE_INT)
3138                   {
3139                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3140                       {
3141                         /* Most significant bit of divisor is set; emit an scc
3142                            insn.  */
3143                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3144                                                     compute_mode, 1, 1);
3145                         if (quotient == 0)
3146                           goto fail1;
3147                       }
3148                     else
3149                       {
3150                         /* Find a suitable multiplier and right shift count
3151                            instead of multiplying with D.  */
3152
3153                         mh = choose_multiplier (d, size, size,
3154                                                 &ml, &post_shift, &dummy);
3155
3156                         /* If the suggested multiplier is more than SIZE bits,
3157                            we can do better for even divisors, using an
3158                            initial right shift.  */
3159                         if (mh != 0 && (d & 1) == 0)
3160                           {
3161                             pre_shift = floor_log2 (d & -d);
3162                             mh = choose_multiplier (d >> pre_shift, size,
3163                                                     size - pre_shift,
3164                                                     &ml, &post_shift, &dummy);
3165                             if (mh)
3166                               abort ();
3167                           }
3168                         else
3169                           pre_shift = 0;
3170
3171                         if (mh != 0)
3172                           {
3173                             rtx t1, t2, t3, t4;
3174
3175                             if (post_shift - 1 >= BITS_PER_WORD)
3176                               goto fail1;
3177
3178                             extra_cost = (shift_cost[post_shift - 1]
3179                                           + shift_cost[1] + 2 * add_cost);
3180                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3181                                                        NULL_RTX, 1,
3182                                                        max_cost - extra_cost);
3183                             if (t1 == 0)
3184                               goto fail1;
3185                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3186                                                                op0, t1),
3187                                                 NULL_RTX);
3188                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3189                                                build_int_2 (1, 0), NULL_RTX,1);
3190                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3191                                                               t1, t3),
3192                                                 NULL_RTX);
3193                             quotient
3194                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3195                                               build_int_2 (post_shift - 1, 0),
3196                                               tquotient, 1);
3197                           }
3198                         else
3199                           {
3200                             rtx t1, t2;
3201
3202                             if (pre_shift >= BITS_PER_WORD
3203                                 || post_shift >= BITS_PER_WORD)
3204                               goto fail1;
3205
3206                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3207                                                build_int_2 (pre_shift, 0),
3208                                                NULL_RTX, 1);
3209                             extra_cost = (shift_cost[pre_shift]
3210                                           + shift_cost[post_shift]);
3211                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3212                                                        NULL_RTX, 1,
3213                                                        max_cost - extra_cost);
3214                             if (t2 == 0)
3215                               goto fail1;
3216                             quotient
3217                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3218                                               build_int_2 (post_shift, 0),
3219                                               tquotient, 1);
3220                           }
3221                       }
3222                   }
3223                 else            /* Too wide mode to use tricky code */
3224                   break;
3225
3226                 insn = get_last_insn ();
3227                 if (insn != last
3228                     && (set = single_set (insn)) != 0
3229                     && SET_DEST (set) == quotient)
3230                   set_unique_reg_note (insn,
3231                                        REG_EQUAL,
3232                                        gen_rtx_UDIV (compute_mode, op0, op1));
3233               }
3234             else                /* TRUNC_DIV, signed */
3235               {
3236                 unsigned HOST_WIDE_INT ml;
3237                 int lgup, post_shift;
3238                 HOST_WIDE_INT d = INTVAL (op1);
3239                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3240
3241                 /* n rem d = n rem -d */
3242                 if (rem_flag && d < 0)
3243                   {
3244                     d = abs_d;
3245                     op1 = gen_int_mode (abs_d, compute_mode);
3246                   }
3247
3248                 if (d == 1)
3249                   quotient = op0;
3250                 else if (d == -1)
3251                   quotient = expand_unop (compute_mode, neg_optab, op0,
3252                                           tquotient, 0);
3253                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3254                   {
3255                     /* This case is not handled correctly below.  */
3256                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3257                                                 compute_mode, 1, 1);
3258                     if (quotient == 0)
3259                       goto fail1;
3260                   }
3261                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3262                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3263                          /* ??? The cheap metric is computed only for
3264                             word_mode.  If this operation is wider, this may
3265                             not be so.  Assume true if the optab has an
3266                             expander for this mode.  */
3267                          && (((rem_flag ? smod_optab : sdiv_optab)
3268                               ->handlers[(int) compute_mode].insn_code
3269                               != CODE_FOR_nothing)
3270                              || (sdivmod_optab->handlers[(int) compute_mode]
3271                                  .insn_code != CODE_FOR_nothing)))
3272                   ;
3273                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3274                   {
3275                     lgup = floor_log2 (abs_d);
3276                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3277                       {
3278                         rtx label = gen_label_rtx ();
3279                         rtx t1;
3280
3281                         t1 = copy_to_mode_reg (compute_mode, op0);
3282                         do_cmp_and_jump (t1, const0_rtx, GE,
3283                                          compute_mode, label);
3284                         expand_inc (t1, gen_int_mode (abs_d - 1,
3285                                                       compute_mode));
3286                         emit_label (label);
3287                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3288                                                  build_int_2 (lgup, 0),
3289                                                  tquotient, 0);
3290                       }
3291                     else
3292                       {
3293                         rtx t1, t2, t3;
3294                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3295                                            build_int_2 (size - 1, 0),
3296                                            NULL_RTX, 0);
3297                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3298                                            build_int_2 (size - lgup, 0),
3299                                            NULL_RTX, 1);
3300                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3301                                                           op0, t2),
3302                                             NULL_RTX);
3303                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3304                                                  build_int_2 (lgup, 0),
3305                                                  tquotient, 0);
3306                       }
3307
3308                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3309                        the quotient.  */
3310                     if (d < 0)
3311                       {
3312                         insn = get_last_insn ();
3313                         if (insn != last
3314                             && (set = single_set (insn)) != 0
3315                             && SET_DEST (set) == quotient
3316                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3317                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3318                           set_unique_reg_note (insn,
3319                                                REG_EQUAL,
3320                                                gen_rtx_DIV (compute_mode,
3321                                                             op0,
3322                                                             GEN_INT
3323                                                             (trunc_int_for_mode
3324                                                              (abs_d,
3325                                                               compute_mode))));
3326
3327                         quotient = expand_unop (compute_mode, neg_optab,
3328                                                 quotient, quotient, 0);
3329                       }
3330                   }
3331                 else if (size <= HOST_BITS_PER_WIDE_INT)
3332                   {
3333                     choose_multiplier (abs_d, size, size - 1,
3334                                        &ml, &post_shift, &lgup);
3335                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3336                       {
3337                         rtx t1, t2, t3;
3338
3339                         if (post_shift >= BITS_PER_WORD
3340                             || size - 1 >= BITS_PER_WORD)
3341                           goto fail1;
3342
3343                         extra_cost = (shift_cost[post_shift]
3344                                       + shift_cost[size - 1] + add_cost);
3345                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3346                                                    NULL_RTX, 0,
3347                                                    max_cost - extra_cost);
3348                         if (t1 == 0)
3349                           goto fail1;
3350                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3351                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3352                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3353                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3354                         if (d < 0)
3355                           quotient
3356                             = force_operand (gen_rtx_MINUS (compute_mode,
3357                                                             t3, t2),
3358                                              tquotient);
3359                         else
3360                           quotient
3361                             = force_operand (gen_rtx_MINUS (compute_mode,
3362                                                             t2, t3),
3363                                              tquotient);
3364                       }
3365                     else
3366                       {
3367                         rtx t1, t2, t3, t4;
3368
3369                         if (post_shift >= BITS_PER_WORD
3370                             || size - 1 >= BITS_PER_WORD)
3371                           goto fail1;
3372
3373                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3374                         extra_cost = (shift_cost[post_shift]
3375                                       + shift_cost[size - 1] + 2 * add_cost);
3376                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3377                                                    NULL_RTX, 0,
3378                                                    max_cost - extra_cost);
3379                         if (t1 == 0)
3380                           goto fail1;
3381                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3382                                                           t1, op0),
3383                                             NULL_RTX);
3384                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3385                                            build_int_2 (post_shift, 0),
3386                                            NULL_RTX, 0);
3387                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3388                                            build_int_2 (size - 1, 0),
3389                                            NULL_RTX, 0);
3390                         if (d < 0)
3391                           quotient
3392                             = force_operand (gen_rtx_MINUS (compute_mode,
3393                                                             t4, t3),
3394                                              tquotient);
3395                         else
3396                           quotient
3397                             = force_operand (gen_rtx_MINUS (compute_mode,
3398                                                             t3, t4),
3399                                              tquotient);
3400                       }
3401                   }
3402                 else            /* Too wide mode to use tricky code */
3403                   break;
3404
3405                 insn = get_last_insn ();
3406                 if (insn != last
3407                     && (set = single_set (insn)) != 0
3408                     && SET_DEST (set) == quotient)
3409                   set_unique_reg_note (insn,
3410                                        REG_EQUAL,
3411                                        gen_rtx_DIV (compute_mode, op0, op1));
3412               }
3413             break;
3414           }
3415       fail1:
3416         delete_insns_since (last);
3417         break;
3418
3419       case FLOOR_DIV_EXPR:
3420       case FLOOR_MOD_EXPR:
3421       /* We will come here only for signed operations.  */
3422         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3423           {
3424             unsigned HOST_WIDE_INT mh, ml;
3425             int pre_shift, lgup, post_shift;
3426             HOST_WIDE_INT d = INTVAL (op1);
3427
3428             if (d > 0)
3429               {
3430                 /* We could just as easily deal with negative constants here,
3431                    but it does not seem worth the trouble for GCC 2.6.  */
3432                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3433                   {
3434                     pre_shift = floor_log2 (d);
3435                     if (rem_flag)
3436                       {
3437                         remainder = expand_binop (compute_mode, and_optab, op0,
3438                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3439                                                   remainder, 0, OPTAB_LIB_WIDEN);
3440                         if (remainder)
3441                           return gen_lowpart (mode, remainder);
3442                       }
3443                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3444                                              build_int_2 (pre_shift, 0),
3445                                              tquotient, 0);
3446                   }
3447                 else
3448                   {
3449                     rtx t1, t2, t3, t4;
3450
3451                     mh = choose_multiplier (d, size, size - 1,
3452                                             &ml, &post_shift, &lgup);
3453                     if (mh)
3454                       abort ();
3455
3456                     if (post_shift < BITS_PER_WORD
3457                         && size - 1 < BITS_PER_WORD)
3458                       {
3459                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3460                                            build_int_2 (size - 1, 0),
3461                                            NULL_RTX, 0);
3462                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3463                                            NULL_RTX, 0, OPTAB_WIDEN);
3464                         extra_cost = (shift_cost[post_shift]
3465                                       + shift_cost[size - 1] + 2 * add_cost);
3466                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3467                                                    NULL_RTX, 1,
3468                                                    max_cost - extra_cost);
3469                         if (t3 != 0)
3470                           {
3471                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3472                                                build_int_2 (post_shift, 0),
3473                                                NULL_RTX, 1);
3474                             quotient = expand_binop (compute_mode, xor_optab,
3475                                                      t4, t1, tquotient, 0,
3476                                                      OPTAB_WIDEN);
3477                           }
3478                       }
3479                   }
3480               }
3481             else
3482               {
3483                 rtx nsign, t1, t2, t3, t4;
3484                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3485                                                   op0, constm1_rtx), NULL_RTX);
3486                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3487                                    0, OPTAB_WIDEN);
3488                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3489                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3490                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3491                                     NULL_RTX);
3492                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3493                                     NULL_RTX, 0);
3494                 if (t4)
3495                   {
3496                     rtx t5;
3497                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3498                                       NULL_RTX, 0);
3499                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3500                                                             t4, t5),
3501                                               tquotient);
3502                   }
3503               }
3504           }
3505
3506         if (quotient != 0)
3507           break;
3508         delete_insns_since (last);
3509
3510         /* Try using an instruction that produces both the quotient and
3511            remainder, using truncation.  We can easily compensate the quotient
3512            or remainder to get floor rounding, once we have the remainder.
3513            Notice that we compute also the final remainder value here,
3514            and return the result right away.  */
3515         if (target == 0 || GET_MODE (target) != compute_mode)
3516           target = gen_reg_rtx (compute_mode);
3517
3518         if (rem_flag)
3519           {
3520             remainder
3521               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3522             quotient = gen_reg_rtx (compute_mode);
3523           }
3524         else
3525           {
3526             quotient
3527               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3528             remainder = gen_reg_rtx (compute_mode);
3529           }
3530
3531         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3532                                  quotient, remainder, 0))
3533           {
3534             /* This could be computed with a branch-less sequence.
3535                Save that for later.  */
3536             rtx tem;
3537             rtx label = gen_label_rtx ();
3538             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3539             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3540                                 NULL_RTX, 0, OPTAB_WIDEN);
3541             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3542             expand_dec (quotient, const1_rtx);
3543             expand_inc (remainder, op1);
3544             emit_label (label);
3545             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3546           }
3547
3548         /* No luck with division elimination or divmod.  Have to do it
3549            by conditionally adjusting op0 *and* the result.  */
3550         {
3551           rtx label1, label2, label3, label4, label5;
3552           rtx adjusted_op0;
3553           rtx tem;
3554
3555           quotient = gen_reg_rtx (compute_mode);
3556           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3557           label1 = gen_label_rtx ();
3558           label2 = gen_label_rtx ();
3559           label3 = gen_label_rtx ();
3560           label4 = gen_label_rtx ();
3561           label5 = gen_label_rtx ();
3562           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3563           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3564           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3565                               quotient, 0, OPTAB_LIB_WIDEN);
3566           if (tem != quotient)
3567             emit_move_insn (quotient, tem);
3568           emit_jump_insn (gen_jump (label5));
3569           emit_barrier ();
3570           emit_label (label1);
3571           expand_inc (adjusted_op0, const1_rtx);
3572           emit_jump_insn (gen_jump (label4));
3573           emit_barrier ();
3574           emit_label (label2);
3575           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3576           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3577                               quotient, 0, OPTAB_LIB_WIDEN);
3578           if (tem != quotient)
3579             emit_move_insn (quotient, tem);
3580           emit_jump_insn (gen_jump (label5));
3581           emit_barrier ();
3582           emit_label (label3);
3583           expand_dec (adjusted_op0, const1_rtx);
3584           emit_label (label4);
3585           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3586                               quotient, 0, OPTAB_LIB_WIDEN);
3587           if (tem != quotient)
3588             emit_move_insn (quotient, tem);
3589           expand_dec (quotient, const1_rtx);
3590           emit_label (label5);
3591         }
3592         break;
3593
3594       case CEIL_DIV_EXPR:
3595       case CEIL_MOD_EXPR:
3596         if (unsignedp)
3597           {
3598             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3599               {
3600                 rtx t1, t2, t3;
3601                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3602                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3603                                    build_int_2 (floor_log2 (d), 0),
3604                                    tquotient, 1);
3605                 t2 = expand_binop (compute_mode, and_optab, op0,
3606                                    GEN_INT (d - 1),
3607                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3608                 t3 = gen_reg_rtx (compute_mode);
3609                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3610                                       compute_mode, 1, 1);
3611                 if (t3 == 0)
3612                   {
3613                     rtx lab;
3614                     lab = gen_label_rtx ();
3615                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3616                     expand_inc (t1, const1_rtx);
3617                     emit_label (lab);
3618                     quotient = t1;
3619                   }
3620                 else
3621                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3622                                                           t1, t3),
3623                                             tquotient);
3624                 break;
3625               }
3626
3627             /* Try using an instruction that produces both the quotient and
3628                remainder, using truncation.  We can easily compensate the
3629                quotient or remainder to get ceiling rounding, once we have the
3630                remainder.  Notice that we compute also the final remainder
3631                value here, and return the result right away.  */
3632             if (target == 0 || GET_MODE (target) != compute_mode)
3633               target = gen_reg_rtx (compute_mode);
3634
3635             if (rem_flag)
3636               {
3637                 remainder = (GET_CODE (target) == REG
3638                              ? target : gen_reg_rtx (compute_mode));
3639                 quotient = gen_reg_rtx (compute_mode);
3640               }
3641             else
3642               {
3643                 quotient = (GET_CODE (target) == REG
3644                             ? target : gen_reg_rtx (compute_mode));
3645                 remainder = gen_reg_rtx (compute_mode);
3646               }
3647
3648             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3649                                      remainder, 1))
3650               {
3651                 /* This could be computed with a branch-less sequence.
3652                    Save that for later.  */
3653                 rtx label = gen_label_rtx ();
3654                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3655                                  compute_mode, label);
3656                 expand_inc (quotient, const1_rtx);
3657                 expand_dec (remainder, op1);
3658                 emit_label (label);
3659                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3660               }
3661
3662             /* No luck with division elimination or divmod.  Have to do it
3663                by conditionally adjusting op0 *and* the result.  */
3664             {
3665               rtx label1, label2;
3666               rtx adjusted_op0, tem;
3667
3668               quotient = gen_reg_rtx (compute_mode);
3669               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3670               label1 = gen_label_rtx ();
3671               label2 = gen_label_rtx ();
3672               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3673                                compute_mode, label1);
3674               emit_move_insn  (quotient, const0_rtx);
3675               emit_jump_insn (gen_jump (label2));
3676               emit_barrier ();
3677               emit_label (label1);
3678               expand_dec (adjusted_op0, const1_rtx);
3679               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3680                                   quotient, 1, OPTAB_LIB_WIDEN);
3681               if (tem != quotient)
3682                 emit_move_insn (quotient, tem);
3683               expand_inc (quotient, const1_rtx);
3684               emit_label (label2);
3685             }
3686           }
3687         else /* signed */
3688           {
3689             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3690                 && INTVAL (op1) >= 0)
3691               {
3692                 /* This is extremely similar to the code for the unsigned case
3693                    above.  For 2.7 we should merge these variants, but for
3694                    2.6.1 I don't want to touch the code for unsigned since that
3695                    get used in C.  The signed case will only be used by other
3696                    languages (Ada).  */
3697
3698                 rtx t1, t2, t3;
3699                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3700                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3701                                    build_int_2 (floor_log2 (d), 0),
3702                                    tquotient, 0);
3703                 t2 = expand_binop (compute_mode, and_optab, op0,
3704                                    GEN_INT (d - 1),
3705                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3706                 t3 = gen_reg_rtx (compute_mode);
3707                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3708                                       compute_mode, 1, 1);
3709                 if (t3 == 0)
3710                   {
3711                     rtx lab;
3712                     lab = gen_label_rtx ();
3713                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3714                     expand_inc (t1, const1_rtx);
3715                     emit_label (lab);
3716                     quotient = t1;
3717                   }
3718                 else
3719                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3720                                                           t1, t3),
3721                                             tquotient);
3722                 break;
3723               }
3724
3725             /* Try using an instruction that produces both the quotient and
3726                remainder, using truncation.  We can easily compensate the
3727                quotient or remainder to get ceiling rounding, once we have the
3728                remainder.  Notice that we compute also the final remainder
3729                value here, and return the result right away.  */
3730             if (target == 0 || GET_MODE (target) != compute_mode)
3731               target = gen_reg_rtx (compute_mode);
3732             if (rem_flag)
3733               {
3734                 remainder= (GET_CODE (target) == REG
3735                             ? target : gen_reg_rtx (compute_mode));
3736                 quotient = gen_reg_rtx (compute_mode);
3737               }
3738             else
3739               {
3740                 quotient = (GET_CODE (target) == REG
3741                             ? target : gen_reg_rtx (compute_mode));
3742                 remainder = gen_reg_rtx (compute_mode);
3743               }
3744
3745             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3746                                      remainder, 0))
3747               {
3748                 /* This could be computed with a branch-less sequence.
3749                    Save that for later.  */
3750                 rtx tem;
3751                 rtx label = gen_label_rtx ();
3752                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3753                                  compute_mode, label);
3754                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3755                                     NULL_RTX, 0, OPTAB_WIDEN);
3756                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3757                 expand_inc (quotient, const1_rtx);
3758                 expand_dec (remainder, op1);
3759                 emit_label (label);
3760                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3761               }
3762
3763             /* No luck with division elimination or divmod.  Have to do it
3764                by conditionally adjusting op0 *and* the result.  */
3765             {
3766               rtx label1, label2, label3, label4, label5;
3767               rtx adjusted_op0;
3768               rtx tem;
3769
3770               quotient = gen_reg_rtx (compute_mode);
3771               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3772               label1 = gen_label_rtx ();
3773               label2 = gen_label_rtx ();
3774               label3 = gen_label_rtx ();
3775               label4 = gen_label_rtx ();
3776               label5 = gen_label_rtx ();
3777               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3778               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3779                                compute_mode, label1);
3780               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3781                                   quotient, 0, OPTAB_LIB_WIDEN);
3782               if (tem != quotient)
3783                 emit_move_insn (quotient, tem);
3784               emit_jump_insn (gen_jump (label5));
3785               emit_barrier ();
3786               emit_label (label1);
3787               expand_dec (adjusted_op0, const1_rtx);
3788               emit_jump_insn (gen_jump (label4));
3789               emit_barrier ();
3790               emit_label (label2);
3791               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3792                                compute_mode, label3);
3793               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3794                                   quotient, 0, OPTAB_LIB_WIDEN);
3795               if (tem != quotient)
3796                 emit_move_insn (quotient, tem);
3797               emit_jump_insn (gen_jump (label5));
3798               emit_barrier ();
3799               emit_label (label3);
3800               expand_inc (adjusted_op0, const1_rtx);
3801               emit_label (label4);
3802               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3803                                   quotient, 0, OPTAB_LIB_WIDEN);
3804               if (tem != quotient)
3805                 emit_move_insn (quotient, tem);
3806               expand_inc (quotient, const1_rtx);
3807               emit_label (label5);
3808             }
3809           }
3810         break;
3811
3812       case EXACT_DIV_EXPR:
3813         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3814           {
3815             HOST_WIDE_INT d = INTVAL (op1);
3816             unsigned HOST_WIDE_INT ml;
3817             int pre_shift;
3818             rtx t1;
3819
3820             pre_shift = floor_log2 (d & -d);
3821             ml = invert_mod2n (d >> pre_shift, size);
3822             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3823                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3824             quotient = expand_mult (compute_mode, t1,
3825                                     gen_int_mode (ml, compute_mode),
3826                                     NULL_RTX, 1);
3827
3828             insn = get_last_insn ();
3829             set_unique_reg_note (insn,
3830                                  REG_EQUAL,
3831                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3832                                                  compute_mode,
3833                                                  op0, op1));
3834           }
3835         break;
3836
3837       case ROUND_DIV_EXPR:
3838       case ROUND_MOD_EXPR:
3839         if (unsignedp)
3840           {
3841             rtx tem;
3842             rtx label;
3843             label = gen_label_rtx ();
3844             quotient = gen_reg_rtx (compute_mode);
3845             remainder = gen_reg_rtx (compute_mode);
3846             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3847               {
3848                 rtx tem;
3849                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3850                                          quotient, 1, OPTAB_LIB_WIDEN);
3851                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3852                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3853                                           remainder, 1, OPTAB_LIB_WIDEN);
3854               }
3855             tem = plus_constant (op1, -1);
3856             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3857                                 build_int_2 (1, 0), NULL_RTX, 1);
3858             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3859             expand_inc (quotient, const1_rtx);
3860             expand_dec (remainder, op1);
3861             emit_label (label);
3862           }
3863         else
3864           {
3865             rtx abs_rem, abs_op1, tem, mask;
3866             rtx label;
3867             label = gen_label_rtx ();
3868             quotient = gen_reg_rtx (compute_mode);
3869             remainder = gen_reg_rtx (compute_mode);
3870             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3871               {
3872                 rtx tem;
3873                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3874                                          quotient, 0, OPTAB_LIB_WIDEN);
3875                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3876                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3877                                           remainder, 0, OPTAB_LIB_WIDEN);
3878               }
3879             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3880             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3881             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3882                                 build_int_2 (1, 0), NULL_RTX, 1);
3883             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3884             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3885                                 NULL_RTX, 0, OPTAB_WIDEN);
3886             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3887                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3888             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3889                                 NULL_RTX, 0, OPTAB_WIDEN);
3890             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3891                                 NULL_RTX, 0, OPTAB_WIDEN);
3892             expand_inc (quotient, tem);
3893             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3894                                 NULL_RTX, 0, OPTAB_WIDEN);
3895             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3896                                 NULL_RTX, 0, OPTAB_WIDEN);
3897             expand_dec (remainder, tem);
3898             emit_label (label);
3899           }
3900         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3901
3902       default:
3903         abort ();
3904       }
3905
3906   if (quotient == 0)
3907     {
3908       if (target && GET_MODE (target) != compute_mode)
3909         target = 0;
3910
3911       if (rem_flag)
3912         {
3913           /* Try to produce the remainder without producing the quotient.
3914              If we seem to have a divmod pattern that does not require widening,
3915              don't try widening here.  We should really have a WIDEN argument
3916              to expand_twoval_binop, since what we'd really like to do here is
3917              1) try a mod insn in compute_mode
3918              2) try a divmod insn in compute_mode
3919              3) try a div insn in compute_mode and multiply-subtract to get
3920                 remainder
3921              4) try the same things with widening allowed.  */
3922           remainder
3923             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3924                                  op0, op1, target,
3925                                  unsignedp,
3926                                  ((optab2->handlers[(int) compute_mode].insn_code
3927                                    != CODE_FOR_nothing)
3928                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3929           if (remainder == 0)
3930             {
3931               /* No luck there.  Can we do remainder and divide at once
3932                  without a library call?  */
3933               remainder = gen_reg_rtx (compute_mode);
3934               if (! expand_twoval_binop ((unsignedp
3935                                           ? udivmod_optab
3936                                           : sdivmod_optab),
3937                                          op0, op1,
3938                                          NULL_RTX, remainder, unsignedp))
3939                 remainder = 0;
3940             }
3941
3942           if (remainder)
3943             return gen_lowpart (mode, remainder);
3944         }
3945
3946       /* Produce the quotient.  Try a quotient insn, but not a library call.
3947          If we have a divmod in this mode, use it in preference to widening
3948          the div (for this test we assume it will not fail). Note that optab2
3949          is set to the one of the two optabs that the call below will use.  */
3950       quotient
3951         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3952                              op0, op1, rem_flag ? NULL_RTX : target,
3953                              unsignedp,
3954                              ((optab2->handlers[(int) compute_mode].insn_code
3955                                != CODE_FOR_nothing)
3956                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3957
3958       if (quotient == 0)
3959         {
3960           /* No luck there.  Try a quotient-and-remainder insn,
3961              keeping the quotient alone.  */
3962           quotient = gen_reg_rtx (compute_mode);
3963           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3964                                      op0, op1,
3965                                      quotient, NULL_RTX, unsignedp))
3966             {
3967               quotient = 0;
3968               if (! rem_flag)
3969                 /* Still no luck.  If we are not computing the remainder,
3970                    use a library call for the quotient.  */
3971                 quotient = sign_expand_binop (compute_mode,
3972                                               udiv_optab, sdiv_optab,
3973                                               op0, op1, target,
3974                                               unsignedp, OPTAB_LIB_WIDEN);
3975             }
3976         }
3977     }
3978
3979   if (rem_flag)
3980     {
3981       if (target && GET_MODE (target) != compute_mode)
3982         target = 0;
3983
3984       if (quotient == 0)
3985         /* No divide instruction either.  Use library for remainder.  */
3986         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3987                                        op0, op1, target,
3988                                        unsignedp, OPTAB_LIB_WIDEN);
3989       else
3990         {
3991           /* We divided.  Now finish doing X - Y * (X / Y).  */
3992           remainder = expand_mult (compute_mode, quotient, op1,
3993                                    NULL_RTX, unsignedp);
3994           remainder = expand_binop (compute_mode, sub_optab, op0,
3995                                     remainder, target, unsignedp,
3996                                     OPTAB_LIB_WIDEN);
3997         }
3998     }
3999
4000   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4001 }
4002 \f
4003 /* Return a tree node with data type TYPE, describing the value of X.
4004    Usually this is an RTL_EXPR, if there is no obvious better choice.
4005    X may be an expression, however we only support those expressions
4006    generated by loop.c.  */
4007
4008 tree
4009 make_tree (tree type, rtx x)
4010 {
4011   tree t;
4012
4013   switch (GET_CODE (x))
4014     {
4015     case CONST_INT:
4016       t = build_int_2 (INTVAL (x),
4017                        (TREE_UNSIGNED (type)
4018                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4019                        || INTVAL (x) >= 0 ? 0 : -1);
4020       TREE_TYPE (t) = type;
4021       return t;
4022
4023     case CONST_DOUBLE:
4024       if (GET_MODE (x) == VOIDmode)
4025         {
4026           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4027           TREE_TYPE (t) = type;
4028         }
4029       else
4030         {
4031           REAL_VALUE_TYPE d;
4032
4033           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4034           t = build_real (type, d);
4035         }
4036
4037       return t;
4038
4039     case CONST_VECTOR:
4040       {
4041         int i, units;
4042         rtx elt;
4043         tree t = NULL_TREE;
4044
4045         units = CONST_VECTOR_NUNITS (x);
4046
4047         /* Build a tree with vector elements.  */
4048         for (i = units - 1; i >= 0; --i)
4049           {
4050             elt = CONST_VECTOR_ELT (x, i);
4051             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4052           }
4053
4054         return build_vector (type, t);
4055       }
4056
4057     case PLUS:
4058       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4059                           make_tree (type, XEXP (x, 1))));
4060
4061     case MINUS:
4062       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4063                           make_tree (type, XEXP (x, 1))));
4064
4065     case NEG:
4066       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4067
4068     case MULT:
4069       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4070                           make_tree (type, XEXP (x, 1))));
4071
4072     case ASHIFT:
4073       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4074                           make_tree (type, XEXP (x, 1))));
4075
4076     case LSHIFTRT:
4077       t = (*lang_hooks.types.unsigned_type) (type);
4078       return fold (convert (type,
4079                             build (RSHIFT_EXPR, t,
4080                                    make_tree (t, XEXP (x, 0)),
4081                                    make_tree (type, XEXP (x, 1)))));
4082
4083     case ASHIFTRT:
4084       t = (*lang_hooks.types.signed_type) (type);
4085       return fold (convert (type,
4086                             build (RSHIFT_EXPR, t,
4087                                    make_tree (t, XEXP (x, 0)),
4088                                    make_tree (type, XEXP (x, 1)))));
4089
4090     case DIV:
4091       if (TREE_CODE (type) != REAL_TYPE)
4092         t = (*lang_hooks.types.signed_type) (type);
4093       else
4094         t = type;
4095
4096       return fold (convert (type,
4097                             build (TRUNC_DIV_EXPR, t,
4098                                    make_tree (t, XEXP (x, 0)),
4099                                    make_tree (t, XEXP (x, 1)))));
4100     case UDIV:
4101       t = (*lang_hooks.types.unsigned_type) (type);
4102       return fold (convert (type,
4103                             build (TRUNC_DIV_EXPR, t,
4104                                    make_tree (t, XEXP (x, 0)),
4105                                    make_tree (t, XEXP (x, 1)))));
4106
4107     case SIGN_EXTEND:
4108     case ZERO_EXTEND:
4109       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4110                                              GET_CODE (x) == ZERO_EXTEND);
4111       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4112
4113    default:
4114       t = make_node (RTL_EXPR);
4115       TREE_TYPE (t) = type;
4116
4117       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4118          ptr_mode.  So convert.  */
4119       if (POINTER_TYPE_P (type))
4120         x = convert_memory_address (TYPE_MODE (type), x);
4121
4122       RTL_EXPR_RTL (t) = x;
4123       /* There are no insns to be output
4124          when this rtl_expr is used.  */
4125       RTL_EXPR_SEQUENCE (t) = 0;
4126       return t;
4127     }
4128 }
4129
4130 /* Check whether the multiplication X * MULT + ADD overflows.
4131    X, MULT and ADD must be CONST_*.
4132    MODE is the machine mode for the computation.
4133    X and MULT must have mode MODE.  ADD may have a different mode.
4134    So can X (defaults to same as MODE).
4135    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4136
4137 bool
4138 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4139 {
4140   tree type, mult_type, add_type, result;
4141
4142   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4143
4144   /* In order to get a proper overflow indication from an unsigned
4145      type, we have to pretend that it's a sizetype.  */
4146   mult_type = type;
4147   if (unsignedp)
4148     {
4149       mult_type = copy_node (type);
4150       TYPE_IS_SIZETYPE (mult_type) = 1;
4151     }
4152
4153   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4154               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4155
4156   result = fold (build (PLUS_EXPR, mult_type,
4157                         fold (build (MULT_EXPR, mult_type,
4158                                      make_tree (mult_type, x),
4159                                      make_tree (mult_type, mult))),
4160                         make_tree (add_type, add)));
4161
4162   return TREE_CONSTANT_OVERFLOW (result);
4163 }
4164
4165 /* Return an rtx representing the value of X * MULT + ADD.
4166    TARGET is a suggestion for where to store the result (an rtx).
4167    MODE is the machine mode for the computation.
4168    X and MULT must have mode MODE.  ADD may have a different mode.
4169    So can X (defaults to same as MODE).
4170    UNSIGNEDP is nonzero to do unsigned multiplication.
4171    This may emit insns.  */
4172
4173 rtx
4174 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4175                  int unsignedp)
4176 {
4177   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4178   tree add_type = (GET_MODE (add) == VOIDmode
4179                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4180                                                               unsignedp));
4181   tree result =  fold (build (PLUS_EXPR, type,
4182                               fold (build (MULT_EXPR, type,
4183                                            make_tree (type, x),
4184                                            make_tree (type, mult))),
4185                               make_tree (add_type, add)));
4186
4187   return expand_expr (result, target, VOIDmode, 0);
4188 }
4189 \f
4190 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4191    and returning TARGET.
4192
4193    If TARGET is 0, a pseudo-register or constant is returned.  */
4194
4195 rtx
4196 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4197 {
4198   rtx tem = 0;
4199
4200   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4201     tem = simplify_binary_operation (AND, mode, op0, op1);
4202   if (tem == 0)
4203     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4204
4205   if (target == 0)
4206     target = tem;
4207   else if (tem != target)
4208     emit_move_insn (target, tem);
4209   return target;
4210 }
4211 \f
4212 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4213    and storing in TARGET.  Normally return TARGET.
4214    Return 0 if that cannot be done.
4215
4216    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4217    it is VOIDmode, they cannot both be CONST_INT.
4218
4219    UNSIGNEDP is for the case where we have to widen the operands
4220    to perform the operation.  It says to use zero-extension.
4221
4222    NORMALIZEP is 1 if we should convert the result to be either zero
4223    or one.  Normalize is -1 if we should convert the result to be
4224    either zero or -1.  If NORMALIZEP is zero, the result will be left
4225    "raw" out of the scc insn.  */
4226
4227 rtx
4228 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4229                  enum machine_mode mode, int unsignedp, int normalizep)
4230 {
4231   rtx subtarget;
4232   enum insn_code icode;
4233   enum machine_mode compare_mode;
4234   enum machine_mode target_mode = GET_MODE (target);
4235   rtx tem;
4236   rtx last = get_last_insn ();
4237   rtx pattern, comparison;
4238
4239   /* ??? Ok to do this and then fail? */
4240   op0 = protect_from_queue (op0, 0);
4241   op1 = protect_from_queue (op1, 0);
4242
4243   if (unsignedp)
4244     code = unsigned_condition (code);
4245
4246   /* If one operand is constant, make it the second one.  Only do this
4247      if the other operand is not constant as well.  */
4248
4249   if (swap_commutative_operands_p (op0, op1))
4250     {
4251       tem = op0;
4252       op0 = op1;
4253       op1 = tem;
4254       code = swap_condition (code);
4255     }
4256
4257   if (mode == VOIDmode)
4258     mode = GET_MODE (op0);
4259
4260   /* For some comparisons with 1 and -1, we can convert this to
4261      comparisons with zero.  This will often produce more opportunities for
4262      store-flag insns.  */
4263
4264   switch (code)
4265     {
4266     case LT:
4267       if (op1 == const1_rtx)
4268         op1 = const0_rtx, code = LE;
4269       break;
4270     case LE:
4271       if (op1 == constm1_rtx)
4272         op1 = const0_rtx, code = LT;
4273       break;
4274     case GE:
4275       if (op1 == const1_rtx)
4276         op1 = const0_rtx, code = GT;
4277       break;
4278     case GT:
4279       if (op1 == constm1_rtx)
4280         op1 = const0_rtx, code = GE;
4281       break;
4282     case GEU:
4283       if (op1 == const1_rtx)
4284         op1 = const0_rtx, code = NE;
4285       break;
4286     case LTU:
4287       if (op1 == const1_rtx)
4288         op1 = const0_rtx, code = EQ;
4289       break;
4290     default:
4291       break;
4292     }
4293
4294   /* If we are comparing a double-word integer with zero, we can convert
4295      the comparison into one involving a single word.  */
4296   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4297       && GET_MODE_CLASS (mode) == MODE_INT
4298       && op1 == const0_rtx
4299       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4300     {
4301       if (code == EQ || code == NE)
4302         {
4303           rtx op00, op01, op0both;
4304
4305           /* Do a logical OR of the two words and compare the result.  */
4306           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4307           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4308           op0both = expand_binop (word_mode, ior_optab, op00, op01,
4309                                   NULL_RTX, unsignedp, OPTAB_DIRECT);
4310           if (op0both != 0)
4311             return emit_store_flag (target, code, op0both, op1, word_mode,
4312                                     unsignedp, normalizep);
4313         }
4314       else if (code == LT || code == GE)
4315         {
4316           rtx op0h;
4317
4318           /* If testing the sign bit, can just test on high word.  */
4319           op0h = simplify_gen_subreg (word_mode, op0, mode,
4320                                       subreg_highpart_offset (word_mode, mode));
4321           return emit_store_flag (target, code, op0h, op1, word_mode,
4322                                   unsignedp, normalizep);
4323         }
4324     }
4325
4326   /* From now on, we won't change CODE, so set ICODE now.  */
4327   icode = setcc_gen_code[(int) code];
4328
4329   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4330      complement of A (for GE) and shifting the sign bit to the low bit.  */
4331   if (op1 == const0_rtx && (code == LT || code == GE)
4332       && GET_MODE_CLASS (mode) == MODE_INT
4333       && (normalizep || STORE_FLAG_VALUE == 1
4334           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4335               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4336                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4337     {
4338       subtarget = target;
4339
4340       /* If the result is to be wider than OP0, it is best to convert it
4341          first.  If it is to be narrower, it is *incorrect* to convert it
4342          first.  */
4343       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4344         {
4345           op0 = protect_from_queue (op0, 0);
4346           op0 = convert_modes (target_mode, mode, op0, 0);
4347           mode = target_mode;
4348         }
4349
4350       if (target_mode != mode)
4351         subtarget = 0;
4352
4353       if (code == GE)
4354         op0 = expand_unop (mode, one_cmpl_optab, op0,
4355                            ((STORE_FLAG_VALUE == 1 || normalizep)
4356                             ? 0 : subtarget), 0);
4357
4358       if (STORE_FLAG_VALUE == 1 || normalizep)
4359         /* If we are supposed to produce a 0/1 value, we want to do
4360            a logical shift from the sign bit to the low-order bit; for
4361            a -1/0 value, we do an arithmetic shift.  */
4362         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4363                             size_int (GET_MODE_BITSIZE (mode) - 1),
4364                             subtarget, normalizep != -1);
4365
4366       if (mode != target_mode)
4367         op0 = convert_modes (target_mode, mode, op0, 0);
4368
4369       return op0;
4370     }
4371
4372   if (icode != CODE_FOR_nothing)
4373     {
4374       insn_operand_predicate_fn pred;
4375
4376       /* We think we may be able to do this with a scc insn.  Emit the
4377          comparison and then the scc insn.
4378
4379          compare_from_rtx may call emit_queue, which would be deleted below
4380          if the scc insn fails.  So call it ourselves before setting LAST.
4381          Likewise for do_pending_stack_adjust.  */
4382
4383       emit_queue ();
4384       do_pending_stack_adjust ();
4385       last = get_last_insn ();
4386
4387       comparison
4388         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4389       if (GET_CODE (comparison) == CONST_INT)
4390         return (comparison == const0_rtx ? const0_rtx
4391                 : normalizep == 1 ? const1_rtx
4392                 : normalizep == -1 ? constm1_rtx
4393                 : const_true_rtx);
4394
4395       /* The code of COMPARISON may not match CODE if compare_from_rtx
4396          decided to swap its operands and reverse the original code.
4397
4398          We know that compare_from_rtx returns either a CONST_INT or
4399          a new comparison code, so it is safe to just extract the
4400          code from COMPARISON.  */
4401       code = GET_CODE (comparison);
4402
4403       /* Get a reference to the target in the proper mode for this insn.  */
4404       compare_mode = insn_data[(int) icode].operand[0].mode;
4405       subtarget = target;
4406       pred = insn_data[(int) icode].operand[0].predicate;
4407       if (preserve_subexpressions_p ()
4408           || ! (*pred) (subtarget, compare_mode))
4409         subtarget = gen_reg_rtx (compare_mode);
4410
4411       pattern = GEN_FCN (icode) (subtarget);
4412       if (pattern)
4413         {
4414           emit_insn (pattern);
4415
4416           /* If we are converting to a wider mode, first convert to
4417              TARGET_MODE, then normalize.  This produces better combining
4418              opportunities on machines that have a SIGN_EXTRACT when we are
4419              testing a single bit.  This mostly benefits the 68k.
4420
4421              If STORE_FLAG_VALUE does not have the sign bit set when
4422              interpreted in COMPARE_MODE, we can do this conversion as
4423              unsigned, which is usually more efficient.  */
4424           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4425             {
4426               convert_move (target, subtarget,
4427                             (GET_MODE_BITSIZE (compare_mode)
4428                              <= HOST_BITS_PER_WIDE_INT)
4429                             && 0 == (STORE_FLAG_VALUE
4430                                      & ((HOST_WIDE_INT) 1
4431                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4432               op0 = target;
4433               compare_mode = target_mode;
4434             }
4435           else
4436             op0 = subtarget;
4437
4438           /* If we want to keep subexpressions around, don't reuse our
4439              last target.  */
4440
4441           if (preserve_subexpressions_p ())
4442             subtarget = 0;
4443
4444           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4445              we don't have to do anything.  */
4446           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4447             ;
4448           /* STORE_FLAG_VALUE might be the most negative number, so write
4449              the comparison this way to avoid a compiler-time warning.  */
4450           else if (- normalizep == STORE_FLAG_VALUE)
4451             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4452
4453           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4454              makes it hard to use a value of just the sign bit due to
4455              ANSI integer constant typing rules.  */
4456           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4457                    && (STORE_FLAG_VALUE
4458                        & ((HOST_WIDE_INT) 1
4459                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4460             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4461                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4462                                 subtarget, normalizep == 1);
4463           else if (STORE_FLAG_VALUE & 1)
4464             {
4465               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4466               if (normalizep == -1)
4467                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4468             }
4469           else
4470             abort ();
4471
4472           /* If we were converting to a smaller mode, do the
4473              conversion now.  */
4474           if (target_mode != compare_mode)
4475             {
4476               convert_move (target, op0, 0);
4477               return target;
4478             }
4479           else
4480             return op0;
4481         }
4482     }
4483
4484   delete_insns_since (last);
4485
4486   /* If expensive optimizations, use different pseudo registers for each
4487      insn, instead of reusing the same pseudo.  This leads to better CSE,
4488      but slows down the compiler, since there are more pseudos */
4489   subtarget = (!flag_expensive_optimizations
4490                && (target_mode == mode)) ? target : NULL_RTX;
4491
4492   /* If we reached here, we can't do this with a scc insn.  However, there
4493      are some comparisons that can be done directly.  For example, if
4494      this is an equality comparison of integers, we can try to exclusive-or
4495      (or subtract) the two operands and use a recursive call to try the
4496      comparison with zero.  Don't do any of these cases if branches are
4497      very cheap.  */
4498
4499   if (BRANCH_COST > 0
4500       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4501       && op1 != const0_rtx)
4502     {
4503       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4504                           OPTAB_WIDEN);
4505
4506       if (tem == 0)
4507         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4508                             OPTAB_WIDEN);
4509       if (tem != 0)
4510         tem = emit_store_flag (target, code, tem, const0_rtx,
4511                                mode, unsignedp, normalizep);
4512       if (tem == 0)
4513         delete_insns_since (last);
4514       return tem;
4515     }
4516
4517   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4518      the constant zero.  Reject all other comparisons at this point.  Only
4519      do LE and GT if branches are expensive since they are expensive on
4520      2-operand machines.  */
4521
4522   if (BRANCH_COST == 0
4523       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4524       || (code != EQ && code != NE
4525           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4526     return 0;
4527
4528   /* See what we need to return.  We can only return a 1, -1, or the
4529      sign bit.  */
4530
4531   if (normalizep == 0)
4532     {
4533       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4534         normalizep = STORE_FLAG_VALUE;
4535
4536       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4537                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4538                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4539         ;
4540       else
4541         return 0;
4542     }
4543
4544   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4545      do the necessary operation below.  */
4546
4547   tem = 0;
4548
4549   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4550      the sign bit set.  */
4551
4552   if (code == LE)
4553     {
4554       /* This is destructive, so SUBTARGET can't be OP0.  */
4555       if (rtx_equal_p (subtarget, op0))
4556         subtarget = 0;
4557
4558       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4559                           OPTAB_WIDEN);
4560       if (tem)
4561         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4562                             OPTAB_WIDEN);
4563     }
4564
4565   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4566      number of bits in the mode of OP0, minus one.  */
4567
4568   if (code == GT)
4569     {
4570       if (rtx_equal_p (subtarget, op0))
4571         subtarget = 0;
4572
4573       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4574                           size_int (GET_MODE_BITSIZE (mode) - 1),
4575                           subtarget, 0);
4576       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4577                           OPTAB_WIDEN);
4578     }
4579
4580   if (code == EQ || code == NE)
4581     {
4582       /* For EQ or NE, one way to do the comparison is to apply an operation
4583          that converts the operand into a positive number if it is nonzero
4584          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4585          for NE we negate.  This puts the result in the sign bit.  Then we
4586          normalize with a shift, if needed.
4587
4588          Two operations that can do the above actions are ABS and FFS, so try
4589          them.  If that doesn't work, and MODE is smaller than a full word,
4590          we can use zero-extension to the wider mode (an unsigned conversion)
4591          as the operation.  */
4592
4593       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4594          that is compensated by the subsequent overflow when subtracting
4595          one / negating.  */
4596
4597       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4598         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4599       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4600         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4601       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4602         {
4603           op0 = protect_from_queue (op0, 0);
4604           tem = convert_modes (word_mode, mode, op0, 1);
4605           mode = word_mode;
4606         }
4607
4608       if (tem != 0)
4609         {
4610           if (code == EQ)
4611             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4612                                 0, OPTAB_WIDEN);
4613           else
4614             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4615         }
4616
4617       /* If we couldn't do it that way, for NE we can "or" the two's complement
4618          of the value with itself.  For EQ, we take the one's complement of
4619          that "or", which is an extra insn, so we only handle EQ if branches
4620          are expensive.  */
4621
4622       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4623         {
4624           if (rtx_equal_p (subtarget, op0))
4625             subtarget = 0;
4626
4627           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4628           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4629                               OPTAB_WIDEN);
4630
4631           if (tem && code == EQ)
4632             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4633         }
4634     }
4635
4636   if (tem && normalizep)
4637     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4638                         size_int (GET_MODE_BITSIZE (mode) - 1),
4639                         subtarget, normalizep == 1);
4640
4641   if (tem)
4642     {
4643       if (GET_MODE (tem) != target_mode)
4644         {
4645           convert_move (target, tem, 0);
4646           tem = target;
4647         }
4648       else if (!subtarget)
4649         {
4650           emit_move_insn (target, tem);
4651           tem = target;
4652         }
4653     }
4654   else
4655     delete_insns_since (last);
4656
4657   return tem;
4658 }
4659
4660 /* Like emit_store_flag, but always succeeds.  */
4661
4662 rtx
4663 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4664                        enum machine_mode mode, int unsignedp, int normalizep)
4665 {
4666   rtx tem, label;
4667
4668   /* First see if emit_store_flag can do the job.  */
4669   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4670   if (tem != 0)
4671     return tem;
4672
4673   if (normalizep == 0)
4674     normalizep = 1;
4675
4676   /* If this failed, we have to do this with set/compare/jump/set code.  */
4677
4678   if (GET_CODE (target) != REG
4679       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4680     target = gen_reg_rtx (GET_MODE (target));
4681
4682   emit_move_insn (target, const1_rtx);
4683   label = gen_label_rtx ();
4684   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4685                            NULL_RTX, label);
4686
4687   emit_move_insn (target, const0_rtx);
4688   emit_label (label);
4689
4690   return target;
4691 }
4692 \f
4693 /* Perform possibly multi-word comparison and conditional jump to LABEL
4694    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4695
4696    The algorithm is based on the code in expr.c:do_jump.
4697
4698    Note that this does not perform a general comparison.  Only variants
4699    generated within expmed.c are correctly handled, others abort (but could
4700    be handled if needed).  */
4701
4702 static void
4703 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
4704                  rtx label)
4705 {
4706   /* If this mode is an integer too wide to compare properly,
4707      compare word by word.  Rely on cse to optimize constant cases.  */
4708
4709   if (GET_MODE_CLASS (mode) == MODE_INT
4710       && ! can_compare_p (op, mode, ccp_jump))
4711     {
4712       rtx label2 = gen_label_rtx ();
4713
4714       switch (op)
4715         {
4716         case LTU:
4717           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4718           break;
4719
4720         case LEU:
4721           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4722           break;
4723
4724         case LT:
4725           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4726           break;
4727
4728         case GT:
4729           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4730           break;
4731
4732         case GE:
4733           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4734           break;
4735
4736           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4737              that's the only equality operations we do */
4738         case EQ:
4739           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4740             abort ();
4741           do_jump_by_parts_equality_rtx (arg1, label2, label);
4742           break;
4743
4744         case NE:
4745           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4746             abort ();
4747           do_jump_by_parts_equality_rtx (arg1, label, label2);
4748           break;
4749
4750         default:
4751           abort ();
4752         }
4753
4754       emit_label (label2);
4755     }
4756   else
4757     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4758 }