gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54
  55 /* Nonzero means divides or modulus operations are relatively cheap for
  56    powers of two, so don't use branches; emit the operation instead.
  57    Usually, this will mean that the MD file will emit non-branch
  58    sequences.  */
  59
  60 static int sdiv_pow2_cheap, smod_pow2_cheap;
  61
  62 #ifndef SLOW_UNALIGNED_ACCESS
  63 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  64 #endif
  65
  66 /* For compilers that support multiple targets with different word sizes,
  67    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  68    is the H8/300(H) compiler.  */
  69
  70 #ifndef MAX_BITS_PER_WORD
  71 #define MAX_BITS_PER_WORD BITS_PER_WORD
  72 #endif
  73
  74 /* Reduce conditional compilation elsewhere.  */
  75 #ifndef HAVE_insv
  76 #define HAVE_insv       0
  77 #define CODE_FOR_insv   CODE_FOR_nothing
  78 #define gen_insv(a,b,c,d) NULL_RTX
  79 #endif
  80 #ifndef HAVE_extv
  81 #define HAVE_extv       0
  82 #define CODE_FOR_extv   CODE_FOR_nothing
  83 #define gen_extv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extzv
  86 #define HAVE_extzv      0
  87 #define CODE_FOR_extzv  CODE_FOR_nothing
  88 #define gen_extzv(a,b,c,d) NULL_RTX
  89 #endif
  90
  91 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  92    shift count and some by mode.  */
  93 static int add_cost, negate_cost, zero_cost;
  94 static int shift_cost[MAX_BITS_PER_WORD];
  95 static int shiftadd_cost[MAX_BITS_PER_WORD];
  96 static int shiftsub_cost[MAX_BITS_PER_WORD];
  97 static int mul_cost[NUM_MACHINE_MODES];
  98 static int div_cost[NUM_MACHINE_MODES];
  99 static int mul_widen_cost[NUM_MACHINE_MODES];
 100 static int mul_highpart_cost[NUM_MACHINE_MODES];
 101
 102 void
 103 init_expmed (void)
 104 {
 105   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 106   int dummy;
 107   int m;
 108   enum machine_mode mode, wider_mode;
 109
 110   start_sequence ();
 111
 112   /* This is "some random pseudo register" for purposes of calling recog
 113      to see what insns exist.  */
 114   reg = gen_rtx_REG (word_mode, 10000);
 115
 116   zero_cost = rtx_cost (const0_rtx, 0);
 117   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 118
 119   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                                        gen_rtx_ASHIFT (word_mode, reg,
 121                                                        const0_rtx)));
 122
 123   shiftadd_insn
 124     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 125                               gen_rtx_PLUS (word_mode,
 126                                             gen_rtx_MULT (word_mode,
 127                                                           reg, const0_rtx),
 128                                             reg)));
 129
 130   shiftsub_insn
 131     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 132                               gen_rtx_MINUS (word_mode,
 133                                              gen_rtx_MULT (word_mode,
 134                                                            reg, const0_rtx),
 135                                              reg)));
 136
 137   init_recog ();
 138
 139   shift_cost[0] = 0;
 140   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 146
 147       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 148       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 149         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 150
 151       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 152       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 153         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 154
 155       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 156       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 157         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 158     }
 159
 160   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 161
 162   sdiv_pow2_cheap
 163     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 164        <= 2 * add_cost);
 165   smod_pow2_cheap
 166     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168
 169   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 170        mode != VOIDmode;
 171        mode = GET_MODE_WIDER_MODE (mode))
 172     {
 173       reg = gen_rtx_REG (mode, 10000);
 174       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 175       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 176       wider_mode = GET_MODE_WIDER_MODE (mode);
 177       if (wider_mode != VOIDmode)
 178         {
 179           mul_widen_cost[(int) wider_mode]
 180             = rtx_cost (gen_rtx_MULT (wider_mode,
 181                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 182                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 183                         SET);
 184           mul_highpart_cost[(int) mode]
 185             = rtx_cost (gen_rtx_TRUNCATE
 186                         (mode,
 187                          gen_rtx_LSHIFTRT (wider_mode,
 188                                            gen_rtx_MULT (wider_mode,
 189                                                          gen_rtx_ZERO_EXTEND
 190                                                          (wider_mode, reg),
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg)),
 193                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 194                         SET);
 195         }
 196     }
 197
 198   end_sequence ();
 199 }
 200
 201 /* Return an rtx representing minus the value of X.
 202    MODE is the intended mode of the result,
 203    useful if X is a CONST_INT.  */
 204
 205 rtx
 206 negate_rtx (enum machine_mode mode, rtx x)
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215
 216 /* Report on the availability of insv/extv/extzv and the desired mode
 217    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 218    is false; else the mode of the specified operand.  If OPNO is -1,
 219    all the caller cares about is whether the insn is available.  */
 220 enum machine_mode
 221 mode_for_extraction (enum extraction_pattern pattern, int opno)
 222 {
 223   const struct insn_data *data;
 224
 225   switch (pattern)
 226     {
 227     case EP_insv:
 228       if (HAVE_insv)
 229         {
 230           data = &insn_data[CODE_FOR_insv];
 231           break;
 232         }
 233       return MAX_MACHINE_MODE;
 234
 235     case EP_extv:
 236       if (HAVE_extv)
 237         {
 238           data = &insn_data[CODE_FOR_extv];
 239           break;
 240         }
 241       return MAX_MACHINE_MODE;
 242
 243     case EP_extzv:
 244       if (HAVE_extzv)
 245         {
 246           data = &insn_data[CODE_FOR_extzv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     default:
 252       abort ();
 253     }
 254
 255   if (opno == -1)
 256     return VOIDmode;
 257
 258   /* Everyone who uses this function used to follow it with
 259      if (result == VOIDmode) result = word_mode; */
 260   if (data->operand[opno].mode == VOIDmode)
 261     return word_mode;
 262   return data->operand[opno].mode;
 263 }
 264
 265 \f
 266 /* Generate code to store value from rtx VALUE
 267    into a bit-field within structure STR_RTX
 268    containing BITSIZE bits starting at bit BITNUM.
 269    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 270    ALIGN is the alignment that STR_RTX is known to have.
 271    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 272
 273 /* ??? Note that there are two different ideas here for how
 274    to determine the size to count bits within, for a register.
 275    One is BITS_PER_WORD, and the other is the size of operand 3
 276    of the insv pattern.
 277
 278    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 279    else, we use the mode of operand 3.  */
 280
 281 rtx
 282 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 283                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 284                  rtx value, HOST_WIDE_INT total_size)
 285 {
 286   unsigned int unit
 287     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 288   unsigned HOST_WIDE_INT offset = bitnum / unit;
 289   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 290   rtx op0 = str_rtx;
 291   int byte_offset;
 292
 293   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 294
 295   /* Discount the part of the structure before the desired byte.
 296      We need to know how many bytes are safe to reference after it.  */
 297   if (total_size >= 0)
 298     total_size -= (bitpos / BIGGEST_ALIGNMENT
 299                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 300
 301   while (GET_CODE (op0) == SUBREG)
 302     {
 303       /* The following line once was done only if WORDS_BIG_ENDIAN,
 304          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 305          meaningful at a much higher level; when structures are copied
 306          between memory and regs, the higher-numbered regs
 307          always get higher addresses.  */
 308       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 309       /* We used to adjust BITPOS here, but now we do the whole adjustment
 310          right after the loop.  */
 311       op0 = SUBREG_REG (op0);
 312     }
 313
 314   value = protect_from_queue (value, 0);
 315
 316   if (flag_force_mem)
 317     {
 318       int old_generating_concat_p = generating_concat_p;
 319       generating_concat_p = 0;
 320       value = force_not_mem (value);
 321       generating_concat_p = old_generating_concat_p;
 322     }
 323
 324   /* If the target is a register, overwriting the entire object, or storing
 325      a full-word or multi-word field can be done with just a SUBREG.
 326
 327      If the target is memory, storing any naturally aligned field can be
 328      done with a simple store.  For targets that support fast unaligned
 329      memory, any naturally sized, unit aligned field can be done directly.
 330
 331      It's okay if the requested bitsize is greater than fieldmode's
 332      bitsize; that just means the mode has padding bits.  */
 333
 334   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 335                 + (offset * UNITS_PER_WORD);
 336
 337   if (bitpos == 0
 338       && GET_MODE_BITSIZE (fieldmode) != 0
 339       && bitsize >= GET_MODE_BITSIZE (fieldmode)
 340       && (GET_CODE (op0) != MEM
 341           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 342              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 343              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 344           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 345              || (offset * BITS_PER_UNIT % bitsize == 0
 346                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 347     {
 348       if (GET_MODE (op0) != fieldmode)
 349         {
 350           if (GET_CODE (op0) == SUBREG)
 351             {
 352               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 353                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 354                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 355                 op0 = SUBREG_REG (op0);
 356               else
 357                 /* Else we've got some float mode source being extracted into
 358                    a different float mode destination -- this combination of
 359                    subregs results in Severe Tire Damage.  */
 360                 abort ();
 361             }
 362           if (GET_CODE (op0) == REG)
 363             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 364           else
 365             op0 = adjust_address (op0, fieldmode, offset);
 366         }
 367       emit_move_insn (op0, value);
 368       return value;
 369     }
 370
 371   /* Make sure we are playing with integral modes.  Pun with subregs
 372      if we aren't.  This must come after the entire register case above,
 373      since that case is valid for any mode.  The following cases are only
 374      valid for integral modes.  */
 375   {
 376     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 377     if (imode != GET_MODE (op0))
 378       {
 379         if (GET_CODE (op0) == MEM)
 380           op0 = adjust_address (op0, imode, 0);
 381         else if (imode != BLKmode)
 382           op0 = gen_lowpart (imode, op0);
 383         else
 384           abort ();
 385       }
 386   }
 387
 388   /* We may be accessing data outside the field, which means
 389      we can alias adjacent data.  */
 390   if (GET_CODE (op0) == MEM)
 391     {
 392       op0 = shallow_copy_rtx (op0);
 393       set_mem_alias_set (op0, 0);
 394       set_mem_expr (op0, 0);
 395     }
 396
 397   /* If OP0 is a register, BITPOS must count within a word.
 398      But as we have it, it counts within whatever size OP0 now has.
 399      On a bigendian machine, these are not the same, so convert.  */
 400   if (BYTES_BIG_ENDIAN
 401       && GET_CODE (op0) != MEM
 402       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 403     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 404
 405   /* Storing an lsb-aligned field in a register
 406      can be done with a movestrict instruction.  */
 407
 408   if (GET_CODE (op0) != MEM
 409       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 410       && bitsize == GET_MODE_BITSIZE (fieldmode)
 411       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 412           != CODE_FOR_nothing))
 413     {
 414       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 415
 416       /* Get appropriate low part of the value being stored.  */
 417       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 418         value = gen_lowpart (fieldmode, value);
 419       else if (!(GET_CODE (value) == SYMBOL_REF
 420                  || GET_CODE (value) == LABEL_REF
 421                  || GET_CODE (value) == CONST))
 422         value = convert_to_mode (fieldmode, value, 0);
 423
 424       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 425         value = copy_to_mode_reg (fieldmode, value);
 426
 427       if (GET_CODE (op0) == SUBREG)
 428         {
 429           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 430               || GET_MODE_CLASS (fieldmode) == MODE_INT
 431               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 432             op0 = SUBREG_REG (op0);
 433           else
 434             /* Else we've got some float mode source being extracted into
 435                a different float mode destination -- this combination of
 436                subregs results in Severe Tire Damage.  */
 437             abort ();
 438         }
 439
 440       emit_insn (GEN_FCN (icode)
 441                  (gen_rtx_SUBREG (fieldmode, op0,
 442                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 443                                   + (offset * UNITS_PER_WORD)),
 444                                   value));
 445
 446       return value;
 447     }
 448
 449   /* Handle fields bigger than a word.  */
 450
 451   if (bitsize > BITS_PER_WORD)
 452     {
 453       /* Here we transfer the words of the field
 454          in the order least significant first.
 455          This is because the most significant word is the one which may
 456          be less than full.
 457          However, only do that if the value is not BLKmode.  */
 458
 459       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 460       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 461       unsigned int i;
 462
 463       /* This is the mode we must force value to, so that there will be enough
 464          subwords to extract.  Note that fieldmode will often (always?) be
 465          VOIDmode, because that is what store_field uses to indicate that this
 466          is a bit field, but passing VOIDmode to operand_subword_force will
 467          result in an abort.  */
 468       fieldmode = GET_MODE (value);
 469       if (fieldmode == VOIDmode)
 470         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 471
 472       for (i = 0; i < nwords; i++)
 473         {
 474           /* If I is 0, use the low-order word in both field and target;
 475              if I is 1, use the next to lowest word; and so on.  */
 476           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 477           unsigned int bit_offset = (backwards
 478                                      ? MAX ((int) bitsize - ((int) i + 1)
 479                                             * BITS_PER_WORD,
 480                                             0)
 481                                      : (int) i * BITS_PER_WORD);
 482
 483           store_bit_field (op0, MIN (BITS_PER_WORD,
 484                                      bitsize - i * BITS_PER_WORD),
 485                            bitnum + bit_offset, word_mode,
 486                            operand_subword_force (value, wordnum, fieldmode),
 487                            total_size);
 488         }
 489       return value;
 490     }
 491
 492   /* From here on we can assume that the field to be stored in is
 493      a full-word (whatever type that is), since it is shorter than a word.  */
 494
 495   /* OFFSET is the number of words or bytes (UNIT says which)
 496      from STR_RTX to the first word or byte containing part of the field.  */
 497
 498   if (GET_CODE (op0) != MEM)
 499     {
 500       if (offset != 0
 501           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 502         {
 503           if (GET_CODE (op0) != REG)
 504             {
 505               /* Since this is a destination (lvalue), we can't copy it to a
 506                  pseudo.  We can trivially remove a SUBREG that does not
 507                  change the size of the operand.  Such a SUBREG may have been
 508                  added above.  Otherwise, abort.  */
 509               if (GET_CODE (op0) == SUBREG
 510                   && (GET_MODE_SIZE (GET_MODE (op0))
 511                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 512                 op0 = SUBREG_REG (op0);
 513               else
 514                 abort ();
 515             }
 516           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 517                                 op0, (offset * UNITS_PER_WORD));
 518         }
 519       offset = 0;
 520     }
 521   else
 522     op0 = protect_from_queue (op0, 1);
 523
 524   /* If VALUE is a floating-point mode, access it as an integer of the
 525      corresponding size.  This can occur on a machine with 64 bit registers
 526      that uses SFmode for float.  This can also occur for unaligned float
 527      structure fields.  */
 528   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 529       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 530     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 531                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 532                          value);
 533
 534   /* Now OFFSET is nonzero only if OP0 is memory
 535      and is therefore always measured in bytes.  */
 536
 537   if (HAVE_insv
 538       && GET_MODE (value) != BLKmode
 539       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 540       /* Ensure insv's size is wide enough for this field.  */
 541       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 542       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 543             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 544     {
 545       int xbitpos = bitpos;
 546       rtx value1;
 547       rtx xop0 = op0;
 548       rtx last = get_last_insn ();
 549       rtx pat;
 550       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 551       int save_volatile_ok = volatile_ok;
 552
 553       volatile_ok = 1;
 554
 555       /* If this machine's insv can only insert into a register, copy OP0
 556          into a register and save it back later.  */
 557       /* This used to check flag_force_mem, but that was a serious
 558          de-optimization now that flag_force_mem is enabled by -O2.  */
 559       if (GET_CODE (op0) == MEM
 560           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 561                 (op0, VOIDmode)))
 562         {
 563           rtx tempreg;
 564           enum machine_mode bestmode;
 565
 566           /* Get the mode to use for inserting into this field.  If OP0 is
 567              BLKmode, get the smallest mode consistent with the alignment. If
 568              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 569              mode. Otherwise, use the smallest mode containing the field.  */
 570
 571           if (GET_MODE (op0) == BLKmode
 572               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 573             bestmode
 574               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 575                                MEM_VOLATILE_P (op0));
 576           else
 577             bestmode = GET_MODE (op0);
 578
 579           if (bestmode == VOIDmode
 580               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 581                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 582             goto insv_loses;
 583
 584           /* Adjust address to point to the containing unit of that mode.
 585              Compute offset as multiple of this unit, counting in bytes.  */
 586           unit = GET_MODE_BITSIZE (bestmode);
 587           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 588           bitpos = bitnum % unit;
 589           op0 = adjust_address (op0, bestmode,  offset);
 590
 591           /* Fetch that unit, store the bitfield in it, then store
 592              the unit.  */
 593           tempreg = copy_to_reg (op0);
 594           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 595                            total_size);
 596           emit_move_insn (op0, tempreg);
 597           return value;
 598         }
 599       volatile_ok = save_volatile_ok;
 600
 601       /* Add OFFSET into OP0's address.  */
 602       if (GET_CODE (xop0) == MEM)
 603         xop0 = adjust_address (xop0, byte_mode, offset);
 604
 605       /* If xop0 is a register, we need it in MAXMODE
 606          to make it acceptable to the format of insv.  */
 607       if (GET_CODE (xop0) == SUBREG)
 608         /* We can't just change the mode, because this might clobber op0,
 609            and we will need the original value of op0 if insv fails.  */
 610         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 611       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 612         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 613
 614       /* On big-endian machines, we count bits from the most significant.
 615          If the bit field insn does not, we must invert.  */
 616
 617       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 618         xbitpos = unit - bitsize - xbitpos;
 619
 620       /* We have been counting XBITPOS within UNIT.
 621          Count instead within the size of the register.  */
 622       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 623         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 624
 625       unit = GET_MODE_BITSIZE (maxmode);
 626
 627       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 628       value1 = value;
 629       if (GET_MODE (value) != maxmode)
 630         {
 631           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 632             {
 633               /* Optimization: Don't bother really extending VALUE
 634                  if it has all the bits we will actually use.  However,
 635                  if we must narrow it, be sure we do it correctly.  */
 636
 637               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 638                 {
 639                   rtx tmp;
 640
 641                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 642                   if (! tmp)
 643                     tmp = simplify_gen_subreg (maxmode,
 644                                                force_reg (GET_MODE (value),
 645                                                           value1),
 646                                                GET_MODE (value), 0);
 647                   value1 = tmp;
 648                 }
 649               else
 650                 value1 = gen_lowpart (maxmode, value1);
 651             }
 652           else if (GET_CODE (value) == CONST_INT)
 653             value1 = gen_int_mode (INTVAL (value), maxmode);
 654           else if (!CONSTANT_P (value))
 655             /* Parse phase is supposed to make VALUE's data type
 656                match that of the component reference, which is a type
 657                at least as wide as the field; so VALUE should have
 658                a mode that corresponds to that type.  */
 659             abort ();
 660         }
 661
 662       /* If this machine's insv insists on a register,
 663          get VALUE1 into a register.  */
 664       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 665              (value1, maxmode)))
 666         value1 = force_reg (maxmode, value1);
 667
 668       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 669       if (pat)
 670         emit_insn (pat);
 671       else
 672         {
 673           delete_insns_since (last);
 674           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 675         }
 676     }
 677   else
 678     insv_loses:
 679     /* Insv is not available; store using shifts and boolean ops.  */
 680     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 681   return value;
 682 }
 683 \f
 684 /* Use shifts and boolean operations to store VALUE
 685    into a bit field of width BITSIZE
 686    in a memory location specified by OP0 except offset by OFFSET bytes.
 687      (OFFSET must be 0 if OP0 is a register.)
 688    The field starts at position BITPOS within the byte.
 689     (If OP0 is a register, it may be a full word or a narrower mode,
 690      but BITPOS still counts within a full word,
 691      which is significant on bigendian machines.)
 692
 693    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 694
 695 static void
 696 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 697                        unsigned HOST_WIDE_INT bitsize,
 698                        unsigned HOST_WIDE_INT bitpos, rtx value)
 699 {
 700   enum machine_mode mode;
 701   unsigned int total_bits = BITS_PER_WORD;
 702   rtx subtarget, temp;
 703   int all_zero = 0;
 704   int all_one = 0;
 705
 706   /* There is a case not handled here:
 707      a structure with a known alignment of just a halfword
 708      and a field split across two aligned halfwords within the structure.
 709      Or likewise a structure with a known alignment of just a byte
 710      and a field split across two bytes.
 711      Such cases are not supposed to be able to occur.  */
 712
 713   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 714     {
 715       if (offset != 0)
 716         abort ();
 717       /* Special treatment for a bit field split across two registers.  */
 718       if (bitsize + bitpos > BITS_PER_WORD)
 719         {
 720           store_split_bit_field (op0, bitsize, bitpos, value);
 721           return;
 722         }
 723     }
 724   else
 725     {
 726       /* Get the proper mode to use for this field.  We want a mode that
 727          includes the entire field.  If such a mode would be larger than
 728          a word, we won't be doing the extraction the normal way.
 729          We don't want a mode bigger than the destination.  */
 730
 731       mode = GET_MODE (op0);
 732       if (GET_MODE_BITSIZE (mode) == 0
 733           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 734         mode = word_mode;
 735       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 736                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 737
 738       if (mode == VOIDmode)
 739         {
 740           /* The only way this should occur is if the field spans word
 741              boundaries.  */
 742           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 743                                  value);
 744           return;
 745         }
 746
 747       total_bits = GET_MODE_BITSIZE (mode);
 748
 749       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 750          be in the range 0 to total_bits-1, and put any excess bytes in
 751          OFFSET.  */
 752       if (bitpos >= total_bits)
 753         {
 754           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 755           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 756                      * BITS_PER_UNIT);
 757         }
 758
 759       /* Get ref to an aligned byte, halfword, or word containing the field.
 760          Adjust BITPOS to be position within a word,
 761          and OFFSET to be the offset of that word.
 762          Then alter OP0 to refer to that word.  */
 763       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 764       offset -= (offset % (total_bits / BITS_PER_UNIT));
 765       op0 = adjust_address (op0, mode, offset);
 766     }
 767
 768   mode = GET_MODE (op0);
 769
 770   /* Now MODE is either some integral mode for a MEM as OP0,
 771      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 772      The bit field is contained entirely within OP0.
 773      BITPOS is the starting bit number within OP0.
 774      (OP0's mode may actually be narrower than MODE.)  */
 775
 776   if (BYTES_BIG_ENDIAN)
 777       /* BITPOS is the distance between our msb
 778          and that of the containing datum.
 779          Convert it to the distance from the lsb.  */
 780       bitpos = total_bits - bitsize - bitpos;
 781
 782   /* Now BITPOS is always the distance between our lsb
 783      and that of OP0.  */
 784
 785   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 786      we must first convert its mode to MODE.  */
 787
 788   if (GET_CODE (value) == CONST_INT)
 789     {
 790       HOST_WIDE_INT v = INTVAL (value);
 791
 792       if (bitsize < HOST_BITS_PER_WIDE_INT)
 793         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 794
 795       if (v == 0)
 796         all_zero = 1;
 797       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 798                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 799                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 800         all_one = 1;
 801
 802       value = lshift_value (mode, value, bitpos, bitsize);
 803     }
 804   else
 805     {
 806       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 807                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 808
 809       if (GET_MODE (value) != mode)
 810         {
 811           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 812               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 813             value = gen_lowpart (mode, value);
 814           else
 815             value = convert_to_mode (mode, value, 1);
 816         }
 817
 818       if (must_and)
 819         value = expand_binop (mode, and_optab, value,
 820                               mask_rtx (mode, 0, bitsize, 0),
 821                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 822       if (bitpos > 0)
 823         value = expand_shift (LSHIFT_EXPR, mode, value,
 824                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 825     }
 826
 827   /* Now clear the chosen bits in OP0,
 828      except that if VALUE is -1 we need not bother.  */
 829
 830   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 831
 832   if (! all_one)
 833     {
 834       temp = expand_binop (mode, and_optab, op0,
 835                            mask_rtx (mode, bitpos, bitsize, 1),
 836                            subtarget, 1, OPTAB_LIB_WIDEN);
 837       subtarget = temp;
 838     }
 839   else
 840     temp = op0;
 841
 842   /* Now logical-or VALUE into OP0, unless it is zero.  */
 843
 844   if (! all_zero)
 845     temp = expand_binop (mode, ior_optab, temp, value,
 846                          subtarget, 1, OPTAB_LIB_WIDEN);
 847   if (op0 != temp)
 848     emit_move_insn (op0, temp);
 849 }
 850 \f
 851 /* Store a bit field that is split across multiple accessible memory objects.
 852
 853    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 854    BITSIZE is the field width; BITPOS the position of its first bit
 855    (within the word).
 856    VALUE is the value to store.
 857
 858    This does not yet handle fields wider than BITS_PER_WORD.  */
 859
 860 static void
 861 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 862                        unsigned HOST_WIDE_INT bitpos, rtx value)
 863 {
 864   unsigned int unit;
 865   unsigned int bitsdone = 0;
 866
 867   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 868      much at a time.  */
 869   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 870     unit = BITS_PER_WORD;
 871   else
 872     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 873
 874   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 875      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 876      that VALUE might be a floating-point constant.  */
 877   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 878     {
 879       rtx word = gen_lowpart_common (word_mode, value);
 880
 881       if (word && (value != word))
 882         value = word;
 883       else
 884         value = gen_lowpart_common (word_mode,
 885                                     force_reg (GET_MODE (value) != VOIDmode
 886                                                ? GET_MODE (value)
 887                                                : word_mode, value));
 888     }
 889   else if (GET_CODE (value) == ADDRESSOF)
 890     value = copy_to_reg (value);
 891
 892   while (bitsdone < bitsize)
 893     {
 894       unsigned HOST_WIDE_INT thissize;
 895       rtx part, word;
 896       unsigned HOST_WIDE_INT thispos;
 897       unsigned HOST_WIDE_INT offset;
 898
 899       offset = (bitpos + bitsdone) / unit;
 900       thispos = (bitpos + bitsdone) % unit;
 901
 902       /* THISSIZE must not overrun a word boundary.  Otherwise,
 903          store_fixed_bit_field will call us again, and we will mutually
 904          recurse forever.  */
 905       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 906       thissize = MIN (thissize, unit - thispos);
 907
 908       if (BYTES_BIG_ENDIAN)
 909         {
 910           int total_bits;
 911
 912           /* We must do an endian conversion exactly the same way as it is
 913              done in extract_bit_field, so that the two calls to
 914              extract_fixed_bit_field will have comparable arguments.  */
 915           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 916             total_bits = BITS_PER_WORD;
 917           else
 918             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 919
 920           /* Fetch successively less significant portions.  */
 921           if (GET_CODE (value) == CONST_INT)
 922             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 923                              >> (bitsize - bitsdone - thissize))
 924                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 925           else
 926             /* The args are chosen so that the last part includes the
 927                lsb.  Give extract_bit_field the value it needs (with
 928                endianness compensation) to fetch the piece we want.  */
 929             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 930                                             total_bits - bitsize + bitsdone,
 931                                             NULL_RTX, 1);
 932         }
 933       else
 934         {
 935           /* Fetch successively more significant portions.  */
 936           if (GET_CODE (value) == CONST_INT)
 937             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 938                              >> bitsdone)
 939                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 940           else
 941             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 942                                             bitsdone, NULL_RTX, 1);
 943         }
 944
 945       /* If OP0 is a register, then handle OFFSET here.
 946
 947          When handling multiword bitfields, extract_bit_field may pass
 948          down a word_mode SUBREG of a larger REG for a bitfield that actually
 949          crosses a word boundary.  Thus, for a SUBREG, we must find
 950          the current word starting from the base register.  */
 951       if (GET_CODE (op0) == SUBREG)
 952         {
 953           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 954           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 955                                         GET_MODE (SUBREG_REG (op0)));
 956           offset = 0;
 957         }
 958       else if (GET_CODE (op0) == REG)
 959         {
 960           word = operand_subword_force (op0, offset, GET_MODE (op0));
 961           offset = 0;
 962         }
 963       else
 964         word = op0;
 965
 966       /* OFFSET is in UNITs, and UNIT is in bits.
 967          store_fixed_bit_field wants offset in bytes.  */
 968       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 969                              thispos, part);
 970       bitsdone += thissize;
 971     }
 972 }
 973 \f
 974 /* Generate code to extract a byte-field from STR_RTX
 975    containing BITSIZE bits, starting at BITNUM,
 976    and put it in TARGET if possible (if TARGET is nonzero).
 977    Regardless of TARGET, we return the rtx for where the value is placed.
 978    It may be a QUEUED.
 979
 980    STR_RTX is the structure containing the byte (a REG or MEM).
 981    UNSIGNEDP is nonzero if this is an unsigned bit field.
 982    MODE is the natural mode of the field value once extracted.
 983    TMODE is the mode the caller would like the value to have;
 984    but the value may be returned with type MODE instead.
 985
 986    TOTAL_SIZE is the size in bytes of the containing structure,
 987    or -1 if varying.
 988
 989    If a TARGET is specified and we can store in it at no extra cost,
 990    we do so, and return TARGET.
 991    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 992    if they are equally easy.  */
 993
 994 rtx
 995 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 996                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
 997                    enum machine_mode mode, enum machine_mode tmode,
 998                    HOST_WIDE_INT total_size)
 999 {
1000   unsigned int unit
1001     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
1002   unsigned HOST_WIDE_INT offset = bitnum / unit;
1003   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1004   rtx op0 = str_rtx;
1005   rtx spec_target = target;
1006   rtx spec_target_subreg = 0;
1007   enum machine_mode int_mode;
1008   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1009   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1010   enum machine_mode mode1;
1011   int byte_offset;
1012
1013   /* Discount the part of the structure before the desired byte.
1014      We need to know how many bytes are safe to reference after it.  */
1015   if (total_size >= 0)
1016     total_size -= (bitpos / BIGGEST_ALIGNMENT
1017                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1018
1019   if (tmode == VOIDmode)
1020     tmode = mode;
1021
1022   while (GET_CODE (op0) == SUBREG)
1023     {
1024       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1025       if (bitpos > unit)
1026         {
1027           offset += (bitpos / unit);
1028           bitpos %= unit;
1029         }
1030       op0 = SUBREG_REG (op0);
1031     }
1032
1033   if (GET_CODE (op0) == REG
1034       && mode == GET_MODE (op0)
1035       && bitnum == 0
1036       && GET_MODE_BITSIZE (GET_MODE (op0)) != 0
1037       && bitsize >= GET_MODE_BITSIZE (GET_MODE (op0)))
1038     {
1039       /* We're trying to extract a full register from itself.
1040          (If the requested bitsize is greater than the bitsize of op0,
1041          that just means op0's mode has padding bits.)  */
1042       return op0;
1043     }
1044
1045   /* Make sure we are playing with integral modes.  Pun with subregs
1046      if we aren't.  */
1047   {
1048     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1049     if (imode != GET_MODE (op0))
1050       {
1051         if (GET_CODE (op0) == MEM)
1052           op0 = adjust_address (op0, imode, 0);
1053         else if (imode != BLKmode)
1054           op0 = gen_lowpart (imode, op0);
1055         else
1056           abort ();
1057       }
1058   }
1059
1060   /* We may be accessing data outside the field, which means
1061      we can alias adjacent data.  */
1062   if (GET_CODE (op0) == MEM)
1063     {
1064       op0 = shallow_copy_rtx (op0);
1065       set_mem_alias_set (op0, 0);
1066       set_mem_expr (op0, 0);
1067     }
1068
1069   /* Extraction of a full-word or multi-word value from a structure
1070      in a register or aligned memory can be done with just a SUBREG.
1071      A subword value in the least significant part of a register
1072      can also be extracted with a SUBREG.  For this, we need the
1073      byte offset of the value in op0.  */
1074
1075   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1076
1077   /* If OP0 is a register, BITPOS must count within a word.
1078      But as we have it, it counts within whatever size OP0 now has.
1079      On a bigendian machine, these are not the same, so convert.  */
1080   if (BYTES_BIG_ENDIAN
1081       && GET_CODE (op0) != MEM
1082       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1083     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1084
1085   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1086      If that's wrong, the solution is to test for it and set TARGET to 0
1087      if needed.  */
1088
1089   mode1  = (VECTOR_MODE_P (tmode)
1090             ? mode
1091             : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1092
1093   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1094         && bitpos % BITS_PER_WORD == 0)
1095        || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1096            /* ??? The big endian test here is wrong.  This is correct
1097               if the value is in a register, and if mode_for_size is not
1098               the same mode as op0.  This causes us to get unnecessarily
1099               inefficient code from the Thumb port when -mbig-endian.  */
1100            && (BYTES_BIG_ENDIAN
1101                ? bitpos + bitsize == BITS_PER_WORD
1102                : bitpos == 0)))
1103       && ((GET_CODE (op0) != MEM
1104            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1105                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1106            && GET_MODE_SIZE (mode1) != 0
1107            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1108           || (GET_CODE (op0) == MEM
1109               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1110                   || (offset * BITS_PER_UNIT % bitsize == 0
1111                       && MEM_ALIGN (op0) % bitsize == 0)))))
1112     {
1113       if (mode1 != GET_MODE (op0))
1114         {
1115           if (GET_CODE (op0) == SUBREG)
1116             {
1117               if (GET_MODE (SUBREG_REG (op0)) == mode1
1118                   || GET_MODE_CLASS (mode1) == MODE_INT
1119                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1120                 op0 = SUBREG_REG (op0);
1121               else
1122                 /* Else we've got some float mode source being extracted into
1123                    a different float mode destination -- this combination of
1124                    subregs results in Severe Tire Damage.  */
1125                 goto no_subreg_mode_swap;
1126             }
1127           if (GET_CODE (op0) == REG)
1128             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1129           else
1130             op0 = adjust_address (op0, mode1, offset);
1131         }
1132       if (mode1 != mode)
1133         return convert_to_mode (tmode, op0, unsignedp);
1134       return op0;
1135     }
1136  no_subreg_mode_swap:
1137
1138   /* Handle fields bigger than a word.  */
1139
1140   if (bitsize > BITS_PER_WORD)
1141     {
1142       /* Here we transfer the words of the field
1143          in the order least significant first.
1144          This is because the most significant word is the one which may
1145          be less than full.  */
1146
1147       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1148       unsigned int i;
1149
1150       if (target == 0 || GET_CODE (target) != REG)
1151         target = gen_reg_rtx (mode);
1152
1153       /* Indicate for flow that the entire target reg is being set.  */
1154       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1155
1156       for (i = 0; i < nwords; i++)
1157         {
1158           /* If I is 0, use the low-order word in both field and target;
1159              if I is 1, use the next to lowest word; and so on.  */
1160           /* Word number in TARGET to use.  */
1161           unsigned int wordnum
1162             = (WORDS_BIG_ENDIAN
1163                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1164                : i);
1165           /* Offset from start of field in OP0.  */
1166           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1167                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1168                                                 * (int) BITS_PER_WORD))
1169                                      : (int) i * BITS_PER_WORD);
1170           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1171           rtx result_part
1172             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1173                                            bitsize - i * BITS_PER_WORD),
1174                                  bitnum + bit_offset, 1, target_part, mode,
1175                                  word_mode, total_size);
1176
1177           if (target_part == 0)
1178             abort ();
1179
1180           if (result_part != target_part)
1181             emit_move_insn (target_part, result_part);
1182         }
1183
1184       if (unsignedp)
1185         {
1186           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1187              need to be zero'd out.  */
1188           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1189             {
1190               unsigned int i, total_words;
1191
1192               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1193               for (i = nwords; i < total_words; i++)
1194                 emit_move_insn
1195                   (operand_subword (target,
1196                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1197                                     1, VOIDmode),
1198                    const0_rtx);
1199             }
1200           return target;
1201         }
1202
1203       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1204       target = expand_shift (LSHIFT_EXPR, mode, target,
1205                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1206                              NULL_RTX, 0);
1207       return expand_shift (RSHIFT_EXPR, mode, target,
1208                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1209                            NULL_RTX, 0);
1210     }
1211
1212   /* From here on we know the desired field is smaller than a word.  */
1213
1214   /* Check if there is a correspondingly-sized integer field, so we can
1215      safely extract it as one size of integer, if necessary; then
1216      truncate or extend to the size that is wanted; then use SUBREGs or
1217      convert_to_mode to get one of the modes we really wanted.  */
1218
1219   int_mode = int_mode_for_mode (tmode);
1220   if (int_mode == BLKmode)
1221     int_mode = int_mode_for_mode (mode);
1222   if (int_mode == BLKmode)
1223     abort ();    /* Should probably push op0 out to memory and then
1224                     do a load.  */
1225
1226   /* OFFSET is the number of words or bytes (UNIT says which)
1227      from STR_RTX to the first word or byte containing part of the field.  */
1228
1229   if (GET_CODE (op0) != MEM)
1230     {
1231       if (offset != 0
1232           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1233         {
1234           if (GET_CODE (op0) != REG)
1235             op0 = copy_to_reg (op0);
1236           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1237                                 op0, (offset * UNITS_PER_WORD));
1238         }
1239       offset = 0;
1240     }
1241   else
1242     op0 = protect_from_queue (str_rtx, 1);
1243
1244   /* Now OFFSET is nonzero only for memory operands.  */
1245
1246   if (unsignedp)
1247     {
1248       if (HAVE_extzv
1249           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1250           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1251                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1252         {
1253           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1254           rtx bitsize_rtx, bitpos_rtx;
1255           rtx last = get_last_insn ();
1256           rtx xop0 = op0;
1257           rtx xtarget = target;
1258           rtx xspec_target = spec_target;
1259           rtx xspec_target_subreg = spec_target_subreg;
1260           rtx pat;
1261           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1262
1263           if (GET_CODE (xop0) == MEM)
1264             {
1265               int save_volatile_ok = volatile_ok;
1266               volatile_ok = 1;
1267
1268               /* Is the memory operand acceptable?  */
1269               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1270                      (xop0, GET_MODE (xop0))))
1271                 {
1272                   /* No, load into a reg and extract from there.  */
1273                   enum machine_mode bestmode;
1274
1275                   /* Get the mode to use for inserting into this field.  If
1276                      OP0 is BLKmode, get the smallest mode consistent with the
1277                      alignment. If OP0 is a non-BLKmode object that is no
1278                      wider than MAXMODE, use its mode. Otherwise, use the
1279                      smallest mode containing the field.  */
1280
1281                   if (GET_MODE (xop0) == BLKmode
1282                       || (GET_MODE_SIZE (GET_MODE (op0))
1283                           > GET_MODE_SIZE (maxmode)))
1284                     bestmode = get_best_mode (bitsize, bitnum,
1285                                               MEM_ALIGN (xop0), maxmode,
1286                                               MEM_VOLATILE_P (xop0));
1287                   else
1288                     bestmode = GET_MODE (xop0);
1289
1290                   if (bestmode == VOIDmode
1291                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1292                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1293                     goto extzv_loses;
1294
1295                   /* Compute offset as multiple of this unit,
1296                      counting in bytes.  */
1297                   unit = GET_MODE_BITSIZE (bestmode);
1298                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1299                   xbitpos = bitnum % unit;
1300                   xop0 = adjust_address (xop0, bestmode, xoffset);
1301
1302                   /* Fetch it to a register in that size.  */
1303                   xop0 = force_reg (bestmode, xop0);
1304
1305                   /* XBITPOS counts within UNIT, which is what is expected.  */
1306                 }
1307               else
1308                 /* Get ref to first byte containing part of the field.  */
1309                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1310
1311               volatile_ok = save_volatile_ok;
1312             }
1313
1314           /* If op0 is a register, we need it in MAXMODE (which is usually
1315              SImode). to make it acceptable to the format of extzv.  */
1316           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1317             goto extzv_loses;
1318           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1319             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1320
1321           /* On big-endian machines, we count bits from the most significant.
1322              If the bit field insn does not, we must invert.  */
1323           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1324             xbitpos = unit - bitsize - xbitpos;
1325
1326           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1327           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1328             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1329
1330           unit = GET_MODE_BITSIZE (maxmode);
1331
1332           if (xtarget == 0
1333               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1334             xtarget = xspec_target = gen_reg_rtx (tmode);
1335
1336           if (GET_MODE (xtarget) != maxmode)
1337             {
1338               if (GET_CODE (xtarget) == REG)
1339                 {
1340                   int wider = (GET_MODE_SIZE (maxmode)
1341                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1342                   xtarget = gen_lowpart (maxmode, xtarget);
1343                   if (wider)
1344                     xspec_target_subreg = xtarget;
1345                 }
1346               else
1347                 xtarget = gen_reg_rtx (maxmode);
1348             }
1349
1350           /* If this machine's extzv insists on a register target,
1351              make sure we have one.  */
1352           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1353                  (xtarget, maxmode)))
1354             xtarget = gen_reg_rtx (maxmode);
1355
1356           bitsize_rtx = GEN_INT (bitsize);
1357           bitpos_rtx = GEN_INT (xbitpos);
1358
1359           pat = gen_extzv (protect_from_queue (xtarget, 1),
1360                            xop0, bitsize_rtx, bitpos_rtx);
1361           if (pat)
1362             {
1363               emit_insn (pat);
1364               target = xtarget;
1365               spec_target = xspec_target;
1366               spec_target_subreg = xspec_target_subreg;
1367             }
1368           else
1369             {
1370               delete_insns_since (last);
1371               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1372                                                 bitpos, target, 1);
1373             }
1374         }
1375       else
1376       extzv_loses:
1377         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1378                                           bitpos, target, 1);
1379     }
1380   else
1381     {
1382       if (HAVE_extv
1383           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1384           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1385                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1386         {
1387           int xbitpos = bitpos, xoffset = offset;
1388           rtx bitsize_rtx, bitpos_rtx;
1389           rtx last = get_last_insn ();
1390           rtx xop0 = op0, xtarget = target;
1391           rtx xspec_target = spec_target;
1392           rtx xspec_target_subreg = spec_target_subreg;
1393           rtx pat;
1394           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1395
1396           if (GET_CODE (xop0) == MEM)
1397             {
1398               /* Is the memory operand acceptable?  */
1399               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1400                      (xop0, GET_MODE (xop0))))
1401                 {
1402                   /* No, load into a reg and extract from there.  */
1403                   enum machine_mode bestmode;
1404
1405                   /* Get the mode to use for inserting into this field.  If
1406                      OP0 is BLKmode, get the smallest mode consistent with the
1407                      alignment. If OP0 is a non-BLKmode object that is no
1408                      wider than MAXMODE, use its mode. Otherwise, use the
1409                      smallest mode containing the field.  */
1410
1411                   if (GET_MODE (xop0) == BLKmode
1412                       || (GET_MODE_SIZE (GET_MODE (op0))
1413                           > GET_MODE_SIZE (maxmode)))
1414                     bestmode = get_best_mode (bitsize, bitnum,
1415                                               MEM_ALIGN (xop0), maxmode,
1416                                               MEM_VOLATILE_P (xop0));
1417                   else
1418                     bestmode = GET_MODE (xop0);
1419
1420                   if (bestmode == VOIDmode
1421                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1422                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1423                     goto extv_loses;
1424
1425                   /* Compute offset as multiple of this unit,
1426                      counting in bytes.  */
1427                   unit = GET_MODE_BITSIZE (bestmode);
1428                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1429                   xbitpos = bitnum % unit;
1430                   xop0 = adjust_address (xop0, bestmode, xoffset);
1431
1432                   /* Fetch it to a register in that size.  */
1433                   xop0 = force_reg (bestmode, xop0);
1434
1435                   /* XBITPOS counts within UNIT, which is what is expected.  */
1436                 }
1437               else
1438                 /* Get ref to first byte containing part of the field.  */
1439                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1440             }
1441
1442           /* If op0 is a register, we need it in MAXMODE (which is usually
1443              SImode) to make it acceptable to the format of extv.  */
1444           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1445             goto extv_loses;
1446           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1447             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1448
1449           /* On big-endian machines, we count bits from the most significant.
1450              If the bit field insn does not, we must invert.  */
1451           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1452             xbitpos = unit - bitsize - xbitpos;
1453
1454           /* XBITPOS counts within a size of UNIT.
1455              Adjust to count within a size of MAXMODE.  */
1456           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1457             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1458
1459           unit = GET_MODE_BITSIZE (maxmode);
1460
1461           if (xtarget == 0
1462               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1463             xtarget = xspec_target = gen_reg_rtx (tmode);
1464
1465           if (GET_MODE (xtarget) != maxmode)
1466             {
1467               if (GET_CODE (xtarget) == REG)
1468                 {
1469                   int wider = (GET_MODE_SIZE (maxmode)
1470                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1471                   xtarget = gen_lowpart (maxmode, xtarget);
1472                   if (wider)
1473                     xspec_target_subreg = xtarget;
1474                 }
1475               else
1476                 xtarget = gen_reg_rtx (maxmode);
1477             }
1478
1479           /* If this machine's extv insists on a register target,
1480              make sure we have one.  */
1481           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1482                  (xtarget, maxmode)))
1483             xtarget = gen_reg_rtx (maxmode);
1484
1485           bitsize_rtx = GEN_INT (bitsize);
1486           bitpos_rtx = GEN_INT (xbitpos);
1487
1488           pat = gen_extv (protect_from_queue (xtarget, 1),
1489                           xop0, bitsize_rtx, bitpos_rtx);
1490           if (pat)
1491             {
1492               emit_insn (pat);
1493               target = xtarget;
1494               spec_target = xspec_target;
1495               spec_target_subreg = xspec_target_subreg;
1496             }
1497           else
1498             {
1499               delete_insns_since (last);
1500               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1501                                                 bitpos, target, 0);
1502             }
1503         }
1504       else
1505       extv_loses:
1506         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1507                                           bitpos, target, 0);
1508     }
1509   if (target == spec_target)
1510     return target;
1511   if (target == spec_target_subreg)
1512     return spec_target;
1513   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1514     {
1515       /* If the target mode is floating-point, first convert to the
1516          integer mode of that size and then access it as a floating-point
1517          value via a SUBREG.  */
1518       if (GET_MODE_CLASS (tmode) != MODE_INT
1519           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1520         {
1521           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1522                                                    MODE_INT, 0),
1523                                     target, unsignedp);
1524           return gen_lowpart (tmode, target);
1525         }
1526       else
1527         return convert_to_mode (tmode, target, unsignedp);
1528     }
1529   return target;
1530 }
1531 \f
1532 /* Extract a bit field using shifts and boolean operations
1533    Returns an rtx to represent the value.
1534    OP0 addresses a register (word) or memory (byte).
1535    BITPOS says which bit within the word or byte the bit field starts in.
1536    OFFSET says how many bytes farther the bit field starts;
1537     it is 0 if OP0 is a register.
1538    BITSIZE says how many bits long the bit field is.
1539     (If OP0 is a register, it may be narrower than a full word,
1540      but BITPOS still counts within a full word,
1541      which is significant on bigendian machines.)
1542
1543    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1544    If TARGET is nonzero, attempts to store the value there
1545    and return TARGET, but this is not guaranteed.
1546    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1547
1548 static rtx
1549 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1550                          unsigned HOST_WIDE_INT offset,
1551                          unsigned HOST_WIDE_INT bitsize,
1552                          unsigned HOST_WIDE_INT bitpos, rtx target,
1553                          int unsignedp)
1554 {
1555   unsigned int total_bits = BITS_PER_WORD;
1556   enum machine_mode mode;
1557
1558   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1559     {
1560       /* Special treatment for a bit field split across two registers.  */
1561       if (bitsize + bitpos > BITS_PER_WORD)
1562         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1563     }
1564   else
1565     {
1566       /* Get the proper mode to use for this field.  We want a mode that
1567          includes the entire field.  If such a mode would be larger than
1568          a word, we won't be doing the extraction the normal way.  */
1569
1570       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1571                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1572
1573       if (mode == VOIDmode)
1574         /* The only way this should occur is if the field spans word
1575            boundaries.  */
1576         return extract_split_bit_field (op0, bitsize,
1577                                         bitpos + offset * BITS_PER_UNIT,
1578                                         unsignedp);
1579
1580       total_bits = GET_MODE_BITSIZE (mode);
1581
1582       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1583          be in the range 0 to total_bits-1, and put any excess bytes in
1584          OFFSET.  */
1585       if (bitpos >= total_bits)
1586         {
1587           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1588           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1589                      * BITS_PER_UNIT);
1590         }
1591
1592       /* Get ref to an aligned byte, halfword, or word containing the field.
1593          Adjust BITPOS to be position within a word,
1594          and OFFSET to be the offset of that word.
1595          Then alter OP0 to refer to that word.  */
1596       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1597       offset -= (offset % (total_bits / BITS_PER_UNIT));
1598       op0 = adjust_address (op0, mode, offset);
1599     }
1600
1601   mode = GET_MODE (op0);
1602
1603   if (BYTES_BIG_ENDIAN)
1604     /* BITPOS is the distance between our msb and that of OP0.
1605        Convert it to the distance from the lsb.  */
1606     bitpos = total_bits - bitsize - bitpos;
1607
1608   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1609      We have reduced the big-endian case to the little-endian case.  */
1610
1611   if (unsignedp)
1612     {
1613       if (bitpos)
1614         {
1615           /* If the field does not already start at the lsb,
1616              shift it so it does.  */
1617           tree amount = build_int_2 (bitpos, 0);
1618           /* Maybe propagate the target for the shift.  */
1619           /* But not if we will return it--could confuse integrate.c.  */
1620           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1621                            && !REG_FUNCTION_VALUE_P (target)
1622                            ? target : 0);
1623           if (tmode != mode) subtarget = 0;
1624           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1625         }
1626       /* Convert the value to the desired mode.  */
1627       if (mode != tmode)
1628         op0 = convert_to_mode (tmode, op0, 1);
1629
1630       /* Unless the msb of the field used to be the msb when we shifted,
1631          mask out the upper bits.  */
1632
1633       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1634         return expand_binop (GET_MODE (op0), and_optab, op0,
1635                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1636                              target, 1, OPTAB_LIB_WIDEN);
1637       return op0;
1638     }
1639
1640   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1641      then arithmetic-shift its lsb to the lsb of the word.  */
1642   op0 = force_reg (mode, op0);
1643   if (mode != tmode)
1644     target = 0;
1645
1646   /* Find the narrowest integer mode that contains the field.  */
1647
1648   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1649        mode = GET_MODE_WIDER_MODE (mode))
1650     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1651       {
1652         op0 = convert_to_mode (mode, op0, 0);
1653         break;
1654       }
1655
1656   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1657     {
1658       tree amount
1659         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1660       /* Maybe propagate the target for the shift.  */
1661       /* But not if we will return the result--could confuse integrate.c.  */
1662       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1663                        && ! REG_FUNCTION_VALUE_P (target)
1664                        ? target : 0);
1665       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1666     }
1667
1668   return expand_shift (RSHIFT_EXPR, mode, op0,
1669                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1670                        target, 0);
1671 }
1672 \f
1673 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1674    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1675    complement of that if COMPLEMENT.  The mask is truncated if
1676    necessary to the width of mode MODE.  The mask is zero-extended if
1677    BITSIZE+BITPOS is too small for MODE.  */
1678
1679 static rtx
1680 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1681 {
1682   HOST_WIDE_INT masklow, maskhigh;
1683
1684   if (bitsize == 0)
1685     masklow = 0;
1686   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1687     masklow = (HOST_WIDE_INT) -1 << bitpos;
1688   else
1689     masklow = 0;
1690
1691   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1692     masklow &= ((unsigned HOST_WIDE_INT) -1
1693                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1694
1695   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1696     maskhigh = -1;
1697   else
1698     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1699
1700   if (bitsize == 0)
1701     maskhigh = 0;
1702   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1703     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1704                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1705   else
1706     maskhigh = 0;
1707
1708   if (complement)
1709     {
1710       maskhigh = ~maskhigh;
1711       masklow = ~masklow;
1712     }
1713
1714   return immed_double_const (masklow, maskhigh, mode);
1715 }
1716
1717 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1718    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1719
1720 static rtx
1721 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1722 {
1723   unsigned HOST_WIDE_INT v = INTVAL (value);
1724   HOST_WIDE_INT low, high;
1725
1726   if (bitsize < HOST_BITS_PER_WIDE_INT)
1727     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1728
1729   if (bitpos < HOST_BITS_PER_WIDE_INT)
1730     {
1731       low = v << bitpos;
1732       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1733     }
1734   else
1735     {
1736       low = 0;
1737       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1738     }
1739
1740   return immed_double_const (low, high, mode);
1741 }
1742 \f
1743 /* Extract a bit field that is split across two words
1744    and return an RTX for the result.
1745
1746    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1747    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1748    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1749
1750 static rtx
1751 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1752                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1753 {
1754   unsigned int unit;
1755   unsigned int bitsdone = 0;
1756   rtx result = NULL_RTX;
1757   int first = 1;
1758
1759   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1760      much at a time.  */
1761   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1762     unit = BITS_PER_WORD;
1763   else
1764     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1765
1766   while (bitsdone < bitsize)
1767     {
1768       unsigned HOST_WIDE_INT thissize;
1769       rtx part, word;
1770       unsigned HOST_WIDE_INT thispos;
1771       unsigned HOST_WIDE_INT offset;
1772
1773       offset = (bitpos + bitsdone) / unit;
1774       thispos = (bitpos + bitsdone) % unit;
1775
1776       /* THISSIZE must not overrun a word boundary.  Otherwise,
1777          extract_fixed_bit_field will call us again, and we will mutually
1778          recurse forever.  */
1779       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1780       thissize = MIN (thissize, unit - thispos);
1781
1782       /* If OP0 is a register, then handle OFFSET here.
1783
1784          When handling multiword bitfields, extract_bit_field may pass
1785          down a word_mode SUBREG of a larger REG for a bitfield that actually
1786          crosses a word boundary.  Thus, for a SUBREG, we must find
1787          the current word starting from the base register.  */
1788       if (GET_CODE (op0) == SUBREG)
1789         {
1790           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1791           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1792                                         GET_MODE (SUBREG_REG (op0)));
1793           offset = 0;
1794         }
1795       else if (GET_CODE (op0) == REG)
1796         {
1797           word = operand_subword_force (op0, offset, GET_MODE (op0));
1798           offset = 0;
1799         }
1800       else
1801         word = op0;
1802
1803       /* Extract the parts in bit-counting order,
1804          whose meaning is determined by BYTES_PER_UNIT.
1805          OFFSET is in UNITs, and UNIT is in bits.
1806          extract_fixed_bit_field wants offset in bytes.  */
1807       part = extract_fixed_bit_field (word_mode, word,
1808                                       offset * unit / BITS_PER_UNIT,
1809                                       thissize, thispos, 0, 1);
1810       bitsdone += thissize;
1811
1812       /* Shift this part into place for the result.  */
1813       if (BYTES_BIG_ENDIAN)
1814         {
1815           if (bitsize != bitsdone)
1816             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1817                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1818         }
1819       else
1820         {
1821           if (bitsdone != thissize)
1822             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1823                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1824         }
1825
1826       if (first)
1827         result = part;
1828       else
1829         /* Combine the parts with bitwise or.  This works
1830            because we extracted each part as an unsigned bit field.  */
1831         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1832                                OPTAB_LIB_WIDEN);
1833
1834       first = 0;
1835     }
1836
1837   /* Unsigned bit field: we are done.  */
1838   if (unsignedp)
1839     return result;
1840   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1841   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1842                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1843                          NULL_RTX, 0);
1844   return expand_shift (RSHIFT_EXPR, word_mode, result,
1845                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1846 }
1847 \f
1848 /* Add INC into TARGET.  */
1849
1850 void
1851 expand_inc (rtx target, rtx inc)
1852 {
1853   rtx value = expand_binop (GET_MODE (target), add_optab,
1854                             target, inc,
1855                             target, 0, OPTAB_LIB_WIDEN);
1856   if (value != target)
1857     emit_move_insn (target, value);
1858 }
1859
1860 /* Subtract DEC from TARGET.  */
1861
1862 void
1863 expand_dec (rtx target, rtx dec)
1864 {
1865   rtx value = expand_binop (GET_MODE (target), sub_optab,
1866                             target, dec,
1867                             target, 0, OPTAB_LIB_WIDEN);
1868   if (value != target)
1869     emit_move_insn (target, value);
1870 }
1871 \f
1872 /* Output a shift instruction for expression code CODE,
1873    with SHIFTED being the rtx for the value to shift,
1874    and AMOUNT the tree for the amount to shift by.
1875    Store the result in the rtx TARGET, if that is convenient.
1876    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1877    Return the rtx for where the value is.  */
1878
1879 rtx
1880 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1881               tree amount, rtx target, int unsignedp)
1882 {
1883   rtx op1, temp = 0;
1884   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1885   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1886   int try;
1887
1888   /* Previously detected shift-counts computed by NEGATE_EXPR
1889      and shifted in the other direction; but that does not work
1890      on all machines.  */
1891
1892   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1893
1894 #ifdef SHIFT_COUNT_TRUNCATED
1895   if (SHIFT_COUNT_TRUNCATED)
1896     {
1897       if (GET_CODE (op1) == CONST_INT
1898           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1899               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1900         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1901                        % GET_MODE_BITSIZE (mode));
1902       else if (GET_CODE (op1) == SUBREG
1903                && subreg_lowpart_p (op1))
1904         op1 = SUBREG_REG (op1);
1905     }
1906 #endif
1907
1908   if (op1 == const0_rtx)
1909     return shifted;
1910
1911   for (try = 0; temp == 0 && try < 3; try++)
1912     {
1913       enum optab_methods methods;
1914
1915       if (try == 0)
1916         methods = OPTAB_DIRECT;
1917       else if (try == 1)
1918         methods = OPTAB_WIDEN;
1919       else
1920         methods = OPTAB_LIB_WIDEN;
1921
1922       if (rotate)
1923         {
1924           /* Widening does not work for rotation.  */
1925           if (methods == OPTAB_WIDEN)
1926             continue;
1927           else if (methods == OPTAB_LIB_WIDEN)
1928             {
1929               /* If we have been unable to open-code this by a rotation,
1930                  do it as the IOR of two shifts.  I.e., to rotate A
1931                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1932                  where C is the bitsize of A.
1933
1934                  It is theoretically possible that the target machine might
1935                  not be able to perform either shift and hence we would
1936                  be making two libcalls rather than just the one for the
1937                  shift (similarly if IOR could not be done).  We will allow
1938                  this extremely unlikely lossage to avoid complicating the
1939                  code below.  */
1940
1941               rtx subtarget = target == shifted ? 0 : target;
1942               rtx temp1;
1943               tree type = TREE_TYPE (amount);
1944               tree new_amount = make_tree (type, op1);
1945               tree other_amount
1946                 = fold (build (MINUS_EXPR, type,
1947                                convert (type,
1948                                         build_int_2 (GET_MODE_BITSIZE (mode),
1949                                                      0)),
1950                                amount));
1951
1952               shifted = force_reg (mode, shifted);
1953
1954               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1955                                    mode, shifted, new_amount, subtarget, 1);
1956               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1957                                     mode, shifted, other_amount, 0, 1);
1958               return expand_binop (mode, ior_optab, temp, temp1, target,
1959                                    unsignedp, methods);
1960             }
1961
1962           temp = expand_binop (mode,
1963                                left ? rotl_optab : rotr_optab,
1964                                shifted, op1, target, unsignedp, methods);
1965
1966           /* If we don't have the rotate, but we are rotating by a constant
1967              that is in range, try a rotate in the opposite direction.  */
1968
1969           if (temp == 0 && GET_CODE (op1) == CONST_INT
1970               && INTVAL (op1) > 0
1971               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1972             temp = expand_binop (mode,
1973                                  left ? rotr_optab : rotl_optab,
1974                                  shifted,
1975                                  GEN_INT (GET_MODE_BITSIZE (mode)
1976                                           - INTVAL (op1)),
1977                                  target, unsignedp, methods);
1978         }
1979       else if (unsignedp)
1980         temp = expand_binop (mode,
1981                              left ? ashl_optab : lshr_optab,
1982                              shifted, op1, target, unsignedp, methods);
1983
1984       /* Do arithmetic shifts.
1985          Also, if we are going to widen the operand, we can just as well
1986          use an arithmetic right-shift instead of a logical one.  */
1987       if (temp == 0 && ! rotate
1988           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1989         {
1990           enum optab_methods methods1 = methods;
1991
1992           /* If trying to widen a log shift to an arithmetic shift,
1993              don't accept an arithmetic shift of the same size.  */
1994           if (unsignedp)
1995             methods1 = OPTAB_MUST_WIDEN;
1996
1997           /* Arithmetic shift */
1998
1999           temp = expand_binop (mode,
2000                                left ? ashl_optab : ashr_optab,
2001                                shifted, op1, target, unsignedp, methods1);
2002         }
2003
2004       /* We used to try extzv here for logical right shifts, but that was
2005          only useful for one machine, the VAX, and caused poor code
2006          generation there for lshrdi3, so the code was deleted and a
2007          define_expand for lshrsi3 was added to vax.md.  */
2008     }
2009
2010   if (temp == 0)
2011     abort ();
2012   return temp;
2013 }
2014 \f
2015 enum alg_code { alg_zero, alg_m, alg_shift,
2016                   alg_add_t_m2, alg_sub_t_m2,
2017                   alg_add_factor, alg_sub_factor,
2018                   alg_add_t2_m, alg_sub_t2_m,
2019                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2020
2021 /* This structure records a sequence of operations.
2022    `ops' is the number of operations recorded.
2023    `cost' is their total cost.
2024    The operations are stored in `op' and the corresponding
2025    logarithms of the integer coefficients in `log'.
2026
2027    These are the operations:
2028    alg_zero             total := 0;
2029    alg_m                total := multiplicand;
2030    alg_shift            total := total * coeff
2031    alg_add_t_m2         total := total + multiplicand * coeff;
2032    alg_sub_t_m2         total := total - multiplicand * coeff;
2033    alg_add_factor       total := total * coeff + total;
2034    alg_sub_factor       total := total * coeff - total;
2035    alg_add_t2_m         total := total * coeff + multiplicand;
2036    alg_sub_t2_m         total := total * coeff - multiplicand;
2037
2038    The first operand must be either alg_zero or alg_m.  */
2039
2040 struct algorithm
2041 {
2042   short cost;
2043   short ops;
2044   /* The size of the OP and LOG fields are not directly related to the
2045      word size, but the worst-case algorithms will be if we have few
2046      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2047      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2048      in total wordsize operations.  */
2049   enum alg_code op[MAX_BITS_PER_WORD];
2050   char log[MAX_BITS_PER_WORD];
2051 };
2052
2053 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2054 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2055                                                  int, unsigned HOST_WIDE_INT *,
2056                                                  int *, int *);
2057 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2058 /* Compute and return the best algorithm for multiplying by T.
2059    The algorithm must cost less than cost_limit
2060    If retval.cost >= COST_LIMIT, no algorithm was found and all
2061    other field of the returned struct are undefined.  */
2062
2063 static void
2064 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2065             int cost_limit)
2066 {
2067   int m;
2068   struct algorithm *alg_in, *best_alg;
2069   int cost;
2070   unsigned HOST_WIDE_INT q;
2071
2072   /* Indicate that no algorithm is yet found.  If no algorithm
2073      is found, this value will be returned and indicate failure.  */
2074   alg_out->cost = cost_limit;
2075
2076   if (cost_limit <= 0)
2077     return;
2078
2079   /* t == 1 can be done in zero cost.  */
2080   if (t == 1)
2081     {
2082       alg_out->ops = 1;
2083       alg_out->cost = 0;
2084       alg_out->op[0] = alg_m;
2085       return;
2086     }
2087
2088   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2089      fail now.  */
2090   if (t == 0)
2091     {
2092       if (zero_cost >= cost_limit)
2093         return;
2094       else
2095         {
2096           alg_out->ops = 1;
2097           alg_out->cost = zero_cost;
2098           alg_out->op[0] = alg_zero;
2099           return;
2100         }
2101     }
2102
2103   /* We'll be needing a couple extra algorithm structures now.  */
2104
2105   alg_in = alloca (sizeof (struct algorithm));
2106   best_alg = alloca (sizeof (struct algorithm));
2107
2108   /* If we have a group of zero bits at the low-order part of T, try
2109      multiplying by the remaining bits and then doing a shift.  */
2110
2111   if ((t & 1) == 0)
2112     {
2113       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2114       if (m < BITS_PER_WORD)
2115         {
2116           q = t >> m;
2117           cost = shift_cost[m];
2118           synth_mult (alg_in, q, cost_limit - cost);
2119
2120           cost += alg_in->cost;
2121           if (cost < cost_limit)
2122             {
2123               struct algorithm *x;
2124               x = alg_in, alg_in = best_alg, best_alg = x;
2125               best_alg->log[best_alg->ops] = m;
2126               best_alg->op[best_alg->ops] = alg_shift;
2127               cost_limit = cost;
2128             }
2129         }
2130     }
2131
2132   /* If we have an odd number, add or subtract one.  */
2133   if ((t & 1) != 0)
2134     {
2135       unsigned HOST_WIDE_INT w;
2136
2137       for (w = 1; (w & t) != 0; w <<= 1)
2138         ;
2139       /* If T was -1, then W will be zero after the loop.  This is another
2140          case where T ends with ...111.  Handling this with (T + 1) and
2141          subtract 1 produces slightly better code and results in algorithm
2142          selection much faster than treating it like the ...0111 case
2143          below.  */
2144       if (w == 0
2145           || (w > 2
2146               /* Reject the case where t is 3.
2147                  Thus we prefer addition in that case.  */
2148               && t != 3))
2149         {
2150           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2151
2152           cost = add_cost;
2153           synth_mult (alg_in, t + 1, cost_limit - cost);
2154
2155           cost += alg_in->cost;
2156           if (cost < cost_limit)
2157             {
2158               struct algorithm *x;
2159               x = alg_in, alg_in = best_alg, best_alg = x;
2160               best_alg->log[best_alg->ops] = 0;
2161               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2162               cost_limit = cost;
2163             }
2164         }
2165       else
2166         {
2167           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2168
2169           cost = add_cost;
2170           synth_mult (alg_in, t - 1, cost_limit - cost);
2171
2172           cost += alg_in->cost;
2173           if (cost < cost_limit)
2174             {
2175               struct algorithm *x;
2176               x = alg_in, alg_in = best_alg, best_alg = x;
2177               best_alg->log[best_alg->ops] = 0;
2178               best_alg->op[best_alg->ops] = alg_add_t_m2;
2179               cost_limit = cost;
2180             }
2181         }
2182     }
2183
2184   /* Look for factors of t of the form
2185      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2186      If we find such a factor, we can multiply by t using an algorithm that
2187      multiplies by q, shift the result by m and add/subtract it to itself.
2188
2189      We search for large factors first and loop down, even if large factors
2190      are less probable than small; if we find a large factor we will find a
2191      good sequence quickly, and therefore be able to prune (by decreasing
2192      COST_LIMIT) the search.  */
2193
2194   for (m = floor_log2 (t - 1); m >= 2; m--)
2195     {
2196       unsigned HOST_WIDE_INT d;
2197
2198       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2199       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2200         {
2201           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2202           synth_mult (alg_in, t / d, cost_limit - cost);
2203
2204           cost += alg_in->cost;
2205           if (cost < cost_limit)
2206             {
2207               struct algorithm *x;
2208               x = alg_in, alg_in = best_alg, best_alg = x;
2209               best_alg->log[best_alg->ops] = m;
2210               best_alg->op[best_alg->ops] = alg_add_factor;
2211               cost_limit = cost;
2212             }
2213           /* Other factors will have been taken care of in the recursion.  */
2214           break;
2215         }
2216
2217       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2218       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2219         {
2220           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2221           synth_mult (alg_in, t / d, cost_limit - cost);
2222
2223           cost += alg_in->cost;
2224           if (cost < cost_limit)
2225             {
2226               struct algorithm *x;
2227               x = alg_in, alg_in = best_alg, best_alg = x;
2228               best_alg->log[best_alg->ops] = m;
2229               best_alg->op[best_alg->ops] = alg_sub_factor;
2230               cost_limit = cost;
2231             }
2232           break;
2233         }
2234     }
2235
2236   /* Try shift-and-add (load effective address) instructions,
2237      i.e. do a*3, a*5, a*9.  */
2238   if ((t & 1) != 0)
2239     {
2240       q = t - 1;
2241       q = q & -q;
2242       m = exact_log2 (q);
2243       if (m >= 0 && m < BITS_PER_WORD)
2244         {
2245           cost = shiftadd_cost[m];
2246           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2247
2248           cost += alg_in->cost;
2249           if (cost < cost_limit)
2250             {
2251               struct algorithm *x;
2252               x = alg_in, alg_in = best_alg, best_alg = x;
2253               best_alg->log[best_alg->ops] = m;
2254               best_alg->op[best_alg->ops] = alg_add_t2_m;
2255               cost_limit = cost;
2256             }
2257         }
2258
2259       q = t + 1;
2260       q = q & -q;
2261       m = exact_log2 (q);
2262       if (m >= 0 && m < BITS_PER_WORD)
2263         {
2264           cost = shiftsub_cost[m];
2265           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2266
2267           cost += alg_in->cost;
2268           if (cost < cost_limit)
2269             {
2270               struct algorithm *x;
2271               x = alg_in, alg_in = best_alg, best_alg = x;
2272               best_alg->log[best_alg->ops] = m;
2273               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2274               cost_limit = cost;
2275             }
2276         }
2277     }
2278
2279   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2280      we have not found any algorithm.  */
2281   if (cost_limit == alg_out->cost)
2282     return;
2283
2284   /* If we are getting a too long sequence for `struct algorithm'
2285      to record, make this search fail.  */
2286   if (best_alg->ops == MAX_BITS_PER_WORD)
2287     return;
2288
2289   /* Copy the algorithm from temporary space to the space at alg_out.
2290      We avoid using structure assignment because the majority of
2291      best_alg is normally undefined, and this is a critical function.  */
2292   alg_out->ops = best_alg->ops + 1;
2293   alg_out->cost = cost_limit;
2294   memcpy (alg_out->op, best_alg->op,
2295           alg_out->ops * sizeof *alg_out->op);
2296   memcpy (alg_out->log, best_alg->log,
2297           alg_out->ops * sizeof *alg_out->log);
2298 }
2299 \f
2300 /* Perform a multiplication and return an rtx for the result.
2301    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2302    TARGET is a suggestion for where to store the result (an rtx).
2303
2304    We check specially for a constant integer as OP1.
2305    If you want this check for OP0 as well, then before calling
2306    you should swap the two operands if OP0 would be constant.  */
2307
2308 rtx
2309 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2310              int unsignedp)
2311 {
2312   rtx const_op1 = op1;
2313
2314   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2315      less than or equal in size to `unsigned int' this doesn't matter.
2316      If the mode is larger than `unsigned int', then synth_mult works only
2317      if the constant value exactly fits in an `unsigned int' without any
2318      truncation.  This means that multiplying by negative values does
2319      not work; results are off by 2^32 on a 32 bit machine.  */
2320
2321   /* If we are multiplying in DImode, it may still be a win
2322      to try to work with shifts and adds.  */
2323   if (GET_CODE (op1) == CONST_DOUBLE
2324       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2325       && HOST_BITS_PER_INT >= BITS_PER_WORD
2326       && CONST_DOUBLE_HIGH (op1) == 0)
2327     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2328   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2329            && GET_CODE (op1) == CONST_INT
2330            && INTVAL (op1) < 0)
2331     const_op1 = 0;
2332
2333   /* We used to test optimize here, on the grounds that it's better to
2334      produce a smaller program when -O is not used.
2335      But this causes such a terrible slowdown sometimes
2336      that it seems better to use synth_mult always.  */
2337
2338   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2339       && (unsignedp || ! flag_trapv))
2340     {
2341       struct algorithm alg;
2342       struct algorithm alg2;
2343       HOST_WIDE_INT val = INTVAL (op1);
2344       HOST_WIDE_INT val_so_far;
2345       rtx insn;
2346       int mult_cost;
2347       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2348
2349       /* op0 must be register to make mult_cost match the precomputed
2350          shiftadd_cost array.  */
2351       op0 = force_reg (mode, op0);
2352
2353       /* Try to do the computation three ways: multiply by the negative of OP1
2354          and then negate, do the multiplication directly, or do multiplication
2355          by OP1 - 1.  */
2356
2357       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2358       mult_cost = MIN (12 * add_cost, mult_cost);
2359
2360       synth_mult (&alg, val, mult_cost);
2361
2362       /* This works only if the inverted value actually fits in an
2363          `unsigned int' */
2364       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2365         {
2366           synth_mult (&alg2, - val,
2367                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2368           if (alg2.cost + negate_cost < alg.cost)
2369             alg = alg2, variant = negate_variant;
2370         }
2371
2372       /* This proves very useful for division-by-constant.  */
2373       synth_mult (&alg2, val - 1,
2374                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2375       if (alg2.cost + add_cost < alg.cost)
2376         alg = alg2, variant = add_variant;
2377
2378       if (alg.cost < mult_cost)
2379         {
2380           /* We found something cheaper than a multiply insn.  */
2381           int opno;
2382           rtx accum, tem;
2383           enum machine_mode nmode;
2384
2385           op0 = protect_from_queue (op0, 0);
2386
2387           /* Avoid referencing memory over and over.
2388              For speed, but also for correctness when mem is volatile.  */
2389           if (GET_CODE (op0) == MEM)
2390             op0 = force_reg (mode, op0);
2391
2392           /* ACCUM starts out either as OP0 or as a zero, depending on
2393              the first operation.  */
2394
2395           if (alg.op[0] == alg_zero)
2396             {
2397               accum = copy_to_mode_reg (mode, const0_rtx);
2398               val_so_far = 0;
2399             }
2400           else if (alg.op[0] == alg_m)
2401             {
2402               accum = copy_to_mode_reg (mode, op0);
2403               val_so_far = 1;
2404             }
2405           else
2406             abort ();
2407
2408           for (opno = 1; opno < alg.ops; opno++)
2409             {
2410               int log = alg.log[opno];
2411               int preserve = preserve_subexpressions_p ();
2412               rtx shift_subtarget = preserve ? 0 : accum;
2413               rtx add_target
2414                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2415                    && ! preserve)
2416                   ? target : 0;
2417               rtx accum_target = preserve ? 0 : accum;
2418
2419               switch (alg.op[opno])
2420                 {
2421                 case alg_shift:
2422                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2423                                         build_int_2 (log, 0), NULL_RTX, 0);
2424                   val_so_far <<= log;
2425                   break;
2426
2427                 case alg_add_t_m2:
2428                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2429                                       build_int_2 (log, 0), NULL_RTX, 0);
2430                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2431                                          add_target
2432                                          ? add_target : accum_target);
2433                   val_so_far += (HOST_WIDE_INT) 1 << log;
2434                   break;
2435
2436                 case alg_sub_t_m2:
2437                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2438                                       build_int_2 (log, 0), NULL_RTX, 0);
2439                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2440                                          add_target
2441                                          ? add_target : accum_target);
2442                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2443                   break;
2444
2445                 case alg_add_t2_m:
2446                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2447                                         build_int_2 (log, 0), shift_subtarget,
2448                                         0);
2449                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2450                                          add_target
2451                                          ? add_target : accum_target);
2452                   val_so_far = (val_so_far << log) + 1;
2453                   break;
2454
2455                 case alg_sub_t2_m:
2456                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2457                                         build_int_2 (log, 0), shift_subtarget,
2458                                         0);
2459                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2460                                          add_target
2461                                          ? add_target : accum_target);
2462                   val_so_far = (val_so_far << log) - 1;
2463                   break;
2464
2465                 case alg_add_factor:
2466                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2467                                       build_int_2 (log, 0), NULL_RTX, 0);
2468                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2469                                          add_target
2470                                          ? add_target : accum_target);
2471                   val_so_far += val_so_far << log;
2472                   break;
2473
2474                 case alg_sub_factor:
2475                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2476                                       build_int_2 (log, 0), NULL_RTX, 0);
2477                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2478                                          (add_target ? add_target
2479                                           : preserve ? 0 : tem));
2480                   val_so_far = (val_so_far << log) - val_so_far;
2481                   break;
2482
2483                 default:
2484                   abort ();
2485                 }
2486
2487               /* Write a REG_EQUAL note on the last insn so that we can cse
2488                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2489                  we've set the inner register and must properly indicate
2490                  that.  */
2491
2492               tem = op0, nmode = mode;
2493               if (GET_CODE (accum) == SUBREG)
2494                 {
2495                   nmode = GET_MODE (SUBREG_REG (accum));
2496                   tem = gen_lowpart (nmode, op0);
2497                 }
2498
2499               insn = get_last_insn ();
2500               set_unique_reg_note (insn,
2501                                    REG_EQUAL,
2502                                    gen_rtx_MULT (nmode, tem,
2503                                                  GEN_INT (val_so_far)));
2504             }
2505
2506           if (variant == negate_variant)
2507             {
2508               val_so_far = - val_so_far;
2509               accum = expand_unop (mode, neg_optab, accum, target, 0);
2510             }
2511           else if (variant == add_variant)
2512             {
2513               val_so_far = val_so_far + 1;
2514               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2515             }
2516
2517           if (val != val_so_far)
2518             abort ();
2519
2520           return accum;
2521         }
2522     }
2523
2524   if (GET_CODE (op0) == CONST_DOUBLE)
2525     {
2526       rtx temp = op0;
2527       op0 = op1;
2528       op1 = temp;
2529     }
2530
2531   /* Expand x*2.0 as x+x.  */
2532   if (GET_CODE (op1) == CONST_DOUBLE
2533       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2534     {
2535       REAL_VALUE_TYPE d;
2536       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2537
2538       if (REAL_VALUES_EQUAL (d, dconst2))
2539         {
2540           op0 = force_reg (GET_MODE (op0), op0);
2541           return expand_binop (mode, add_optab, op0, op0,
2542                                target, unsignedp, OPTAB_LIB_WIDEN);
2543         }
2544     }
2545
2546   /* This used to use umul_optab if unsigned, but for non-widening multiply
2547      there is no difference between signed and unsigned.  */
2548   op0 = expand_binop (mode,
2549                       ! unsignedp
2550                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2551                       ? smulv_optab : smul_optab,
2552                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2553   if (op0 == 0)
2554     abort ();
2555   return op0;
2556 }
2557 \f
2558 /* Return the smallest n such that 2**n >= X.  */
2559
2560 int
2561 ceil_log2 (unsigned HOST_WIDE_INT x)
2562 {
2563   return floor_log2 (x - 1) + 1;
2564 }
2565
2566 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2567    replace division by D, and put the least significant N bits of the result
2568    in *MULTIPLIER_PTR and return the most significant bit.
2569
2570    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2571    needed precision is in PRECISION (should be <= N).
2572
2573    PRECISION should be as small as possible so this function can choose
2574    multiplier more freely.
2575
2576    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2577    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2578
2579    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2580    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2581
2582 static
2583 unsigned HOST_WIDE_INT
2584 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2585                    unsigned HOST_WIDE_INT *multiplier_ptr,
2586                    int *post_shift_ptr, int *lgup_ptr)
2587 {
2588   HOST_WIDE_INT mhigh_hi, mlow_hi;
2589   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2590   int lgup, post_shift;
2591   int pow, pow2;
2592   unsigned HOST_WIDE_INT nl, dummy1;
2593   HOST_WIDE_INT nh, dummy2;
2594
2595   /* lgup = ceil(log2(divisor)); */
2596   lgup = ceil_log2 (d);
2597
2598   if (lgup > n)
2599     abort ();
2600
2601   pow = n + lgup;
2602   pow2 = n + lgup - precision;
2603
2604   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2605     {
2606       /* We could handle this with some effort, but this case is much better
2607          handled directly with a scc insn, so rely on caller using that.  */
2608       abort ();
2609     }
2610
2611   /* mlow = 2^(N + lgup)/d */
2612  if (pow >= HOST_BITS_PER_WIDE_INT)
2613     {
2614       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2615       nl = 0;
2616     }
2617   else
2618     {
2619       nh = 0;
2620       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2621     }
2622   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2623                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2624
2625   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2626   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2627     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2628   else
2629     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2630   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2631                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2632
2633   if (mhigh_hi && nh - d >= d)
2634     abort ();
2635   if (mhigh_hi > 1 || mlow_hi > 1)
2636     abort ();
2637   /* Assert that mlow < mhigh.  */
2638   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2639     abort ();
2640
2641   /* If precision == N, then mlow, mhigh exceed 2^N
2642      (but they do not exceed 2^(N+1)).  */
2643
2644   /* Reduce to lowest terms.  */
2645   for (post_shift = lgup; post_shift > 0; post_shift--)
2646     {
2647       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2648       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2649       if (ml_lo >= mh_lo)
2650         break;
2651
2652       mlow_hi = 0;
2653       mlow_lo = ml_lo;
2654       mhigh_hi = 0;
2655       mhigh_lo = mh_lo;
2656     }
2657
2658   *post_shift_ptr = post_shift;
2659   *lgup_ptr = lgup;
2660   if (n < HOST_BITS_PER_WIDE_INT)
2661     {
2662       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2663       *multiplier_ptr = mhigh_lo & mask;
2664       return mhigh_lo >= mask;
2665     }
2666   else
2667     {
2668       *multiplier_ptr = mhigh_lo;
2669       return mhigh_hi;
2670     }
2671 }
2672
2673 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2674    congruent to 1 (mod 2**N).  */
2675
2676 static unsigned HOST_WIDE_INT
2677 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2678 {
2679   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2680
2681   /* The algorithm notes that the choice y = x satisfies
2682      x*y == 1 mod 2^3, since x is assumed odd.
2683      Each iteration doubles the number of bits of significance in y.  */
2684
2685   unsigned HOST_WIDE_INT mask;
2686   unsigned HOST_WIDE_INT y = x;
2687   int nbit = 3;
2688
2689   mask = (n == HOST_BITS_PER_WIDE_INT
2690           ? ~(unsigned HOST_WIDE_INT) 0
2691           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2692
2693   while (nbit < n)
2694     {
2695       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2696       nbit *= 2;
2697     }
2698   return y;
2699 }
2700
2701 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2702    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2703    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2704    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2705    become signed.
2706
2707    The result is put in TARGET if that is convenient.
2708
2709    MODE is the mode of operation.  */
2710
2711 rtx
2712 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2713                              rtx op1, rtx target, int unsignedp)
2714 {
2715   rtx tem;
2716   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2717
2718   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2719                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2720                       NULL_RTX, 0);
2721   tem = expand_and (mode, tem, op1, NULL_RTX);
2722   adj_operand
2723     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2724                      adj_operand);
2725
2726   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2727                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2728                       NULL_RTX, 0);
2729   tem = expand_and (mode, tem, op0, NULL_RTX);
2730   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2731                           target);
2732
2733   return target;
2734 }
2735
2736 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2737    in TARGET if that is convenient, and return where the result is.  If the
2738    operation can not be performed, 0 is returned.
2739
2740    MODE is the mode of operation and result.
2741
2742    UNSIGNEDP nonzero means unsigned multiply.
2743
2744    MAX_COST is the total allowed cost for the expanded RTL.  */
2745
2746 rtx
2747 expand_mult_highpart (enum machine_mode mode, rtx op0,
2748                       unsigned HOST_WIDE_INT cnst1, rtx target,
2749                       int unsignedp, int max_cost)
2750 {
2751   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2752   optab mul_highpart_optab;
2753   optab moptab;
2754   rtx tem;
2755   int size = GET_MODE_BITSIZE (mode);
2756   rtx op1, wide_op1;
2757
2758   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2759   if (size > HOST_BITS_PER_WIDE_INT)
2760     abort ();
2761
2762   op1 = gen_int_mode (cnst1, mode);
2763
2764   wide_op1
2765     = immed_double_const (cnst1,
2766                           (unsignedp
2767                            ? (HOST_WIDE_INT) 0
2768                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2769                           wider_mode);
2770
2771   /* expand_mult handles constant multiplication of word_mode
2772      or narrower.  It does a poor job for large modes.  */
2773   if (size < BITS_PER_WORD
2774       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2775     {
2776       /* We have to do this, since expand_binop doesn't do conversion for
2777          multiply.  Maybe change expand_binop to handle widening multiply?  */
2778       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2779
2780       /* We know that this can't have signed overflow, so pretend this is
2781          an unsigned multiply.  */
2782       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2783       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2784                           build_int_2 (size, 0), NULL_RTX, 1);
2785       return convert_modes (mode, wider_mode, tem, unsignedp);
2786     }
2787
2788   if (target == 0)
2789     target = gen_reg_rtx (mode);
2790
2791   /* Firstly, try using a multiplication insn that only generates the needed
2792      high part of the product, and in the sign flavor of unsignedp.  */
2793   if (mul_highpart_cost[(int) mode] < max_cost)
2794     {
2795       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2796       target = expand_binop (mode, mul_highpart_optab,
2797                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2798       if (target)
2799         return target;
2800     }
2801
2802   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2803      Need to adjust the result after the multiplication.  */
2804   if (size - 1 < BITS_PER_WORD
2805       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2806           < max_cost))
2807     {
2808       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2809       target = expand_binop (mode, mul_highpart_optab,
2810                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2811       if (target)
2812         /* We used the wrong signedness.  Adjust the result.  */
2813         return expand_mult_highpart_adjust (mode, target, op0,
2814                                             op1, target, unsignedp);
2815     }
2816
2817   /* Try widening multiplication.  */
2818   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2819   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2820       && mul_widen_cost[(int) wider_mode] < max_cost)
2821     {
2822       op1 = force_reg (mode, op1);
2823       goto try;
2824     }
2825
2826   /* Try widening the mode and perform a non-widening multiplication.  */
2827   moptab = smul_optab;
2828   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2829       && size - 1 < BITS_PER_WORD
2830       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2831     {
2832       op1 = wide_op1;
2833       goto try;
2834     }
2835
2836   /* Try widening multiplication of opposite signedness, and adjust.  */
2837   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2838   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2839       && size - 1 < BITS_PER_WORD
2840       && (mul_widen_cost[(int) wider_mode]
2841           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2842     {
2843       rtx regop1 = force_reg (mode, op1);
2844       tem = expand_binop (wider_mode, moptab, op0, regop1,
2845                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2846       if (tem != 0)
2847         {
2848           /* Extract the high half of the just generated product.  */
2849           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2850                               build_int_2 (size, 0), NULL_RTX, 1);
2851           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2852           /* We used the wrong signedness.  Adjust the result.  */
2853           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2854                                               target, unsignedp);
2855         }
2856     }
2857
2858   return 0;
2859
2860  try:
2861   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2862   tem = expand_binop (wider_mode, moptab, op0, op1,
2863                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2864   if (tem == 0)
2865     return 0;
2866
2867   /* Extract the high half of the just generated product.  */
2868   if (mode == word_mode)
2869     {
2870       return gen_highpart (mode, tem);
2871     }
2872   else
2873     {
2874       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2875                           build_int_2 (size, 0), NULL_RTX, 1);
2876       return convert_modes (mode, wider_mode, tem, unsignedp);
2877     }
2878 }
2879 \f
2880 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2881    if that is convenient, and returning where the result is.
2882    You may request either the quotient or the remainder as the result;
2883    specify REM_FLAG nonzero to get the remainder.
2884
2885    CODE is the expression code for which kind of division this is;
2886    it controls how rounding is done.  MODE is the machine mode to use.
2887    UNSIGNEDP nonzero means do unsigned division.  */
2888
2889 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2890    and then correct it by or'ing in missing high bits
2891    if result of ANDI is nonzero.
2892    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2893    This could optimize to a bfexts instruction.
2894    But C doesn't use these operations, so their optimizations are
2895    left for later.  */
2896 /* ??? For modulo, we don't actually need the highpart of the first product,
2897    the low part will do nicely.  And for small divisors, the second multiply
2898    can also be a low-part only multiply or even be completely left out.
2899    E.g. to calculate the remainder of a division by 3 with a 32 bit
2900    multiply, multiply with 0x55555556 and extract the upper two bits;
2901    the result is exact for inputs up to 0x1fffffff.
2902    The input range can be reduced by using cross-sum rules.
2903    For odd divisors >= 3, the following table gives right shift counts
2904    so that if a number is shifted by an integer multiple of the given
2905    amount, the remainder stays the same:
2906    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2907    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2908    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2909    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2910    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2911
2912    Cross-sum rules for even numbers can be derived by leaving as many bits
2913    to the right alone as the divisor has zeros to the right.
2914    E.g. if x is an unsigned 32 bit number:
2915    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2916    */
2917
2918 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2919
2920 rtx
2921 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
2922                rtx op0, rtx op1, rtx target, int unsignedp)
2923 {
2924   enum machine_mode compute_mode;
2925   rtx tquotient;
2926   rtx quotient = 0, remainder = 0;
2927   rtx last;
2928   int size;
2929   rtx insn, set;
2930   optab optab1, optab2;
2931   int op1_is_constant, op1_is_pow2 = 0;
2932   int max_cost, extra_cost;
2933   static HOST_WIDE_INT last_div_const = 0;
2934   static HOST_WIDE_INT ext_op1;
2935
2936   op1_is_constant = GET_CODE (op1) == CONST_INT;
2937   if (op1_is_constant)
2938     {
2939       ext_op1 = INTVAL (op1);
2940       if (unsignedp)
2941         ext_op1 &= GET_MODE_MASK (mode);
2942       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
2943                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
2944     }
2945
2946   /*
2947      This is the structure of expand_divmod:
2948
2949      First comes code to fix up the operands so we can perform the operations
2950      correctly and efficiently.
2951
2952      Second comes a switch statement with code specific for each rounding mode.
2953      For some special operands this code emits all RTL for the desired
2954      operation, for other cases, it generates only a quotient and stores it in
2955      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2956      to indicate that it has not done anything.
2957
2958      Last comes code that finishes the operation.  If QUOTIENT is set and
2959      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2960      QUOTIENT is not set, it is computed using trunc rounding.
2961
2962      We try to generate special code for division and remainder when OP1 is a
2963      constant.  If |OP1| = 2**n we can use shifts and some other fast
2964      operations.  For other values of OP1, we compute a carefully selected
2965      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2966      by m.
2967
2968      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2969      half of the product.  Different strategies for generating the product are
2970      implemented in expand_mult_highpart.
2971
2972      If what we actually want is the remainder, we generate that by another
2973      by-constant multiplication and a subtraction.  */
2974
2975   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2976      code below will malfunction if we are, so check here and handle
2977      the special case if so.  */
2978   if (op1 == const1_rtx)
2979     return rem_flag ? const0_rtx : op0;
2980
2981     /* When dividing by -1, we could get an overflow.
2982      negv_optab can handle overflows.  */
2983   if (! unsignedp && op1 == constm1_rtx)
2984     {
2985       if (rem_flag)
2986         return const0_rtx;
2987       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
2988                           ? negv_optab : neg_optab, op0, target, 0);
2989     }
2990
2991   if (target
2992       /* Don't use the function value register as a target
2993          since we have to read it as well as write it,
2994          and function-inlining gets confused by this.  */
2995       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2996           /* Don't clobber an operand while doing a multi-step calculation.  */
2997           || ((rem_flag || op1_is_constant)
2998               && (reg_mentioned_p (target, op0)
2999                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3000           || reg_mentioned_p (target, op1)
3001           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3002     target = 0;
3003
3004   /* Get the mode in which to perform this computation.  Normally it will
3005      be MODE, but sometimes we can't do the desired operation in MODE.
3006      If so, pick a wider mode in which we can do the operation.  Convert
3007      to that mode at the start to avoid repeated conversions.
3008
3009      First see what operations we need.  These depend on the expression
3010      we are evaluating.  (We assume that divxx3 insns exist under the
3011      same conditions that modxx3 insns and that these insns don't normally
3012      fail.  If these assumptions are not correct, we may generate less
3013      efficient code in some cases.)
3014
3015      Then see if we find a mode in which we can open-code that operation
3016      (either a division, modulus, or shift).  Finally, check for the smallest
3017      mode for which we can do the operation with a library call.  */
3018
3019   /* We might want to refine this now that we have division-by-constant
3020      optimization.  Since expand_mult_highpart tries so many variants, it is
3021      not straightforward to generalize this.  Maybe we should make an array
3022      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3023
3024   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3025             ? (unsignedp ? lshr_optab : ashr_optab)
3026             : (unsignedp ? udiv_optab : sdiv_optab));
3027   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3028             ? optab1
3029             : (unsignedp ? udivmod_optab : sdivmod_optab));
3030
3031   for (compute_mode = mode; compute_mode != VOIDmode;
3032        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3033     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3034         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3035       break;
3036
3037   if (compute_mode == VOIDmode)
3038     for (compute_mode = mode; compute_mode != VOIDmode;
3039          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3040       if (optab1->handlers[(int) compute_mode].libfunc
3041           || optab2->handlers[(int) compute_mode].libfunc)
3042         break;
3043
3044   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3045      in expand_binop.  */
3046   if (compute_mode == VOIDmode)
3047     compute_mode = mode;
3048
3049   if (target && GET_MODE (target) == compute_mode)
3050     tquotient = target;
3051   else
3052     tquotient = gen_reg_rtx (compute_mode);
3053
3054   size = GET_MODE_BITSIZE (compute_mode);
3055 #if 0
3056   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3057      (mode), and thereby get better code when OP1 is a constant.  Do that
3058      later.  It will require going over all usages of SIZE below.  */
3059   size = GET_MODE_BITSIZE (mode);
3060 #endif
3061
3062   /* Only deduct something for a REM if the last divide done was
3063      for a different constant.   Then set the constant of the last
3064      divide.  */
3065   max_cost = div_cost[(int) compute_mode]
3066     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3067                       && INTVAL (op1) == last_div_const)
3068        ? mul_cost[(int) compute_mode] + add_cost : 0);
3069
3070   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3071
3072   /* Now convert to the best mode to use.  */
3073   if (compute_mode != mode)
3074     {
3075       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3076       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3077
3078       /* convert_modes may have placed op1 into a register, so we
3079          must recompute the following.  */
3080       op1_is_constant = GET_CODE (op1) == CONST_INT;
3081       op1_is_pow2 = (op1_is_constant
3082                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3083                           || (! unsignedp
3084                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3085     }
3086
3087   /* If one of the operands is a volatile MEM, copy it into a register.  */
3088
3089   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3090     op0 = force_reg (compute_mode, op0);
3091   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3092     op1 = force_reg (compute_mode, op1);
3093
3094   /* If we need the remainder or if OP1 is constant, we need to
3095      put OP0 in a register in case it has any queued subexpressions.  */
3096   if (rem_flag || op1_is_constant)
3097     op0 = force_reg (compute_mode, op0);
3098
3099   last = get_last_insn ();
3100
3101   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3102   if (unsignedp)
3103     {
3104       if (code == FLOOR_DIV_EXPR)
3105         code = TRUNC_DIV_EXPR;
3106       if (code == FLOOR_MOD_EXPR)
3107         code = TRUNC_MOD_EXPR;
3108       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3109         code = TRUNC_DIV_EXPR;
3110     }
3111
3112   if (op1 != const0_rtx)
3113     switch (code)
3114       {
3115       case TRUNC_MOD_EXPR:
3116       case TRUNC_DIV_EXPR:
3117         if (op1_is_constant)
3118           {
3119             if (unsignedp)
3120               {
3121                 unsigned HOST_WIDE_INT mh, ml;
3122                 int pre_shift, post_shift;
3123                 int dummy;
3124                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3125                                             & GET_MODE_MASK (compute_mode));
3126
3127                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3128                   {
3129                     pre_shift = floor_log2 (d);
3130                     if (rem_flag)
3131                       {
3132                         remainder
3133                           = expand_binop (compute_mode, and_optab, op0,
3134                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3135                                           remainder, 1,
3136                                           OPTAB_LIB_WIDEN);
3137                         if (remainder)
3138                           return gen_lowpart (mode, remainder);
3139                       }
3140                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3141                                              build_int_2 (pre_shift, 0),
3142                                              tquotient, 1);
3143                   }
3144                 else if (size <= HOST_BITS_PER_WIDE_INT)
3145                   {
3146                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3147                       {
3148                         /* Most significant bit of divisor is set; emit an scc
3149                            insn.  */
3150                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3151                                                     compute_mode, 1, 1);
3152                         if (quotient == 0)
3153                           goto fail1;
3154                       }
3155                     else
3156                       {
3157                         /* Find a suitable multiplier and right shift count
3158                            instead of multiplying with D.  */
3159
3160                         mh = choose_multiplier (d, size, size,
3161                                                 &ml, &post_shift, &dummy);
3162
3163                         /* If the suggested multiplier is more than SIZE bits,
3164                            we can do better for even divisors, using an
3165                            initial right shift.  */
3166                         if (mh != 0 && (d & 1) == 0)
3167                           {
3168                             pre_shift = floor_log2 (d & -d);
3169                             mh = choose_multiplier (d >> pre_shift, size,
3170                                                     size - pre_shift,
3171                                                     &ml, &post_shift, &dummy);
3172                             if (mh)
3173                               abort ();
3174                           }
3175                         else
3176                           pre_shift = 0;
3177
3178                         if (mh != 0)
3179                           {
3180                             rtx t1, t2, t3, t4;
3181
3182                             if (post_shift - 1 >= BITS_PER_WORD)
3183                               goto fail1;
3184
3185                             extra_cost = (shift_cost[post_shift - 1]
3186                                           + shift_cost[1] + 2 * add_cost);
3187                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3188                                                        NULL_RTX, 1,
3189                                                        max_cost - extra_cost);
3190                             if (t1 == 0)
3191                               goto fail1;
3192                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3193                                                                op0, t1),
3194                                                 NULL_RTX);
3195                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3196                                                build_int_2 (1, 0), NULL_RTX,1);
3197                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3198                                                               t1, t3),
3199                                                 NULL_RTX);
3200                             quotient
3201                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3202                                               build_int_2 (post_shift - 1, 0),
3203                                               tquotient, 1);
3204                           }
3205                         else
3206                           {
3207                             rtx t1, t2;
3208
3209                             if (pre_shift >= BITS_PER_WORD
3210                                 || post_shift >= BITS_PER_WORD)
3211                               goto fail1;
3212
3213                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3214                                                build_int_2 (pre_shift, 0),
3215                                                NULL_RTX, 1);
3216                             extra_cost = (shift_cost[pre_shift]
3217                                           + shift_cost[post_shift]);
3218                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3219                                                        NULL_RTX, 1,
3220                                                        max_cost - extra_cost);
3221                             if (t2 == 0)
3222                               goto fail1;
3223                             quotient
3224                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3225                                               build_int_2 (post_shift, 0),
3226                                               tquotient, 1);
3227                           }
3228                       }
3229                   }
3230                 else            /* Too wide mode to use tricky code */
3231                   break;
3232
3233                 insn = get_last_insn ();
3234                 if (insn != last
3235                     && (set = single_set (insn)) != 0
3236                     && SET_DEST (set) == quotient)
3237                   set_unique_reg_note (insn,
3238                                        REG_EQUAL,
3239                                        gen_rtx_UDIV (compute_mode, op0, op1));
3240               }
3241             else                /* TRUNC_DIV, signed */
3242               {
3243                 unsigned HOST_WIDE_INT ml;
3244                 int lgup, post_shift;
3245                 HOST_WIDE_INT d = INTVAL (op1);
3246                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3247
3248                 /* n rem d = n rem -d */
3249                 if (rem_flag && d < 0)
3250                   {
3251                     d = abs_d;
3252                     op1 = gen_int_mode (abs_d, compute_mode);
3253                   }
3254
3255                 if (d == 1)
3256                   quotient = op0;
3257                 else if (d == -1)
3258                   quotient = expand_unop (compute_mode, neg_optab, op0,
3259                                           tquotient, 0);
3260                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3261                   {
3262                     /* This case is not handled correctly below.  */
3263                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3264                                                 compute_mode, 1, 1);
3265                     if (quotient == 0)
3266                       goto fail1;
3267                   }
3268                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3269                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3270                          /* ??? The cheap metric is computed only for
3271                             word_mode.  If this operation is wider, this may
3272                             not be so.  Assume true if the optab has an
3273                             expander for this mode.  */
3274                          && (((rem_flag ? smod_optab : sdiv_optab)
3275                               ->handlers[(int) compute_mode].insn_code
3276                               != CODE_FOR_nothing)
3277                              || (sdivmod_optab->handlers[(int) compute_mode]
3278                                  .insn_code != CODE_FOR_nothing)))
3279                   ;
3280                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3281                   {
3282                     lgup = floor_log2 (abs_d);
3283                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3284                       {
3285                         rtx label = gen_label_rtx ();
3286                         rtx t1;
3287
3288                         t1 = copy_to_mode_reg (compute_mode, op0);
3289                         do_cmp_and_jump (t1, const0_rtx, GE,
3290                                          compute_mode, label);
3291                         expand_inc (t1, gen_int_mode (abs_d - 1,
3292                                                       compute_mode));
3293                         emit_label (label);
3294                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3295                                                  build_int_2 (lgup, 0),
3296                                                  tquotient, 0);
3297                       }
3298                     else
3299                       {
3300                         rtx t1, t2, t3;
3301                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3302                                            build_int_2 (size - 1, 0),
3303                                            NULL_RTX, 0);
3304                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3305                                            build_int_2 (size - lgup, 0),
3306                                            NULL_RTX, 1);
3307                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3308                                                           op0, t2),
3309                                             NULL_RTX);
3310                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3311                                                  build_int_2 (lgup, 0),
3312                                                  tquotient, 0);
3313                       }
3314
3315                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3316                        the quotient.  */
3317                     if (d < 0)
3318                       {
3319                         insn = get_last_insn ();
3320                         if (insn != last
3321                             && (set = single_set (insn)) != 0
3322                             && SET_DEST (set) == quotient
3323                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3324                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3325                           set_unique_reg_note (insn,
3326                                                REG_EQUAL,
3327                                                gen_rtx_DIV (compute_mode,
3328                                                             op0,
3329                                                             GEN_INT
3330                                                             (trunc_int_for_mode
3331                                                              (abs_d,
3332                                                               compute_mode))));
3333
3334                         quotient = expand_unop (compute_mode, neg_optab,
3335                                                 quotient, quotient, 0);
3336                       }
3337                   }
3338                 else if (size <= HOST_BITS_PER_WIDE_INT)
3339                   {
3340                     choose_multiplier (abs_d, size, size - 1,
3341                                        &ml, &post_shift, &lgup);
3342                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3343                       {
3344                         rtx t1, t2, t3;
3345
3346                         if (post_shift >= BITS_PER_WORD
3347                             || size - 1 >= BITS_PER_WORD)
3348                           goto fail1;
3349
3350                         extra_cost = (shift_cost[post_shift]
3351                                       + shift_cost[size - 1] + add_cost);
3352                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3353                                                    NULL_RTX, 0,
3354                                                    max_cost - extra_cost);
3355                         if (t1 == 0)
3356                           goto fail1;
3357                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3358                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3359                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3360                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3361                         if (d < 0)
3362                           quotient
3363                             = force_operand (gen_rtx_MINUS (compute_mode,
3364                                                             t3, t2),
3365                                              tquotient);
3366                         else
3367                           quotient
3368                             = force_operand (gen_rtx_MINUS (compute_mode,
3369                                                             t2, t3),
3370                                              tquotient);
3371                       }
3372                     else
3373                       {
3374                         rtx t1, t2, t3, t4;
3375
3376                         if (post_shift >= BITS_PER_WORD
3377                             || size - 1 >= BITS_PER_WORD)
3378                           goto fail1;
3379
3380                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3381                         extra_cost = (shift_cost[post_shift]
3382                                       + shift_cost[size - 1] + 2 * add_cost);
3383                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3384                                                    NULL_RTX, 0,
3385                                                    max_cost - extra_cost);
3386                         if (t1 == 0)
3387                           goto fail1;
3388                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3389                                                           t1, op0),
3390                                             NULL_RTX);
3391                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3392                                            build_int_2 (post_shift, 0),
3393                                            NULL_RTX, 0);
3394                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3395                                            build_int_2 (size - 1, 0),
3396                                            NULL_RTX, 0);
3397                         if (d < 0)
3398                           quotient
3399                             = force_operand (gen_rtx_MINUS (compute_mode,
3400                                                             t4, t3),
3401                                              tquotient);
3402                         else
3403                           quotient
3404                             = force_operand (gen_rtx_MINUS (compute_mode,
3405                                                             t3, t4),
3406                                              tquotient);
3407                       }
3408                   }
3409                 else            /* Too wide mode to use tricky code */
3410                   break;
3411
3412                 insn = get_last_insn ();
3413                 if (insn != last
3414                     && (set = single_set (insn)) != 0
3415                     && SET_DEST (set) == quotient)
3416                   set_unique_reg_note (insn,
3417                                        REG_EQUAL,
3418                                        gen_rtx_DIV (compute_mode, op0, op1));
3419               }
3420             break;
3421           }
3422       fail1:
3423         delete_insns_since (last);
3424         break;
3425
3426       case FLOOR_DIV_EXPR:
3427       case FLOOR_MOD_EXPR:
3428       /* We will come here only for signed operations.  */
3429         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3430           {
3431             unsigned HOST_WIDE_INT mh, ml;
3432             int pre_shift, lgup, post_shift;
3433             HOST_WIDE_INT d = INTVAL (op1);
3434
3435             if (d > 0)
3436               {
3437                 /* We could just as easily deal with negative constants here,
3438                    but it does not seem worth the trouble for GCC 2.6.  */
3439                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3440                   {
3441                     pre_shift = floor_log2 (d);
3442                     if (rem_flag)
3443                       {
3444                         remainder = expand_binop (compute_mode, and_optab, op0,
3445                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3446                                                   remainder, 0, OPTAB_LIB_WIDEN);
3447                         if (remainder)
3448                           return gen_lowpart (mode, remainder);
3449                       }
3450                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3451                                              build_int_2 (pre_shift, 0),
3452                                              tquotient, 0);
3453                   }
3454                 else
3455                   {
3456                     rtx t1, t2, t3, t4;
3457
3458                     mh = choose_multiplier (d, size, size - 1,
3459                                             &ml, &post_shift, &lgup);
3460                     if (mh)
3461                       abort ();
3462
3463                     if (post_shift < BITS_PER_WORD
3464                         && size - 1 < BITS_PER_WORD)
3465                       {
3466                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3467                                            build_int_2 (size - 1, 0),
3468                                            NULL_RTX, 0);
3469                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3470                                            NULL_RTX, 0, OPTAB_WIDEN);
3471                         extra_cost = (shift_cost[post_shift]
3472                                       + shift_cost[size - 1] + 2 * add_cost);
3473                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3474                                                    NULL_RTX, 1,
3475                                                    max_cost - extra_cost);
3476                         if (t3 != 0)
3477                           {
3478                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3479                                                build_int_2 (post_shift, 0),
3480                                                NULL_RTX, 1);
3481                             quotient = expand_binop (compute_mode, xor_optab,
3482                                                      t4, t1, tquotient, 0,
3483                                                      OPTAB_WIDEN);
3484                           }
3485                       }
3486                   }
3487               }
3488             else
3489               {
3490                 rtx nsign, t1, t2, t3, t4;
3491                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3492                                                   op0, constm1_rtx), NULL_RTX);
3493                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3494                                    0, OPTAB_WIDEN);
3495                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3496                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3497                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3498                                     NULL_RTX);
3499                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3500                                     NULL_RTX, 0);
3501                 if (t4)
3502                   {
3503                     rtx t5;
3504                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3505                                       NULL_RTX, 0);
3506                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3507                                                             t4, t5),
3508                                               tquotient);
3509                   }
3510               }
3511           }
3512
3513         if (quotient != 0)
3514           break;
3515         delete_insns_since (last);
3516
3517         /* Try using an instruction that produces both the quotient and
3518            remainder, using truncation.  We can easily compensate the quotient
3519            or remainder to get floor rounding, once we have the remainder.
3520            Notice that we compute also the final remainder value here,
3521            and return the result right away.  */
3522         if (target == 0 || GET_MODE (target) != compute_mode)
3523           target = gen_reg_rtx (compute_mode);
3524
3525         if (rem_flag)
3526           {
3527             remainder
3528               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3529             quotient = gen_reg_rtx (compute_mode);
3530           }
3531         else
3532           {
3533             quotient
3534               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3535             remainder = gen_reg_rtx (compute_mode);
3536           }
3537
3538         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3539                                  quotient, remainder, 0))
3540           {
3541             /* This could be computed with a branch-less sequence.
3542                Save that for later.  */
3543             rtx tem;
3544             rtx label = gen_label_rtx ();
3545             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3546             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3547                                 NULL_RTX, 0, OPTAB_WIDEN);
3548             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3549             expand_dec (quotient, const1_rtx);
3550             expand_inc (remainder, op1);
3551             emit_label (label);
3552             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3553           }
3554
3555         /* No luck with division elimination or divmod.  Have to do it
3556            by conditionally adjusting op0 *and* the result.  */
3557         {
3558           rtx label1, label2, label3, label4, label5;
3559           rtx adjusted_op0;
3560           rtx tem;
3561
3562           quotient = gen_reg_rtx (compute_mode);
3563           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3564           label1 = gen_label_rtx ();
3565           label2 = gen_label_rtx ();
3566           label3 = gen_label_rtx ();
3567           label4 = gen_label_rtx ();
3568           label5 = gen_label_rtx ();
3569           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3570           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3571           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3572                               quotient, 0, OPTAB_LIB_WIDEN);
3573           if (tem != quotient)
3574             emit_move_insn (quotient, tem);
3575           emit_jump_insn (gen_jump (label5));
3576           emit_barrier ();
3577           emit_label (label1);
3578           expand_inc (adjusted_op0, const1_rtx);
3579           emit_jump_insn (gen_jump (label4));
3580           emit_barrier ();
3581           emit_label (label2);
3582           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3583           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3584                               quotient, 0, OPTAB_LIB_WIDEN);
3585           if (tem != quotient)
3586             emit_move_insn (quotient, tem);
3587           emit_jump_insn (gen_jump (label5));
3588           emit_barrier ();
3589           emit_label (label3);
3590           expand_dec (adjusted_op0, const1_rtx);
3591           emit_label (label4);
3592           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3593                               quotient, 0, OPTAB_LIB_WIDEN);
3594           if (tem != quotient)
3595             emit_move_insn (quotient, tem);
3596           expand_dec (quotient, const1_rtx);
3597           emit_label (label5);
3598         }
3599         break;
3600
3601       case CEIL_DIV_EXPR:
3602       case CEIL_MOD_EXPR:
3603         if (unsignedp)
3604           {
3605             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3606               {
3607                 rtx t1, t2, t3;
3608                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3609                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3610                                    build_int_2 (floor_log2 (d), 0),
3611                                    tquotient, 1);
3612                 t2 = expand_binop (compute_mode, and_optab, op0,
3613                                    GEN_INT (d - 1),
3614                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3615                 t3 = gen_reg_rtx (compute_mode);
3616                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3617                                       compute_mode, 1, 1);
3618                 if (t3 == 0)
3619                   {
3620                     rtx lab;
3621                     lab = gen_label_rtx ();
3622                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3623                     expand_inc (t1, const1_rtx);
3624                     emit_label (lab);
3625                     quotient = t1;
3626                   }
3627                 else
3628                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3629                                                           t1, t3),
3630                                             tquotient);
3631                 break;
3632               }
3633
3634             /* Try using an instruction that produces both the quotient and
3635                remainder, using truncation.  We can easily compensate the
3636                quotient or remainder to get ceiling rounding, once we have the
3637                remainder.  Notice that we compute also the final remainder
3638                value here, and return the result right away.  */
3639             if (target == 0 || GET_MODE (target) != compute_mode)
3640               target = gen_reg_rtx (compute_mode);
3641
3642             if (rem_flag)
3643               {
3644                 remainder = (GET_CODE (target) == REG
3645                              ? target : gen_reg_rtx (compute_mode));
3646                 quotient = gen_reg_rtx (compute_mode);
3647               }
3648             else
3649               {
3650                 quotient = (GET_CODE (target) == REG
3651                             ? target : gen_reg_rtx (compute_mode));
3652                 remainder = gen_reg_rtx (compute_mode);
3653               }
3654
3655             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3656                                      remainder, 1))
3657               {
3658                 /* This could be computed with a branch-less sequence.
3659                    Save that for later.  */
3660                 rtx label = gen_label_rtx ();
3661                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3662                                  compute_mode, label);
3663                 expand_inc (quotient, const1_rtx);
3664                 expand_dec (remainder, op1);
3665                 emit_label (label);
3666                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3667               }
3668
3669             /* No luck with division elimination or divmod.  Have to do it
3670                by conditionally adjusting op0 *and* the result.  */
3671             {
3672               rtx label1, label2;
3673               rtx adjusted_op0, tem;
3674
3675               quotient = gen_reg_rtx (compute_mode);
3676               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3677               label1 = gen_label_rtx ();
3678               label2 = gen_label_rtx ();
3679               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3680                                compute_mode, label1);
3681               emit_move_insn  (quotient, const0_rtx);
3682               emit_jump_insn (gen_jump (label2));
3683               emit_barrier ();
3684               emit_label (label1);
3685               expand_dec (adjusted_op0, const1_rtx);
3686               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3687                                   quotient, 1, OPTAB_LIB_WIDEN);
3688               if (tem != quotient)
3689                 emit_move_insn (quotient, tem);
3690               expand_inc (quotient, const1_rtx);
3691               emit_label (label2);
3692             }
3693           }
3694         else /* signed */
3695           {
3696             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3697                 && INTVAL (op1) >= 0)
3698               {
3699                 /* This is extremely similar to the code for the unsigned case
3700                    above.  For 2.7 we should merge these variants, but for
3701                    2.6.1 I don't want to touch the code for unsigned since that
3702                    get used in C.  The signed case will only be used by other
3703                    languages (Ada).  */
3704
3705                 rtx t1, t2, t3;
3706                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3707                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3708                                    build_int_2 (floor_log2 (d), 0),
3709                                    tquotient, 0);
3710                 t2 = expand_binop (compute_mode, and_optab, op0,
3711                                    GEN_INT (d - 1),
3712                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713                 t3 = gen_reg_rtx (compute_mode);
3714                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3715                                       compute_mode, 1, 1);
3716                 if (t3 == 0)
3717                   {
3718                     rtx lab;
3719                     lab = gen_label_rtx ();
3720                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3721                     expand_inc (t1, const1_rtx);
3722                     emit_label (lab);
3723                     quotient = t1;
3724                   }
3725                 else
3726                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3727                                                           t1, t3),
3728                                             tquotient);
3729                 break;
3730               }
3731
3732             /* Try using an instruction that produces both the quotient and
3733                remainder, using truncation.  We can easily compensate the
3734                quotient or remainder to get ceiling rounding, once we have the
3735                remainder.  Notice that we compute also the final remainder
3736                value here, and return the result right away.  */
3737             if (target == 0 || GET_MODE (target) != compute_mode)
3738               target = gen_reg_rtx (compute_mode);
3739             if (rem_flag)
3740               {
3741                 remainder= (GET_CODE (target) == REG
3742                             ? target : gen_reg_rtx (compute_mode));
3743                 quotient = gen_reg_rtx (compute_mode);
3744               }
3745             else
3746               {
3747                 quotient = (GET_CODE (target) == REG
3748                             ? target : gen_reg_rtx (compute_mode));
3749                 remainder = gen_reg_rtx (compute_mode);
3750               }
3751
3752             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3753                                      remainder, 0))
3754               {
3755                 /* This could be computed with a branch-less sequence.
3756                    Save that for later.  */
3757                 rtx tem;
3758                 rtx label = gen_label_rtx ();
3759                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3760                                  compute_mode, label);
3761                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3762                                     NULL_RTX, 0, OPTAB_WIDEN);
3763                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3764                 expand_inc (quotient, const1_rtx);
3765                 expand_dec (remainder, op1);
3766                 emit_label (label);
3767                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3768               }
3769
3770             /* No luck with division elimination or divmod.  Have to do it
3771                by conditionally adjusting op0 *and* the result.  */
3772             {
3773               rtx label1, label2, label3, label4, label5;
3774               rtx adjusted_op0;
3775               rtx tem;
3776
3777               quotient = gen_reg_rtx (compute_mode);
3778               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3779               label1 = gen_label_rtx ();
3780               label2 = gen_label_rtx ();
3781               label3 = gen_label_rtx ();
3782               label4 = gen_label_rtx ();
3783               label5 = gen_label_rtx ();
3784               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3785               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3786                                compute_mode, label1);
3787               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3788                                   quotient, 0, OPTAB_LIB_WIDEN);
3789               if (tem != quotient)
3790                 emit_move_insn (quotient, tem);
3791               emit_jump_insn (gen_jump (label5));
3792               emit_barrier ();
3793               emit_label (label1);
3794               expand_dec (adjusted_op0, const1_rtx);
3795               emit_jump_insn (gen_jump (label4));
3796               emit_barrier ();
3797               emit_label (label2);
3798               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3799                                compute_mode, label3);
3800               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3801                                   quotient, 0, OPTAB_LIB_WIDEN);
3802               if (tem != quotient)
3803                 emit_move_insn (quotient, tem);
3804               emit_jump_insn (gen_jump (label5));
3805               emit_barrier ();
3806               emit_label (label3);
3807               expand_inc (adjusted_op0, const1_rtx);
3808               emit_label (label4);
3809               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3810                                   quotient, 0, OPTAB_LIB_WIDEN);
3811               if (tem != quotient)
3812                 emit_move_insn (quotient, tem);
3813               expand_inc (quotient, const1_rtx);
3814               emit_label (label5);
3815             }
3816           }
3817         break;
3818
3819       case EXACT_DIV_EXPR:
3820         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3821           {
3822             HOST_WIDE_INT d = INTVAL (op1);
3823             unsigned HOST_WIDE_INT ml;
3824             int pre_shift;
3825             rtx t1;
3826
3827             pre_shift = floor_log2 (d & -d);
3828             ml = invert_mod2n (d >> pre_shift, size);
3829             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3830                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3831             quotient = expand_mult (compute_mode, t1,
3832                                     gen_int_mode (ml, compute_mode),
3833                                     NULL_RTX, 1);
3834
3835             insn = get_last_insn ();
3836             set_unique_reg_note (insn,
3837                                  REG_EQUAL,
3838                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3839                                                  compute_mode,
3840                                                  op0, op1));
3841           }
3842         break;
3843
3844       case ROUND_DIV_EXPR:
3845       case ROUND_MOD_EXPR:
3846         if (unsignedp)
3847           {
3848             rtx tem;
3849             rtx label;
3850             label = gen_label_rtx ();
3851             quotient = gen_reg_rtx (compute_mode);
3852             remainder = gen_reg_rtx (compute_mode);
3853             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3854               {
3855                 rtx tem;
3856                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3857                                          quotient, 1, OPTAB_LIB_WIDEN);
3858                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3859                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3860                                           remainder, 1, OPTAB_LIB_WIDEN);
3861               }
3862             tem = plus_constant (op1, -1);
3863             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3864                                 build_int_2 (1, 0), NULL_RTX, 1);
3865             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3866             expand_inc (quotient, const1_rtx);
3867             expand_dec (remainder, op1);
3868             emit_label (label);
3869           }
3870         else
3871           {
3872             rtx abs_rem, abs_op1, tem, mask;
3873             rtx label;
3874             label = gen_label_rtx ();
3875             quotient = gen_reg_rtx (compute_mode);
3876             remainder = gen_reg_rtx (compute_mode);
3877             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3878               {
3879                 rtx tem;
3880                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3881                                          quotient, 0, OPTAB_LIB_WIDEN);
3882                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3883                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3884                                           remainder, 0, OPTAB_LIB_WIDEN);
3885               }
3886             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3887             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3888             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3889                                 build_int_2 (1, 0), NULL_RTX, 1);
3890             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3891             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3892                                 NULL_RTX, 0, OPTAB_WIDEN);
3893             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3894                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3895             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3896                                 NULL_RTX, 0, OPTAB_WIDEN);
3897             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3898                                 NULL_RTX, 0, OPTAB_WIDEN);
3899             expand_inc (quotient, tem);
3900             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3901                                 NULL_RTX, 0, OPTAB_WIDEN);
3902             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3903                                 NULL_RTX, 0, OPTAB_WIDEN);
3904             expand_dec (remainder, tem);
3905             emit_label (label);
3906           }
3907         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3908
3909       default:
3910         abort ();
3911       }
3912
3913   if (quotient == 0)
3914     {
3915       if (target && GET_MODE (target) != compute_mode)
3916         target = 0;
3917
3918       if (rem_flag)
3919         {
3920           /* Try to produce the remainder without producing the quotient.
3921              If we seem to have a divmod pattern that does not require widening,
3922              don't try widening here.  We should really have a WIDEN argument
3923              to expand_twoval_binop, since what we'd really like to do here is
3924              1) try a mod insn in compute_mode
3925              2) try a divmod insn in compute_mode
3926              3) try a div insn in compute_mode and multiply-subtract to get
3927                 remainder
3928              4) try the same things with widening allowed.  */
3929           remainder
3930             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3931                                  op0, op1, target,
3932                                  unsignedp,
3933                                  ((optab2->handlers[(int) compute_mode].insn_code
3934                                    != CODE_FOR_nothing)
3935                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3936           if (remainder == 0)
3937             {
3938               /* No luck there.  Can we do remainder and divide at once
3939                  without a library call?  */
3940               remainder = gen_reg_rtx (compute_mode);
3941               if (! expand_twoval_binop ((unsignedp
3942                                           ? udivmod_optab
3943                                           : sdivmod_optab),
3944                                          op0, op1,
3945                                          NULL_RTX, remainder, unsignedp))
3946                 remainder = 0;
3947             }
3948
3949           if (remainder)
3950             return gen_lowpart (mode, remainder);
3951         }
3952
3953       /* Produce the quotient.  Try a quotient insn, but not a library call.
3954          If we have a divmod in this mode, use it in preference to widening
3955          the div (for this test we assume it will not fail). Note that optab2
3956          is set to the one of the two optabs that the call below will use.  */
3957       quotient
3958         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3959                              op0, op1, rem_flag ? NULL_RTX : target,
3960                              unsignedp,
3961                              ((optab2->handlers[(int) compute_mode].insn_code
3962                                != CODE_FOR_nothing)
3963                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3964
3965       if (quotient == 0)
3966         {
3967           /* No luck there.  Try a quotient-and-remainder insn,
3968              keeping the quotient alone.  */
3969           quotient = gen_reg_rtx (compute_mode);
3970           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3971                                      op0, op1,
3972                                      quotient, NULL_RTX, unsignedp))
3973             {
3974               quotient = 0;
3975               if (! rem_flag)
3976                 /* Still no luck.  If we are not computing the remainder,
3977                    use a library call for the quotient.  */
3978                 quotient = sign_expand_binop (compute_mode,
3979                                               udiv_optab, sdiv_optab,
3980                                               op0, op1, target,
3981                                               unsignedp, OPTAB_LIB_WIDEN);
3982             }
3983         }
3984     }
3985
3986   if (rem_flag)
3987     {
3988       if (target && GET_MODE (target) != compute_mode)
3989         target = 0;
3990
3991       if (quotient == 0)
3992         /* No divide instruction either.  Use library for remainder.  */
3993         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3994                                        op0, op1, target,
3995                                        unsignedp, OPTAB_LIB_WIDEN);
3996       else
3997         {
3998           /* We divided.  Now finish doing X - Y * (X / Y).  */
3999           remainder = expand_mult (compute_mode, quotient, op1,
4000                                    NULL_RTX, unsignedp);
4001           remainder = expand_binop (compute_mode, sub_optab, op0,
4002                                     remainder, target, unsignedp,
4003                                     OPTAB_LIB_WIDEN);
4004         }
4005     }
4006
4007   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4008 }
4009 \f
4010 /* Return a tree node with data type TYPE, describing the value of X.
4011    Usually this is an RTL_EXPR, if there is no obvious better choice.
4012    X may be an expression, however we only support those expressions
4013    generated by loop.c.  */
4014
4015 tree
4016 make_tree (tree type, rtx x)
4017 {
4018   tree t;
4019
4020   switch (GET_CODE (x))
4021     {
4022     case CONST_INT:
4023       t = build_int_2 (INTVAL (x),
4024                        (TREE_UNSIGNED (type)
4025                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4026                        || INTVAL (x) >= 0 ? 0 : -1);
4027       TREE_TYPE (t) = type;
4028       return t;
4029
4030     case CONST_DOUBLE:
4031       if (GET_MODE (x) == VOIDmode)
4032         {
4033           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4034           TREE_TYPE (t) = type;
4035         }
4036       else
4037         {
4038           REAL_VALUE_TYPE d;
4039
4040           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4041           t = build_real (type, d);
4042         }
4043
4044       return t;
4045
4046     case CONST_VECTOR:
4047       {
4048         int i, units;
4049         rtx elt;
4050         tree t = NULL_TREE;
4051
4052         units = CONST_VECTOR_NUNITS (x);
4053
4054         /* Build a tree with vector elements.  */
4055         for (i = units - 1; i >= 0; --i)
4056           {
4057             elt = CONST_VECTOR_ELT (x, i);
4058             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4059           }
4060
4061         return build_vector (type, t);
4062       }
4063
4064     case PLUS:
4065       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4066                           make_tree (type, XEXP (x, 1))));
4067
4068     case MINUS:
4069       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4070                           make_tree (type, XEXP (x, 1))));
4071
4072     case NEG:
4073       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4074
4075     case MULT:
4076       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4077                           make_tree (type, XEXP (x, 1))));
4078
4079     case ASHIFT:
4080       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4081                           make_tree (type, XEXP (x, 1))));
4082
4083     case LSHIFTRT:
4084       t = (*lang_hooks.types.unsigned_type) (type);
4085       return fold (convert (type,
4086                             build (RSHIFT_EXPR, t,
4087                                    make_tree (t, XEXP (x, 0)),
4088                                    make_tree (type, XEXP (x, 1)))));
4089
4090     case ASHIFTRT:
4091       t = (*lang_hooks.types.signed_type) (type);
4092       return fold (convert (type,
4093                             build (RSHIFT_EXPR, t,
4094                                    make_tree (t, XEXP (x, 0)),
4095                                    make_tree (type, XEXP (x, 1)))));
4096
4097     case DIV:
4098       if (TREE_CODE (type) != REAL_TYPE)
4099         t = (*lang_hooks.types.signed_type) (type);
4100       else
4101         t = type;
4102
4103       return fold (convert (type,
4104                             build (TRUNC_DIV_EXPR, t,
4105                                    make_tree (t, XEXP (x, 0)),
4106                                    make_tree (t, XEXP (x, 1)))));
4107     case UDIV:
4108       t = (*lang_hooks.types.unsigned_type) (type);
4109       return fold (convert (type,
4110                             build (TRUNC_DIV_EXPR, t,
4111                                    make_tree (t, XEXP (x, 0)),
4112                                    make_tree (t, XEXP (x, 1)))));
4113
4114     case SIGN_EXTEND:
4115     case ZERO_EXTEND:
4116       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4117                                              GET_CODE (x) == ZERO_EXTEND);
4118       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4119
4120    default:
4121       t = make_node (RTL_EXPR);
4122       TREE_TYPE (t) = type;
4123
4124       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4125          ptr_mode.  So convert.  */
4126       if (POINTER_TYPE_P (type))
4127         x = convert_memory_address (TYPE_MODE (type), x);
4128
4129       RTL_EXPR_RTL (t) = x;
4130       /* There are no insns to be output
4131          when this rtl_expr is used.  */
4132       RTL_EXPR_SEQUENCE (t) = 0;
4133       return t;
4134     }
4135 }
4136
4137 /* Check whether the multiplication X * MULT + ADD overflows.
4138    X, MULT and ADD must be CONST_*.
4139    MODE is the machine mode for the computation.
4140    X and MULT must have mode MODE.  ADD may have a different mode.
4141    So can X (defaults to same as MODE).
4142    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4143
4144 bool
4145 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4146 {
4147   tree type, mult_type, add_type, result;
4148
4149   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4150
4151   /* In order to get a proper overflow indication from an unsigned
4152      type, we have to pretend that it's a sizetype.  */
4153   mult_type = type;
4154   if (unsignedp)
4155     {
4156       mult_type = copy_node (type);
4157       TYPE_IS_SIZETYPE (mult_type) = 1;
4158     }
4159
4160   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4161               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4162
4163   result = fold (build (PLUS_EXPR, mult_type,
4164                         fold (build (MULT_EXPR, mult_type,
4165                                      make_tree (mult_type, x),
4166                                      make_tree (mult_type, mult))),
4167                         make_tree (add_type, add)));
4168
4169   return TREE_CONSTANT_OVERFLOW (result);
4170 }
4171
4172 /* Return an rtx representing the value of X * MULT + ADD.
4173    TARGET is a suggestion for where to store the result (an rtx).
4174    MODE is the machine mode for the computation.
4175    X and MULT must have mode MODE.  ADD may have a different mode.
4176    So can X (defaults to same as MODE).
4177    UNSIGNEDP is nonzero to do unsigned multiplication.
4178    This may emit insns.  */
4179
4180 rtx
4181 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4182                  int unsignedp)
4183 {
4184   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4185   tree add_type = (GET_MODE (add) == VOIDmode
4186                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4187                                                               unsignedp));
4188   tree result =  fold (build (PLUS_EXPR, type,
4189                               fold (build (MULT_EXPR, type,
4190                                            make_tree (type, x),
4191                                            make_tree (type, mult))),
4192                               make_tree (add_type, add)));
4193
4194   return expand_expr (result, target, VOIDmode, 0);
4195 }
4196 \f
4197 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4198    and returning TARGET.
4199
4200    If TARGET is 0, a pseudo-register or constant is returned.  */
4201
4202 rtx
4203 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4204 {
4205   rtx tem = 0;
4206
4207   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4208     tem = simplify_binary_operation (AND, mode, op0, op1);
4209   if (tem == 0)
4210     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4211
4212   if (target == 0)
4213     target = tem;
4214   else if (tem != target)
4215     emit_move_insn (target, tem);
4216   return target;
4217 }
4218 \f
4219 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4220    and storing in TARGET.  Normally return TARGET.
4221    Return 0 if that cannot be done.
4222
4223    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4224    it is VOIDmode, they cannot both be CONST_INT.
4225
4226    UNSIGNEDP is for the case where we have to widen the operands
4227    to perform the operation.  It says to use zero-extension.
4228
4229    NORMALIZEP is 1 if we should convert the result to be either zero
4230    or one.  Normalize is -1 if we should convert the result to be
4231    either zero or -1.  If NORMALIZEP is zero, the result will be left
4232    "raw" out of the scc insn.  */
4233
4234 rtx
4235 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4236                  enum machine_mode mode, int unsignedp, int normalizep)
4237 {
4238   rtx subtarget;
4239   enum insn_code icode;
4240   enum machine_mode compare_mode;
4241   enum machine_mode target_mode = GET_MODE (target);
4242   rtx tem;
4243   rtx last = get_last_insn ();
4244   rtx pattern, comparison;
4245
4246   /* ??? Ok to do this and then fail? */
4247   op0 = protect_from_queue (op0, 0);
4248   op1 = protect_from_queue (op1, 0);
4249
4250   if (unsignedp)
4251     code = unsigned_condition (code);
4252
4253   /* If one operand is constant, make it the second one.  Only do this
4254      if the other operand is not constant as well.  */
4255
4256   if (swap_commutative_operands_p (op0, op1))
4257     {
4258       tem = op0;
4259       op0 = op1;
4260       op1 = tem;
4261       code = swap_condition (code);
4262     }
4263
4264   if (mode == VOIDmode)
4265     mode = GET_MODE (op0);
4266
4267   /* For some comparisons with 1 and -1, we can convert this to
4268      comparisons with zero.  This will often produce more opportunities for
4269      store-flag insns.  */
4270
4271   switch (code)
4272     {
4273     case LT:
4274       if (op1 == const1_rtx)
4275         op1 = const0_rtx, code = LE;
4276       break;
4277     case LE:
4278       if (op1 == constm1_rtx)
4279         op1 = const0_rtx, code = LT;
4280       break;
4281     case GE:
4282       if (op1 == const1_rtx)
4283         op1 = const0_rtx, code = GT;
4284       break;
4285     case GT:
4286       if (op1 == constm1_rtx)
4287         op1 = const0_rtx, code = GE;
4288       break;
4289     case GEU:
4290       if (op1 == const1_rtx)
4291         op1 = const0_rtx, code = NE;
4292       break;
4293     case LTU:
4294       if (op1 == const1_rtx)
4295         op1 = const0_rtx, code = EQ;
4296       break;
4297     default:
4298       break;
4299     }
4300
4301   /* If we are comparing a double-word integer with zero, we can convert
4302      the comparison into one involving a single word.  */
4303   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4304       && GET_MODE_CLASS (mode) == MODE_INT
4305       && op1 == const0_rtx
4306       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4307     {
4308       if (code == EQ || code == NE)
4309         {
4310           rtx op00, op01, op0both;
4311
4312           /* Do a logical OR of the two words and compare the result.  */
4313           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4314           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4315           op0both = expand_binop (word_mode, ior_optab, op00, op01,
4316                                   NULL_RTX, unsignedp, OPTAB_DIRECT);
4317           if (op0both != 0)
4318             return emit_store_flag (target, code, op0both, op1, word_mode,
4319                                     unsignedp, normalizep);
4320         }
4321       else if (code == LT || code == GE)
4322         {
4323           rtx op0h;
4324
4325           /* If testing the sign bit, can just test on high word.  */
4326           op0h = simplify_gen_subreg (word_mode, op0, mode,
4327                                       subreg_highpart_offset (word_mode, mode));
4328           return emit_store_flag (target, code, op0h, op1, word_mode,
4329                                   unsignedp, normalizep);
4330         }
4331     }
4332
4333   /* From now on, we won't change CODE, so set ICODE now.  */
4334   icode = setcc_gen_code[(int) code];
4335
4336   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4337      complement of A (for GE) and shifting the sign bit to the low bit.  */
4338   if (op1 == const0_rtx && (code == LT || code == GE)
4339       && GET_MODE_CLASS (mode) == MODE_INT
4340       && (normalizep || STORE_FLAG_VALUE == 1
4341           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4342               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4343                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4344     {
4345       subtarget = target;
4346
4347       /* If the result is to be wider than OP0, it is best to convert it
4348          first.  If it is to be narrower, it is *incorrect* to convert it
4349          first.  */
4350       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4351         {
4352           op0 = protect_from_queue (op0, 0);
4353           op0 = convert_modes (target_mode, mode, op0, 0);
4354           mode = target_mode;
4355         }
4356
4357       if (target_mode != mode)
4358         subtarget = 0;
4359
4360       if (code == GE)
4361         op0 = expand_unop (mode, one_cmpl_optab, op0,
4362                            ((STORE_FLAG_VALUE == 1 || normalizep)
4363                             ? 0 : subtarget), 0);
4364
4365       if (STORE_FLAG_VALUE == 1 || normalizep)
4366         /* If we are supposed to produce a 0/1 value, we want to do
4367            a logical shift from the sign bit to the low-order bit; for
4368            a -1/0 value, we do an arithmetic shift.  */
4369         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4370                             size_int (GET_MODE_BITSIZE (mode) - 1),
4371                             subtarget, normalizep != -1);
4372
4373       if (mode != target_mode)
4374         op0 = convert_modes (target_mode, mode, op0, 0);
4375
4376       return op0;
4377     }
4378
4379   if (icode != CODE_FOR_nothing)
4380     {
4381       insn_operand_predicate_fn pred;
4382
4383       /* We think we may be able to do this with a scc insn.  Emit the
4384          comparison and then the scc insn.
4385
4386          compare_from_rtx may call emit_queue, which would be deleted below
4387          if the scc insn fails.  So call it ourselves before setting LAST.
4388          Likewise for do_pending_stack_adjust.  */
4389
4390       emit_queue ();
4391       do_pending_stack_adjust ();
4392       last = get_last_insn ();
4393
4394       comparison
4395         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4396       if (GET_CODE (comparison) == CONST_INT)
4397         return (comparison == const0_rtx ? const0_rtx
4398                 : normalizep == 1 ? const1_rtx
4399                 : normalizep == -1 ? constm1_rtx
4400                 : const_true_rtx);
4401
4402       /* The code of COMPARISON may not match CODE if compare_from_rtx
4403          decided to swap its operands and reverse the original code.
4404
4405          We know that compare_from_rtx returns either a CONST_INT or
4406          a new comparison code, so it is safe to just extract the
4407          code from COMPARISON.  */
4408       code = GET_CODE (comparison);
4409
4410       /* Get a reference to the target in the proper mode for this insn.  */
4411       compare_mode = insn_data[(int) icode].operand[0].mode;
4412       subtarget = target;
4413       pred = insn_data[(int) icode].operand[0].predicate;
4414       if (preserve_subexpressions_p ()
4415           || ! (*pred) (subtarget, compare_mode))
4416         subtarget = gen_reg_rtx (compare_mode);
4417
4418       pattern = GEN_FCN (icode) (subtarget);
4419       if (pattern)
4420         {
4421           emit_insn (pattern);
4422
4423           /* If we are converting to a wider mode, first convert to
4424              TARGET_MODE, then normalize.  This produces better combining
4425              opportunities on machines that have a SIGN_EXTRACT when we are
4426              testing a single bit.  This mostly benefits the 68k.
4427
4428              If STORE_FLAG_VALUE does not have the sign bit set when
4429              interpreted in COMPARE_MODE, we can do this conversion as
4430              unsigned, which is usually more efficient.  */
4431           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4432             {
4433               convert_move (target, subtarget,
4434                             (GET_MODE_BITSIZE (compare_mode)
4435                              <= HOST_BITS_PER_WIDE_INT)
4436                             && 0 == (STORE_FLAG_VALUE
4437                                      & ((HOST_WIDE_INT) 1
4438                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4439               op0 = target;
4440               compare_mode = target_mode;
4441             }
4442           else
4443             op0 = subtarget;
4444
4445           /* If we want to keep subexpressions around, don't reuse our
4446              last target.  */
4447
4448           if (preserve_subexpressions_p ())
4449             subtarget = 0;
4450
4451           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4452              we don't have to do anything.  */
4453           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4454             ;
4455           /* STORE_FLAG_VALUE might be the most negative number, so write
4456              the comparison this way to avoid a compiler-time warning.  */
4457           else if (- normalizep == STORE_FLAG_VALUE)
4458             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4459
4460           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4461              makes it hard to use a value of just the sign bit due to
4462              ANSI integer constant typing rules.  */
4463           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4464                    && (STORE_FLAG_VALUE
4465                        & ((HOST_WIDE_INT) 1
4466                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4467             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4468                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4469                                 subtarget, normalizep == 1);
4470           else if (STORE_FLAG_VALUE & 1)
4471             {
4472               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4473               if (normalizep == -1)
4474                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4475             }
4476           else
4477             abort ();
4478
4479           /* If we were converting to a smaller mode, do the
4480              conversion now.  */
4481           if (target_mode != compare_mode)
4482             {
4483               convert_move (target, op0, 0);
4484               return target;
4485             }
4486           else
4487             return op0;
4488         }
4489     }
4490
4491   delete_insns_since (last);
4492
4493   /* If expensive optimizations, use different pseudo registers for each
4494      insn, instead of reusing the same pseudo.  This leads to better CSE,
4495      but slows down the compiler, since there are more pseudos */
4496   subtarget = (!flag_expensive_optimizations
4497                && (target_mode == mode)) ? target : NULL_RTX;
4498
4499   /* If we reached here, we can't do this with a scc insn.  However, there
4500      are some comparisons that can be done directly.  For example, if
4501      this is an equality comparison of integers, we can try to exclusive-or
4502      (or subtract) the two operands and use a recursive call to try the
4503      comparison with zero.  Don't do any of these cases if branches are
4504      very cheap.  */
4505
4506   if (BRANCH_COST > 0
4507       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4508       && op1 != const0_rtx)
4509     {
4510       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4511                           OPTAB_WIDEN);
4512
4513       if (tem == 0)
4514         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4515                             OPTAB_WIDEN);
4516       if (tem != 0)
4517         tem = emit_store_flag (target, code, tem, const0_rtx,
4518                                mode, unsignedp, normalizep);
4519       if (tem == 0)
4520         delete_insns_since (last);
4521       return tem;
4522     }
4523
4524   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4525      the constant zero.  Reject all other comparisons at this point.  Only
4526      do LE and GT if branches are expensive since they are expensive on
4527      2-operand machines.  */
4528
4529   if (BRANCH_COST == 0
4530       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4531       || (code != EQ && code != NE
4532           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4533     return 0;
4534
4535   /* See what we need to return.  We can only return a 1, -1, or the
4536      sign bit.  */
4537
4538   if (normalizep == 0)
4539     {
4540       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4541         normalizep = STORE_FLAG_VALUE;
4542
4543       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4544                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4545                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4546         ;
4547       else
4548         return 0;
4549     }
4550
4551   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4552      do the necessary operation below.  */
4553
4554   tem = 0;
4555
4556   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4557      the sign bit set.  */
4558
4559   if (code == LE)
4560     {
4561       /* This is destructive, so SUBTARGET can't be OP0.  */
4562       if (rtx_equal_p (subtarget, op0))
4563         subtarget = 0;
4564
4565       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4566                           OPTAB_WIDEN);
4567       if (tem)
4568         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4569                             OPTAB_WIDEN);
4570     }
4571
4572   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4573      number of bits in the mode of OP0, minus one.  */
4574
4575   if (code == GT)
4576     {
4577       if (rtx_equal_p (subtarget, op0))
4578         subtarget = 0;
4579
4580       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4581                           size_int (GET_MODE_BITSIZE (mode) - 1),
4582                           subtarget, 0);
4583       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4584                           OPTAB_WIDEN);
4585     }
4586
4587   if (code == EQ || code == NE)
4588     {
4589       /* For EQ or NE, one way to do the comparison is to apply an operation
4590          that converts the operand into a positive number if it is nonzero
4591          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4592          for NE we negate.  This puts the result in the sign bit.  Then we
4593          normalize with a shift, if needed.
4594
4595          Two operations that can do the above actions are ABS and FFS, so try
4596          them.  If that doesn't work, and MODE is smaller than a full word,
4597          we can use zero-extension to the wider mode (an unsigned conversion)
4598          as the operation.  */
4599
4600       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4601          that is compensated by the subsequent overflow when subtracting
4602          one / negating.  */
4603
4604       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4605         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4606       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4607         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4608       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4609         {
4610           op0 = protect_from_queue (op0, 0);
4611           tem = convert_modes (word_mode, mode, op0, 1);
4612           mode = word_mode;
4613         }
4614
4615       if (tem != 0)
4616         {
4617           if (code == EQ)
4618             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4619                                 0, OPTAB_WIDEN);
4620           else
4621             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4622         }
4623
4624       /* If we couldn't do it that way, for NE we can "or" the two's complement
4625          of the value with itself.  For EQ, we take the one's complement of
4626          that "or", which is an extra insn, so we only handle EQ if branches
4627          are expensive.  */
4628
4629       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4630         {
4631           if (rtx_equal_p (subtarget, op0))
4632             subtarget = 0;
4633
4634           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4635           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4636                               OPTAB_WIDEN);
4637
4638           if (tem && code == EQ)
4639             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4640         }
4641     }
4642
4643   if (tem && normalizep)
4644     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4645                         size_int (GET_MODE_BITSIZE (mode) - 1),
4646                         subtarget, normalizep == 1);
4647
4648   if (tem)
4649     {
4650       if (GET_MODE (tem) != target_mode)
4651         {
4652           convert_move (target, tem, 0);
4653           tem = target;
4654         }
4655       else if (!subtarget)
4656         {
4657           emit_move_insn (target, tem);
4658           tem = target;
4659         }
4660     }
4661   else
4662     delete_insns_since (last);
4663
4664   return tem;
4665 }
4666
4667 /* Like emit_store_flag, but always succeeds.  */
4668
4669 rtx
4670 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4671                        enum machine_mode mode, int unsignedp, int normalizep)
4672 {
4673   rtx tem, label;
4674
4675   /* First see if emit_store_flag can do the job.  */
4676   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4677   if (tem != 0)
4678     return tem;
4679
4680   if (normalizep == 0)
4681     normalizep = 1;
4682
4683   /* If this failed, we have to do this with set/compare/jump/set code.  */
4684
4685   if (GET_CODE (target) != REG
4686       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4687     target = gen_reg_rtx (GET_MODE (target));
4688
4689   emit_move_insn (target, const1_rtx);
4690   label = gen_label_rtx ();
4691   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4692                            NULL_RTX, label);
4693
4694   emit_move_insn (target, const0_rtx);
4695   emit_label (label);
4696
4697   return target;
4698 }
4699 \f
4700 /* Perform possibly multi-word comparison and conditional jump to LABEL
4701    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4702
4703    The algorithm is based on the code in expr.c:do_jump.
4704
4705    Note that this does not perform a general comparison.  Only variants
4706    generated within expmed.c are correctly handled, others abort (but could
4707    be handled if needed).  */
4708
4709 static void
4710 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
4711                  rtx label)
4712 {
4713   /* If this mode is an integer too wide to compare properly,
4714      compare word by word.  Rely on cse to optimize constant cases.  */
4715
4716   if (GET_MODE_CLASS (mode) == MODE_INT
4717       && ! can_compare_p (op, mode, ccp_jump))
4718     {
4719       rtx label2 = gen_label_rtx ();
4720
4721       switch (op)
4722         {
4723         case LTU:
4724           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4725           break;
4726
4727         case LEU:
4728           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4729           break;
4730
4731         case LT:
4732           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4733           break;
4734
4735         case GT:
4736           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4737           break;
4738
4739         case GE:
4740           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4741           break;
4742
4743           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4744              that's the only equality operations we do */
4745         case EQ:
4746           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4747             abort ();
4748           do_jump_by_parts_equality_rtx (arg1, label2, label);
4749           break;
4750
4751         case NE:
4752           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4753             abort ();
4754           do_jump_by_parts_equality_rtx (arg1, label, label2);
4755           break;
4756
4757         default:
4758           abort ();
4759         }
4760
4761       emit_label (label2);
4762     }
4763   else
4764     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4765 }