gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-config.h"
  32 #include "expr.h"
  33 #include "optabs.h"
  34 #include "real.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37
  38 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  39                                                  unsigned HOST_WIDE_INT,
  40                                                  unsigned HOST_WIDE_INT, rtx));
  41 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  42                                                  unsigned HOST_WIDE_INT, rtx));
  43 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  44                                                  unsigned HOST_WIDE_INT,
  45                                                  unsigned HOST_WIDE_INT,
  46                                                  unsigned HOST_WIDE_INT,
  47                                                  rtx, int));
  48 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  49                                                  int, int));
  50 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  51                                                  int, int));
  52 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  53                                                  unsigned HOST_WIDE_INT, int));
  54 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  55                                                  enum machine_mode, rtx));
  56
  57 /* Nonzero means divides or modulus operations are relatively cheap for
  58    powers of two, so don't use branches; emit the operation instead.
  59    Usually, this will mean that the MD file will emit non-branch
  60    sequences.  */
  61
  62 static int sdiv_pow2_cheap, smod_pow2_cheap;
  63
  64 #ifndef SLOW_UNALIGNED_ACCESS
  65 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  66 #endif
  67
  68 /* For compilers that support multiple targets with different word sizes,
  69    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  70    is the H8/300(H) compiler.  */
  71
  72 #ifndef MAX_BITS_PER_WORD
  73 #define MAX_BITS_PER_WORD BITS_PER_WORD
  74 #endif
  75
  76 /* Reduce conditional compilation elsewhere.  */
  77 #ifndef HAVE_insv
  78 #define HAVE_insv       0
  79 #define CODE_FOR_insv   CODE_FOR_nothing
  80 #define gen_insv(a,b,c,d) NULL_RTX
  81 #endif
  82 #ifndef HAVE_extv
  83 #define HAVE_extv       0
  84 #define CODE_FOR_extv   CODE_FOR_nothing
  85 #define gen_extv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extzv
  88 #define HAVE_extzv      0
  89 #define CODE_FOR_extzv  CODE_FOR_nothing
  90 #define gen_extzv(a,b,c,d) NULL_RTX
  91 #endif
  92
  93 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  94    shift count and some by mode.  */
  95 static int add_cost, negate_cost, zero_cost;
  96 static int shift_cost[MAX_BITS_PER_WORD];
  97 static int shiftadd_cost[MAX_BITS_PER_WORD];
  98 static int shiftsub_cost[MAX_BITS_PER_WORD];
  99 static int mul_cost[NUM_MACHINE_MODES];
 100 static int div_cost[NUM_MACHINE_MODES];
 101 static int mul_widen_cost[NUM_MACHINE_MODES];
 102 static int mul_highpart_cost[NUM_MACHINE_MODES];
 103
 104 void
 105 init_expmed ()
 106 {
 107   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 108   int dummy;
 109   int m;
 110   enum machine_mode mode, wider_mode;
 111
 112   start_sequence ();
 113
 114   /* This is "some random pseudo register" for purposes of calling recog
 115      to see what insns exist.  */
 116   reg = gen_rtx_REG (word_mode, 10000);
 117
 118   zero_cost = rtx_cost (const0_rtx, 0);
 119   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 120
 121   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 122                                        gen_rtx_ASHIFT (word_mode, reg,
 123                                                        const0_rtx)));
 124
 125   shiftadd_insn
 126     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 127                               gen_rtx_PLUS (word_mode,
 128                                             gen_rtx_MULT (word_mode,
 129                                                           reg, const0_rtx),
 130                                             reg)));
 131
 132   shiftsub_insn
 133     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 134                               gen_rtx_MINUS (word_mode,
 135                                              gen_rtx_MULT (word_mode,
 136                                                            reg, const0_rtx),
 137                                              reg)));
 138
 139   init_recog ();
 140
 141   shift_cost[0] = 0;
 142   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 143
 144   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 145     {
 146       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 147       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 148
 149       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 150       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 151         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 152
 153       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 154       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 155         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 156
 157       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 158       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 159         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 160     }
 161
 162   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 163
 164   sdiv_pow2_cheap
 165     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 166        <= 2 * add_cost);
 167   smod_pow2_cheap
 168     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 169        <= 2 * add_cost);
 170
 171   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 172        mode != VOIDmode;
 173        mode = GET_MODE_WIDER_MODE (mode))
 174     {
 175       reg = gen_rtx_REG (mode, 10000);
 176       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 177       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 178       wider_mode = GET_MODE_WIDER_MODE (mode);
 179       if (wider_mode != VOIDmode)
 180         {
 181           mul_widen_cost[(int) wider_mode]
 182             = rtx_cost (gen_rtx_MULT (wider_mode,
 183                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 184                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 185                         SET);
 186           mul_highpart_cost[(int) mode]
 187             = rtx_cost (gen_rtx_TRUNCATE
 188                         (mode,
 189                          gen_rtx_LSHIFTRT (wider_mode,
 190                                            gen_rtx_MULT (wider_mode,
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg),
 193                                                          gen_rtx_ZERO_EXTEND
 194                                                          (wider_mode, reg)),
 195                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 196                         SET);
 197         }
 198     }
 199
 200   end_sequence ();
 201 }
 202
 203 /* Return an rtx representing minus the value of X.
 204    MODE is the intended mode of the result,
 205    useful if X is a CONST_INT.  */
 206
 207 rtx
 208 negate_rtx (mode, x)
 209      enum machine_mode mode;
 210      rtx x;
 211 {
 212   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 213
 214   if (result == 0)
 215     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 216
 217   return result;
 218 }
 219
 220 /* Report on the availability of insv/extv/extzv and the desired mode
 221    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 222    is false; else the mode of the specified operand.  If OPNO is -1,
 223    all the caller cares about is whether the insn is available.  */
 224 enum machine_mode
 225 mode_for_extraction (pattern, opno)
 226      enum extraction_pattern pattern;
 227      int opno;
 228 {
 229   const struct insn_data *data;
 230
 231   switch (pattern)
 232     {
 233     case EP_insv:
 234       if (HAVE_insv)
 235         {
 236           data = &insn_data[CODE_FOR_insv];
 237           break;
 238         }
 239       return MAX_MACHINE_MODE;
 240
 241     case EP_extv:
 242       if (HAVE_extv)
 243         {
 244           data = &insn_data[CODE_FOR_extv];
 245           break;
 246         }
 247       return MAX_MACHINE_MODE;
 248
 249     case EP_extzv:
 250       if (HAVE_extzv)
 251         {
 252           data = &insn_data[CODE_FOR_extzv];
 253           break;
 254         }
 255       return MAX_MACHINE_MODE;
 256
 257     default:
 258       abort ();
 259     }
 260
 261   if (opno == -1)
 262     return VOIDmode;
 263
 264   /* Everyone who uses this function used to follow it with
 265      if (result == VOIDmode) result = word_mode; */
 266   if (data->operand[opno].mode == VOIDmode)
 267     return word_mode;
 268   return data->operand[opno].mode;
 269 }
 270
 271 \f
 272 /* Generate code to store value from rtx VALUE
 273    into a bit-field within structure STR_RTX
 274    containing BITSIZE bits starting at bit BITNUM.
 275    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 276    ALIGN is the alignment that STR_RTX is known to have.
 277    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 278
 279 /* ??? Note that there are two different ideas here for how
 280    to determine the size to count bits within, for a register.
 281    One is BITS_PER_WORD, and the other is the size of operand 3
 282    of the insv pattern.
 283
 284    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 285    else, we use the mode of operand 3.  */
 286
 287 rtx
 288 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size)
 289      rtx str_rtx;
 290      unsigned HOST_WIDE_INT bitsize;
 291      unsigned HOST_WIDE_INT bitnum;
 292      enum machine_mode fieldmode;
 293      rtx value;
 294      HOST_WIDE_INT total_size;
 295 {
 296   unsigned int unit
 297     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 298   unsigned HOST_WIDE_INT offset = bitnum / unit;
 299   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 300   rtx op0 = str_rtx;
 301   int byte_offset;
 302
 303   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 304
 305   /* Discount the part of the structure before the desired byte.
 306      We need to know how many bytes are safe to reference after it.  */
 307   if (total_size >= 0)
 308     total_size -= (bitpos / BIGGEST_ALIGNMENT
 309                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 310
 311   while (GET_CODE (op0) == SUBREG)
 312     {
 313       /* The following line once was done only if WORDS_BIG_ENDIAN,
 314          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 315          meaningful at a much higher level; when structures are copied
 316          between memory and regs, the higher-numbered regs
 317          always get higher addresses.  */
 318       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 319       /* We used to adjust BITPOS here, but now we do the whole adjustment
 320          right after the loop.  */
 321       op0 = SUBREG_REG (op0);
 322     }
 323
 324   value = protect_from_queue (value, 0);
 325
 326   if (flag_force_mem)
 327     {
 328       int old_generating_concat_p = generating_concat_p;
 329       generating_concat_p = 0;
 330       value = force_not_mem (value);
 331       generating_concat_p = old_generating_concat_p;
 332     }
 333
 334   /* If the target is a register, overwriting the entire object, or storing
 335      a full-word or multi-word field can be done with just a SUBREG.
 336
 337      If the target is memory, storing any naturally aligned field can be
 338      done with a simple store.  For targets that support fast unaligned
 339      memory, any naturally sized, unit aligned field can be done directly.  */
 340
 341   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 342                 + (offset * UNITS_PER_WORD);
 343
 344   if (bitpos == 0
 345       && bitsize == GET_MODE_BITSIZE (fieldmode)
 346       && (GET_CODE (op0) != MEM
 347           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 348              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 349              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 350           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 351              || (offset * BITS_PER_UNIT % bitsize == 0
 352                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 353     {
 354       if (GET_MODE (op0) != fieldmode)
 355         {
 356           if (GET_CODE (op0) == SUBREG)
 357             {
 358               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 359                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 360                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 361                 op0 = SUBREG_REG (op0);
 362               else
 363                 /* Else we've got some float mode source being extracted into
 364                    a different float mode destination -- this combination of
 365                    subregs results in Severe Tire Damage.  */
 366                 abort ();
 367             }
 368           if (GET_CODE (op0) == REG)
 369             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 370           else
 371             op0 = adjust_address (op0, fieldmode, offset);
 372         }
 373       emit_move_insn (op0, value);
 374       return value;
 375     }
 376
 377   /* Make sure we are playing with integral modes.  Pun with subregs
 378      if we aren't.  This must come after the entire register case above,
 379      since that case is valid for any mode.  The following cases are only
 380      valid for integral modes.  */
 381   {
 382     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 383     if (imode != GET_MODE (op0))
 384       {
 385         if (GET_CODE (op0) == MEM)
 386           op0 = adjust_address (op0, imode, 0);
 387         else if (imode != BLKmode)
 388           op0 = gen_lowpart (imode, op0);
 389         else
 390           abort ();
 391       }
 392   }
 393
 394   /* We may be accessing data outside the field, which means
 395      we can alias adjacent data.  */
 396   if (GET_CODE (op0) == MEM)
 397     {
 398       op0 = shallow_copy_rtx (op0);
 399       set_mem_alias_set (op0, 0);
 400       set_mem_expr (op0, 0);
 401     }
 402
 403   /* If OP0 is a register, BITPOS must count within a word.
 404      But as we have it, it counts within whatever size OP0 now has.
 405      On a bigendian machine, these are not the same, so convert.  */
 406   if (BYTES_BIG_ENDIAN
 407       && !FUNCTION_ARG_REG_LITTLE_ENDIAN
 408       && GET_CODE (op0) != MEM
 409       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 410     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 411
 412   /* Storing an lsb-aligned field in a register
 413      can be done with a movestrict instruction.  */
 414
 415   if (GET_CODE (op0) != MEM
 416       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 417       && bitsize == GET_MODE_BITSIZE (fieldmode)
 418       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 419           != CODE_FOR_nothing))
 420     {
 421       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 422
 423       /* Get appropriate low part of the value being stored.  */
 424       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 425         value = gen_lowpart (fieldmode, value);
 426       else if (!(GET_CODE (value) == SYMBOL_REF
 427                  || GET_CODE (value) == LABEL_REF
 428                  || GET_CODE (value) == CONST))
 429         value = convert_to_mode (fieldmode, value, 0);
 430
 431       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 432         value = copy_to_mode_reg (fieldmode, value);
 433
 434       if (GET_CODE (op0) == SUBREG)
 435         {
 436           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 437               || GET_MODE_CLASS (fieldmode) == MODE_INT
 438               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 439             op0 = SUBREG_REG (op0);
 440           else
 441             /* Else we've got some float mode source being extracted into
 442                a different float mode destination -- this combination of
 443                subregs results in Severe Tire Damage.  */
 444             abort ();
 445         }
 446
 447       emit_insn (GEN_FCN (icode)
 448                  (gen_rtx_SUBREG (fieldmode, op0,
 449                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 450                                   + (offset * UNITS_PER_WORD)),
 451                                   value));
 452
 453       return value;
 454     }
 455
 456   /* Handle fields bigger than a word.  */
 457
 458   if (bitsize > BITS_PER_WORD)
 459     {
 460       /* Here we transfer the words of the field
 461          in the order least significant first.
 462          This is because the most significant word is the one which may
 463          be less than full.
 464          However, only do that if the value is not BLKmode.  */
 465
 466       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 467       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 468       unsigned int i;
 469
 470       /* This is the mode we must force value to, so that there will be enough
 471          subwords to extract.  Note that fieldmode will often (always?) be
 472          VOIDmode, because that is what store_field uses to indicate that this
 473          is a bit field, but passing VOIDmode to operand_subword_force will
 474          result in an abort.  */
 475       fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 476
 477       for (i = 0; i < nwords; i++)
 478         {
 479           /* If I is 0, use the low-order word in both field and target;
 480              if I is 1, use the next to lowest word; and so on.  */
 481           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 482           unsigned int bit_offset = (backwards
 483                                      ? MAX ((int) bitsize - ((int) i + 1)
 484                                             * BITS_PER_WORD,
 485                                             0)
 486                                      : (int) i * BITS_PER_WORD);
 487
 488           store_bit_field (op0, MIN (BITS_PER_WORD,
 489                                      bitsize - i * BITS_PER_WORD),
 490                            bitnum + bit_offset, word_mode,
 491                            operand_subword_force (value, wordnum,
 492                                                   (GET_MODE (value) == VOIDmode
 493                                                    ? fieldmode
 494                                                    : GET_MODE (value))),
 495                            total_size);
 496         }
 497       return value;
 498     }
 499
 500   /* From here on we can assume that the field to be stored in is
 501      a full-word (whatever type that is), since it is shorter than a word.  */
 502
 503   /* OFFSET is the number of words or bytes (UNIT says which)
 504      from STR_RTX to the first word or byte containing part of the field.  */
 505
 506   if (GET_CODE (op0) != MEM)
 507     {
 508       if (offset != 0
 509           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 510         {
 511           if (GET_CODE (op0) != REG)
 512             {
 513               /* Since this is a destination (lvalue), we can't copy it to a
 514                  pseudo.  We can trivially remove a SUBREG that does not
 515                  change the size of the operand.  Such a SUBREG may have been
 516                  added above.  Otherwise, abort.  */
 517               if (GET_CODE (op0) == SUBREG
 518                   && (GET_MODE_SIZE (GET_MODE (op0))
 519                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 520                 op0 = SUBREG_REG (op0);
 521               else
 522                 abort ();
 523             }
 524           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 525                                 op0, (offset * UNITS_PER_WORD));
 526         }
 527       offset = 0;
 528     }
 529   else
 530     op0 = protect_from_queue (op0, 1);
 531
 532   /* If VALUE is a floating-point mode, access it as an integer of the
 533      corresponding size.  This can occur on a machine with 64 bit registers
 534      that uses SFmode for float.  This can also occur for unaligned float
 535      structure fields.  */
 536   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 537       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 538     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 539                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 540                          value);
 541
 542   /* Now OFFSET is nonzero only if OP0 is memory
 543      and is therefore always measured in bytes.  */
 544
 545   if (HAVE_insv
 546       && GET_MODE (value) != BLKmode
 547       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 548       /* Ensure insv's size is wide enough for this field.  */
 549       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 550       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 551             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 552     {
 553       int xbitpos = bitpos;
 554       rtx value1;
 555       rtx xop0 = op0;
 556       rtx last = get_last_insn ();
 557       rtx pat;
 558       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 559       int save_volatile_ok = volatile_ok;
 560
 561       volatile_ok = 1;
 562
 563       /* If this machine's insv can only insert into a register, copy OP0
 564          into a register and save it back later.  */
 565       /* This used to check flag_force_mem, but that was a serious
 566          de-optimization now that flag_force_mem is enabled by -O2.  */
 567       if (GET_CODE (op0) == MEM
 568           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 569                 (op0, VOIDmode)))
 570         {
 571           rtx tempreg;
 572           enum machine_mode bestmode;
 573
 574           /* Get the mode to use for inserting into this field.  If OP0 is
 575              BLKmode, get the smallest mode consistent with the alignment. If
 576              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 577              mode. Otherwise, use the smallest mode containing the field.  */
 578
 579           if (GET_MODE (op0) == BLKmode
 580               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 581             bestmode
 582               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 583                                MEM_VOLATILE_P (op0));
 584           else
 585             bestmode = GET_MODE (op0);
 586
 587           if (bestmode == VOIDmode
 588               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 589                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 590             goto insv_loses;
 591
 592           /* Adjust address to point to the containing unit of that mode.
 593              Compute offset as multiple of this unit, counting in bytes.  */
 594           unit = GET_MODE_BITSIZE (bestmode);
 595           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 596           bitpos = bitnum % unit;
 597           op0 = adjust_address (op0, bestmode,  offset);
 598
 599           /* Fetch that unit, store the bitfield in it, then store
 600              the unit.  */
 601           tempreg = copy_to_reg (op0);
 602           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 603                            total_size);
 604           emit_move_insn (op0, tempreg);
 605           return value;
 606         }
 607       volatile_ok = save_volatile_ok;
 608
 609       /* Add OFFSET into OP0's address.  */
 610       if (GET_CODE (xop0) == MEM)
 611         xop0 = adjust_address (xop0, byte_mode, offset);
 612
 613       /* If xop0 is a register, we need it in MAXMODE
 614          to make it acceptable to the format of insv.  */
 615       if (GET_CODE (xop0) == SUBREG)
 616         /* We can't just change the mode, because this might clobber op0,
 617            and we will need the original value of op0 if insv fails.  */
 618         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 619       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 620         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 621
 622       /* On big-endian machines, we count bits from the most significant.
 623          If the bit field insn does not, we must invert.  */
 624
 625       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 626         xbitpos = unit - bitsize - xbitpos;
 627
 628       /* We have been counting XBITPOS within UNIT.
 629          Count instead within the size of the register.  */
 630       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 631         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 632
 633       unit = GET_MODE_BITSIZE (maxmode);
 634
 635       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 636       value1 = value;
 637       if (GET_MODE (value) != maxmode)
 638         {
 639           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 640             {
 641               /* Optimization: Don't bother really extending VALUE
 642                  if it has all the bits we will actually use.  However,
 643                  if we must narrow it, be sure we do it correctly.  */
 644
 645               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 646                 {
 647                   rtx tmp;
 648
 649                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 650                   if (! tmp)
 651                     tmp = simplify_gen_subreg (maxmode,
 652                                                force_reg (GET_MODE (value),
 653                                                           value1),
 654                                                GET_MODE (value), 0);
 655                   value1 = tmp;
 656                 }
 657               else
 658                 value1 = gen_lowpart (maxmode, value1);
 659             }
 660           else if (GET_CODE (value) == CONST_INT)
 661             value1 = gen_int_mode (INTVAL (value), maxmode);
 662           else if (!CONSTANT_P (value))
 663             /* Parse phase is supposed to make VALUE's data type
 664                match that of the component reference, which is a type
 665                at least as wide as the field; so VALUE should have
 666                a mode that corresponds to that type.  */
 667             abort ();
 668         }
 669
 670       /* If this machine's insv insists on a register,
 671          get VALUE1 into a register.  */
 672       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 673              (value1, maxmode)))
 674         value1 = force_reg (maxmode, value1);
 675
 676       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 677       if (pat)
 678         emit_insn (pat);
 679       else
 680         {
 681           delete_insns_since (last);
 682           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 683         }
 684     }
 685   else
 686     insv_loses:
 687     /* Insv is not available; store using shifts and boolean ops.  */
 688     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 689   return value;
 690 }
 691 \f
 692 /* Use shifts and boolean operations to store VALUE
 693    into a bit field of width BITSIZE
 694    in a memory location specified by OP0 except offset by OFFSET bytes.
 695      (OFFSET must be 0 if OP0 is a register.)
 696    The field starts at position BITPOS within the byte.
 697     (If OP0 is a register, it may be a full word or a narrower mode,
 698      but BITPOS still counts within a full word,
 699      which is significant on bigendian machines.)
 700
 701    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 702
 703 static void
 704 store_fixed_bit_field (op0, offset, bitsize, bitpos, value)
 705      rtx op0;
 706      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 707      rtx value;
 708 {
 709   enum machine_mode mode;
 710   unsigned int total_bits = BITS_PER_WORD;
 711   rtx subtarget, temp;
 712   int all_zero = 0;
 713   int all_one = 0;
 714
 715   /* There is a case not handled here:
 716      a structure with a known alignment of just a halfword
 717      and a field split across two aligned halfwords within the structure.
 718      Or likewise a structure with a known alignment of just a byte
 719      and a field split across two bytes.
 720      Such cases are not supposed to be able to occur.  */
 721
 722   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 723     {
 724       if (offset != 0)
 725         abort ();
 726       /* Special treatment for a bit field split across two registers.  */
 727       if (bitsize + bitpos > BITS_PER_WORD)
 728         {
 729           store_split_bit_field (op0, bitsize, bitpos, value);
 730           return;
 731         }
 732     }
 733   else
 734     {
 735       /* Get the proper mode to use for this field.  We want a mode that
 736          includes the entire field.  If such a mode would be larger than
 737          a word, we won't be doing the extraction the normal way.
 738          We don't want a mode bigger than the destination.  */
 739
 740       mode = GET_MODE (op0);
 741       if (GET_MODE_BITSIZE (mode) == 0
 742           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 743         mode = word_mode;
 744       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 745                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 746
 747       if (mode == VOIDmode)
 748         {
 749           /* The only way this should occur is if the field spans word
 750              boundaries.  */
 751           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 752                                  value);
 753           return;
 754         }
 755
 756       total_bits = GET_MODE_BITSIZE (mode);
 757
 758       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 759          be in the range 0 to total_bits-1, and put any excess bytes in
 760          OFFSET.  */
 761       if (bitpos >= total_bits)
 762         {
 763           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 764           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 765                      * BITS_PER_UNIT);
 766         }
 767
 768       /* Get ref to an aligned byte, halfword, or word containing the field.
 769          Adjust BITPOS to be position within a word,
 770          and OFFSET to be the offset of that word.
 771          Then alter OP0 to refer to that word.  */
 772       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 773       offset -= (offset % (total_bits / BITS_PER_UNIT));
 774       op0 = adjust_address (op0, mode, offset);
 775     }
 776
 777   mode = GET_MODE (op0);
 778
 779   /* Now MODE is either some integral mode for a MEM as OP0,
 780      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 781      The bit field is contained entirely within OP0.
 782      BITPOS is the starting bit number within OP0.
 783      (OP0's mode may actually be narrower than MODE.)  */
 784
 785   if (BYTES_BIG_ENDIAN)
 786       /* BITPOS is the distance between our msb
 787          and that of the containing datum.
 788          Convert it to the distance from the lsb.  */
 789       bitpos = total_bits - bitsize - bitpos;
 790
 791   /* Now BITPOS is always the distance between our lsb
 792      and that of OP0.  */
 793
 794   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 795      we must first convert its mode to MODE.  */
 796
 797   if (GET_CODE (value) == CONST_INT)
 798     {
 799       HOST_WIDE_INT v = INTVAL (value);
 800
 801       if (bitsize < HOST_BITS_PER_WIDE_INT)
 802         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 803
 804       if (v == 0)
 805         all_zero = 1;
 806       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 807                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 808                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 809         all_one = 1;
 810
 811       value = lshift_value (mode, value, bitpos, bitsize);
 812     }
 813   else
 814     {
 815       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 816                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 817
 818       if (GET_MODE (value) != mode)
 819         {
 820           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 821               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 822             value = gen_lowpart (mode, value);
 823           else
 824             value = convert_to_mode (mode, value, 1);
 825         }
 826
 827       if (must_and)
 828         value = expand_binop (mode, and_optab, value,
 829                               mask_rtx (mode, 0, bitsize, 0),
 830                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 831       if (bitpos > 0)
 832         value = expand_shift (LSHIFT_EXPR, mode, value,
 833                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 834     }
 835
 836   /* Now clear the chosen bits in OP0,
 837      except that if VALUE is -1 we need not bother.  */
 838
 839   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 840
 841   if (! all_one)
 842     {
 843       temp = expand_binop (mode, and_optab, op0,
 844                            mask_rtx (mode, bitpos, bitsize, 1),
 845                            subtarget, 1, OPTAB_LIB_WIDEN);
 846       subtarget = temp;
 847     }
 848   else
 849     temp = op0;
 850
 851   /* Now logical-or VALUE into OP0, unless it is zero.  */
 852
 853   if (! all_zero)
 854     temp = expand_binop (mode, ior_optab, temp, value,
 855                          subtarget, 1, OPTAB_LIB_WIDEN);
 856   if (op0 != temp)
 857     emit_move_insn (op0, temp);
 858 }
 859 \f
 860 /* Store a bit field that is split across multiple accessible memory objects.
 861
 862    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 863    BITSIZE is the field width; BITPOS the position of its first bit
 864    (within the word).
 865    VALUE is the value to store.
 866
 867    This does not yet handle fields wider than BITS_PER_WORD.  */
 868
 869 static void
 870 store_split_bit_field (op0, bitsize, bitpos, value)
 871      rtx op0;
 872      unsigned HOST_WIDE_INT bitsize, bitpos;
 873      rtx value;
 874 {
 875   unsigned int unit;
 876   unsigned int bitsdone = 0;
 877
 878   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 879      much at a time.  */
 880   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 881     unit = BITS_PER_WORD;
 882   else
 883     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 884
 885   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 886      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 887      that VALUE might be a floating-point constant.  */
 888   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 889     {
 890       rtx word = gen_lowpart_common (word_mode, value);
 891
 892       if (word && (value != word))
 893         value = word;
 894       else
 895         value = gen_lowpart_common (word_mode,
 896                                     force_reg (GET_MODE (value) != VOIDmode
 897                                                ? GET_MODE (value)
 898                                                : word_mode, value));
 899     }
 900   else if (GET_CODE (value) == ADDRESSOF)
 901     value = copy_to_reg (value);
 902
 903   while (bitsdone < bitsize)
 904     {
 905       unsigned HOST_WIDE_INT thissize;
 906       rtx part, word;
 907       unsigned HOST_WIDE_INT thispos;
 908       unsigned HOST_WIDE_INT offset;
 909
 910       offset = (bitpos + bitsdone) / unit;
 911       thispos = (bitpos + bitsdone) % unit;
 912
 913       /* THISSIZE must not overrun a word boundary.  Otherwise,
 914          store_fixed_bit_field will call us again, and we will mutually
 915          recurse forever.  */
 916       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 917       thissize = MIN (thissize, unit - thispos);
 918
 919       if (BYTES_BIG_ENDIAN)
 920         {
 921           int total_bits;
 922
 923           /* We must do an endian conversion exactly the same way as it is
 924              done in extract_bit_field, so that the two calls to
 925              extract_fixed_bit_field will have comparable arguments.  */
 926           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 927             total_bits = BITS_PER_WORD;
 928           else
 929             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 930
 931           /* Fetch successively less significant portions.  */
 932           if (GET_CODE (value) == CONST_INT)
 933             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 934                              >> (bitsize - bitsdone - thissize))
 935                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 936           else
 937             /* The args are chosen so that the last part includes the
 938                lsb.  Give extract_bit_field the value it needs (with
 939                endianness compensation) to fetch the piece we want.  */
 940             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 941                                             total_bits - bitsize + bitsdone,
 942                                             NULL_RTX, 1);
 943         }
 944       else
 945         {
 946           /* Fetch successively more significant portions.  */
 947           if (GET_CODE (value) == CONST_INT)
 948             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 949                              >> bitsdone)
 950                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 951           else
 952             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 953                                             bitsdone, NULL_RTX, 1);
 954         }
 955
 956       /* If OP0 is a register, then handle OFFSET here.
 957
 958          When handling multiword bitfields, extract_bit_field may pass
 959          down a word_mode SUBREG of a larger REG for a bitfield that actually
 960          crosses a word boundary.  Thus, for a SUBREG, we must find
 961          the current word starting from the base register.  */
 962       if (GET_CODE (op0) == SUBREG)
 963         {
 964           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 965           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 966                                         GET_MODE (SUBREG_REG (op0)));
 967           offset = 0;
 968         }
 969       else if (GET_CODE (op0) == REG)
 970         {
 971           word = operand_subword_force (op0, offset, GET_MODE (op0));
 972           offset = 0;
 973         }
 974       else
 975         word = op0;
 976
 977       /* OFFSET is in UNITs, and UNIT is in bits.
 978          store_fixed_bit_field wants offset in bytes.  */
 979       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 980                              thispos, part);
 981       bitsdone += thissize;
 982     }
 983 }
 984 \f
 985 /* Generate code to extract a byte-field from STR_RTX
 986    containing BITSIZE bits, starting at BITNUM,
 987    and put it in TARGET if possible (if TARGET is nonzero).
 988    Regardless of TARGET, we return the rtx for where the value is placed.
 989    It may be a QUEUED.
 990
 991    STR_RTX is the structure containing the byte (a REG or MEM).
 992    UNSIGNEDP is nonzero if this is an unsigned bit field.
 993    MODE is the natural mode of the field value once extracted.
 994    TMODE is the mode the caller would like the value to have;
 995    but the value may be returned with type MODE instead.
 996
 997    TOTAL_SIZE is the size in bytes of the containing structure,
 998    or -1 if varying.
 999
1000    If a TARGET is specified and we can store in it at no extra cost,
1001    we do so, and return TARGET.
1002    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1003    if they are equally easy.  */
1004
1005 rtx
1006 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
1007                    target, mode, tmode, total_size)
1008      rtx str_rtx;
1009      unsigned HOST_WIDE_INT bitsize;
1010      unsigned HOST_WIDE_INT bitnum;
1011      int unsignedp;
1012      rtx target;
1013      enum machine_mode mode, tmode;
1014      HOST_WIDE_INT total_size;
1015 {
1016   unsigned int unit
1017     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
1018   unsigned HOST_WIDE_INT offset = bitnum / unit;
1019   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1020   rtx op0 = str_rtx;
1021   rtx spec_target = target;
1022   rtx spec_target_subreg = 0;
1023   enum machine_mode int_mode;
1024   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1025   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1026   enum machine_mode mode1;
1027   int byte_offset;
1028
1029   /* Discount the part of the structure before the desired byte.
1030      We need to know how many bytes are safe to reference after it.  */
1031   if (total_size >= 0)
1032     total_size -= (bitpos / BIGGEST_ALIGNMENT
1033                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1034
1035   if (tmode == VOIDmode)
1036     tmode = mode;
1037
1038   while (GET_CODE (op0) == SUBREG)
1039     {
1040       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1041       if (bitpos > unit)
1042         {
1043           offset += (bitpos / unit);
1044           bitpos %= unit;
1045         }
1046       op0 = SUBREG_REG (op0);
1047     }
1048
1049   if (GET_CODE (op0) == REG
1050       && mode == GET_MODE (op0)
1051       && bitnum == 0
1052       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1053     {
1054       /* We're trying to extract a full register from itself.  */
1055       return op0;
1056     }
1057
1058   /* Make sure we are playing with integral modes.  Pun with subregs
1059      if we aren't.  */
1060   {
1061     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1062     if (imode != GET_MODE (op0))
1063       {
1064         if (GET_CODE (op0) == MEM)
1065           op0 = adjust_address (op0, imode, 0);
1066         else if (imode != BLKmode)
1067           op0 = gen_lowpart (imode, op0);
1068         else
1069           abort ();
1070       }
1071   }
1072
1073   /* We may be accessing data outside the field, which means
1074      we can alias adjacent data.  */
1075   if (GET_CODE (op0) == MEM)
1076     {
1077       op0 = shallow_copy_rtx (op0);
1078       set_mem_alias_set (op0, 0);
1079       set_mem_expr (op0, 0);
1080     }
1081
1082   /* Extraction of a full-word or multi-word value from a structure
1083      in a register or aligned memory can be done with just a SUBREG.
1084      A subword value in the least significant part of a register
1085      can also be extracted with a SUBREG.  For this, we need the
1086      byte offset of the value in op0.  */
1087
1088   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1089
1090   /* If OP0 is a register, BITPOS must count within a word.
1091      But as we have it, it counts within whatever size OP0 now has.
1092      On a bigendian machine, these are not the same, so convert.  */
1093   if (BYTES_BIG_ENDIAN
1094       && GET_CODE (op0) != MEM
1095       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1096     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1097
1098   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1099      If that's wrong, the solution is to test for it and set TARGET to 0
1100      if needed.  */
1101
1102   mode1  = (VECTOR_MODE_P (tmode)
1103             ? mode
1104             : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1105
1106   if (((GET_CODE (op0) != MEM
1107         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1108                                   GET_MODE_BITSIZE (GET_MODE (op0)))
1109         && GET_MODE_SIZE (mode1) != 0
1110         && byte_offset % GET_MODE_SIZE (mode1) == 0)
1111        || (GET_CODE (op0) == MEM
1112            && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1113                || (offset * BITS_PER_UNIT % bitsize == 0
1114                    && MEM_ALIGN (op0) % bitsize == 0))))
1115       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1116            && bitpos % BITS_PER_WORD == 0)
1117           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1118               /* ??? The big endian test here is wrong.  This is correct
1119                  if the value is in a register, and if mode_for_size is not
1120                  the same mode as op0.  This causes us to get unnecessarily
1121                  inefficient code from the Thumb port when -mbig-endian.  */
1122               && (BYTES_BIG_ENDIAN
1123                   ? bitpos + bitsize == BITS_PER_WORD
1124                   : bitpos == 0))))
1125     {
1126       if (mode1 != GET_MODE (op0))
1127         {
1128           if (GET_CODE (op0) == SUBREG)
1129             {
1130               if (GET_MODE (SUBREG_REG (op0)) == mode1
1131                   || GET_MODE_CLASS (mode1) == MODE_INT
1132                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1133                 op0 = SUBREG_REG (op0);
1134               else
1135                 /* Else we've got some float mode source being extracted into
1136                    a different float mode destination -- this combination of
1137                    subregs results in Severe Tire Damage.  */
1138                 goto no_subreg_mode_swap;
1139             }
1140           if (GET_CODE (op0) == REG)
1141             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1142           else
1143             op0 = adjust_address (op0, mode1, offset);
1144         }
1145       if (mode1 != mode)
1146         return convert_to_mode (tmode, op0, unsignedp);
1147       return op0;
1148     }
1149  no_subreg_mode_swap:
1150
1151   /* Handle fields bigger than a word.  */
1152
1153   if (bitsize > BITS_PER_WORD)
1154     {
1155       /* Here we transfer the words of the field
1156          in the order least significant first.
1157          This is because the most significant word is the one which may
1158          be less than full.  */
1159
1160       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1161       unsigned int i;
1162
1163       if (target == 0 || GET_CODE (target) != REG)
1164         target = gen_reg_rtx (mode);
1165
1166       /* Indicate for flow that the entire target reg is being set.  */
1167       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1168
1169       for (i = 0; i < nwords; i++)
1170         {
1171           /* If I is 0, use the low-order word in both field and target;
1172              if I is 1, use the next to lowest word; and so on.  */
1173           /* Word number in TARGET to use.  */
1174           unsigned int wordnum
1175             = (WORDS_BIG_ENDIAN
1176                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1177                : i);
1178           /* Offset from start of field in OP0.  */
1179           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1180                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1181                                                 * (int) BITS_PER_WORD))
1182                                      : (int) i * BITS_PER_WORD);
1183           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1184           rtx result_part
1185             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1186                                            bitsize - i * BITS_PER_WORD),
1187                                  bitnum + bit_offset, 1, target_part, mode,
1188                                  word_mode, total_size);
1189
1190           if (target_part == 0)
1191             abort ();
1192
1193           if (result_part != target_part)
1194             emit_move_insn (target_part, result_part);
1195         }
1196
1197       if (unsignedp)
1198         {
1199           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1200              need to be zero'd out.  */
1201           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1202             {
1203               unsigned int i, total_words;
1204
1205               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1206               for (i = nwords; i < total_words; i++)
1207                 emit_move_insn
1208                   (operand_subword (target,
1209                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1210                                     1, VOIDmode),
1211                    const0_rtx);
1212             }
1213           return target;
1214         }
1215
1216       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1217       target = expand_shift (LSHIFT_EXPR, mode, target,
1218                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1219                              NULL_RTX, 0);
1220       return expand_shift (RSHIFT_EXPR, mode, target,
1221                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1222                            NULL_RTX, 0);
1223     }
1224
1225   /* From here on we know the desired field is smaller than a word.  */
1226
1227   /* Check if there is a correspondingly-sized integer field, so we can
1228      safely extract it as one size of integer, if necessary; then
1229      truncate or extend to the size that is wanted; then use SUBREGs or
1230      convert_to_mode to get one of the modes we really wanted.  */
1231
1232   int_mode = int_mode_for_mode (tmode);
1233   if (int_mode == BLKmode)
1234     int_mode = int_mode_for_mode (mode);
1235   if (int_mode == BLKmode)
1236     abort ();    /* Should probably push op0 out to memory and then
1237                     do a load.  */
1238
1239   /* OFFSET is the number of words or bytes (UNIT says which)
1240      from STR_RTX to the first word or byte containing part of the field.  */
1241
1242   if (GET_CODE (op0) != MEM)
1243     {
1244       if (offset != 0
1245           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1246         {
1247           if (GET_CODE (op0) != REG)
1248             op0 = copy_to_reg (op0);
1249           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1250                                 op0, (offset * UNITS_PER_WORD));
1251         }
1252       offset = 0;
1253     }
1254   else
1255     op0 = protect_from_queue (str_rtx, 1);
1256
1257   /* Now OFFSET is nonzero only for memory operands.  */
1258
1259   if (unsignedp)
1260     {
1261       if (HAVE_extzv
1262           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1263           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1264                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1265         {
1266           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1267           rtx bitsize_rtx, bitpos_rtx;
1268           rtx last = get_last_insn ();
1269           rtx xop0 = op0;
1270           rtx xtarget = target;
1271           rtx xspec_target = spec_target;
1272           rtx xspec_target_subreg = spec_target_subreg;
1273           rtx pat;
1274           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1275
1276           if (GET_CODE (xop0) == MEM)
1277             {
1278               int save_volatile_ok = volatile_ok;
1279               volatile_ok = 1;
1280
1281               /* Is the memory operand acceptable?  */
1282               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1283                      (xop0, GET_MODE (xop0))))
1284                 {
1285                   /* No, load into a reg and extract from there.  */
1286                   enum machine_mode bestmode;
1287
1288                   /* Get the mode to use for inserting into this field.  If
1289                      OP0 is BLKmode, get the smallest mode consistent with the
1290                      alignment. If OP0 is a non-BLKmode object that is no
1291                      wider than MAXMODE, use its mode. Otherwise, use the
1292                      smallest mode containing the field.  */
1293
1294                   if (GET_MODE (xop0) == BLKmode
1295                       || (GET_MODE_SIZE (GET_MODE (op0))
1296                           > GET_MODE_SIZE (maxmode)))
1297                     bestmode = get_best_mode (bitsize, bitnum,
1298                                               MEM_ALIGN (xop0), maxmode,
1299                                               MEM_VOLATILE_P (xop0));
1300                   else
1301                     bestmode = GET_MODE (xop0);
1302
1303                   if (bestmode == VOIDmode
1304                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1305                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1306                     goto extzv_loses;
1307
1308                   /* Compute offset as multiple of this unit,
1309                      counting in bytes.  */
1310                   unit = GET_MODE_BITSIZE (bestmode);
1311                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1312                   xbitpos = bitnum % unit;
1313                   xop0 = adjust_address (xop0, bestmode, xoffset);
1314
1315                   /* Fetch it to a register in that size.  */
1316                   xop0 = force_reg (bestmode, xop0);
1317
1318                   /* XBITPOS counts within UNIT, which is what is expected.  */
1319                 }
1320               else
1321                 /* Get ref to first byte containing part of the field.  */
1322                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1323
1324               volatile_ok = save_volatile_ok;
1325             }
1326
1327           /* If op0 is a register, we need it in MAXMODE (which is usually
1328              SImode). to make it acceptable to the format of extzv.  */
1329           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1330             goto extzv_loses;
1331           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1332             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1333
1334           /* On big-endian machines, we count bits from the most significant.
1335              If the bit field insn does not, we must invert.  */
1336           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1337             xbitpos = unit - bitsize - xbitpos;
1338
1339           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1340           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1341             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1342
1343           unit = GET_MODE_BITSIZE (maxmode);
1344
1345           if (xtarget == 0
1346               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1347             xtarget = xspec_target = gen_reg_rtx (tmode);
1348
1349           if (GET_MODE (xtarget) != maxmode)
1350             {
1351               if (GET_CODE (xtarget) == REG)
1352                 {
1353                   int wider = (GET_MODE_SIZE (maxmode)
1354                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1355                   xtarget = gen_lowpart (maxmode, xtarget);
1356                   if (wider)
1357                     xspec_target_subreg = xtarget;
1358                 }
1359               else
1360                 xtarget = gen_reg_rtx (maxmode);
1361             }
1362
1363           /* If this machine's extzv insists on a register target,
1364              make sure we have one.  */
1365           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1366                  (xtarget, maxmode)))
1367             xtarget = gen_reg_rtx (maxmode);
1368
1369           bitsize_rtx = GEN_INT (bitsize);
1370           bitpos_rtx = GEN_INT (xbitpos);
1371
1372           pat = gen_extzv (protect_from_queue (xtarget, 1),
1373                            xop0, bitsize_rtx, bitpos_rtx);
1374           if (pat)
1375             {
1376               emit_insn (pat);
1377               target = xtarget;
1378               spec_target = xspec_target;
1379               spec_target_subreg = xspec_target_subreg;
1380             }
1381           else
1382             {
1383               delete_insns_since (last);
1384               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1385                                                 bitpos, target, 1);
1386             }
1387         }
1388       else
1389       extzv_loses:
1390         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1391                                           bitpos, target, 1);
1392     }
1393   else
1394     {
1395       if (HAVE_extv
1396           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1397           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1398                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1399         {
1400           int xbitpos = bitpos, xoffset = offset;
1401           rtx bitsize_rtx, bitpos_rtx;
1402           rtx last = get_last_insn ();
1403           rtx xop0 = op0, xtarget = target;
1404           rtx xspec_target = spec_target;
1405           rtx xspec_target_subreg = spec_target_subreg;
1406           rtx pat;
1407           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1408
1409           if (GET_CODE (xop0) == MEM)
1410             {
1411               /* Is the memory operand acceptable?  */
1412               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1413                      (xop0, GET_MODE (xop0))))
1414                 {
1415                   /* No, load into a reg and extract from there.  */
1416                   enum machine_mode bestmode;
1417
1418                   /* Get the mode to use for inserting into this field.  If
1419                      OP0 is BLKmode, get the smallest mode consistent with the
1420                      alignment. If OP0 is a non-BLKmode object that is no
1421                      wider than MAXMODE, use its mode. Otherwise, use the
1422                      smallest mode containing the field.  */
1423
1424                   if (GET_MODE (xop0) == BLKmode
1425                       || (GET_MODE_SIZE (GET_MODE (op0))
1426                           > GET_MODE_SIZE (maxmode)))
1427                     bestmode = get_best_mode (bitsize, bitnum,
1428                                               MEM_ALIGN (xop0), maxmode,
1429                                               MEM_VOLATILE_P (xop0));
1430                   else
1431                     bestmode = GET_MODE (xop0);
1432
1433                   if (bestmode == VOIDmode
1434                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1435                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1436                     goto extv_loses;
1437
1438                   /* Compute offset as multiple of this unit,
1439                      counting in bytes.  */
1440                   unit = GET_MODE_BITSIZE (bestmode);
1441                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1442                   xbitpos = bitnum % unit;
1443                   xop0 = adjust_address (xop0, bestmode, xoffset);
1444
1445                   /* Fetch it to a register in that size.  */
1446                   xop0 = force_reg (bestmode, xop0);
1447
1448                   /* XBITPOS counts within UNIT, which is what is expected.  */
1449                 }
1450               else
1451                 /* Get ref to first byte containing part of the field.  */
1452                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1453             }
1454
1455           /* If op0 is a register, we need it in MAXMODE (which is usually
1456              SImode) to make it acceptable to the format of extv.  */
1457           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1458             goto extv_loses;
1459           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1460             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1461
1462           /* On big-endian machines, we count bits from the most significant.
1463              If the bit field insn does not, we must invert.  */
1464           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1465             xbitpos = unit - bitsize - xbitpos;
1466
1467           /* XBITPOS counts within a size of UNIT.
1468              Adjust to count within a size of MAXMODE.  */
1469           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1470             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1471
1472           unit = GET_MODE_BITSIZE (maxmode);
1473
1474           if (xtarget == 0
1475               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1476             xtarget = xspec_target = gen_reg_rtx (tmode);
1477
1478           if (GET_MODE (xtarget) != maxmode)
1479             {
1480               if (GET_CODE (xtarget) == REG)
1481                 {
1482                   int wider = (GET_MODE_SIZE (maxmode)
1483                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1484                   xtarget = gen_lowpart (maxmode, xtarget);
1485                   if (wider)
1486                     xspec_target_subreg = xtarget;
1487                 }
1488               else
1489                 xtarget = gen_reg_rtx (maxmode);
1490             }
1491
1492           /* If this machine's extv insists on a register target,
1493              make sure we have one.  */
1494           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1495                  (xtarget, maxmode)))
1496             xtarget = gen_reg_rtx (maxmode);
1497
1498           bitsize_rtx = GEN_INT (bitsize);
1499           bitpos_rtx = GEN_INT (xbitpos);
1500
1501           pat = gen_extv (protect_from_queue (xtarget, 1),
1502                           xop0, bitsize_rtx, bitpos_rtx);
1503           if (pat)
1504             {
1505               emit_insn (pat);
1506               target = xtarget;
1507               spec_target = xspec_target;
1508               spec_target_subreg = xspec_target_subreg;
1509             }
1510           else
1511             {
1512               delete_insns_since (last);
1513               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1514                                                 bitpos, target, 0);
1515             }
1516         }
1517       else
1518       extv_loses:
1519         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1520                                           bitpos, target, 0);
1521     }
1522   if (target == spec_target)
1523     return target;
1524   if (target == spec_target_subreg)
1525     return spec_target;
1526   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1527     {
1528       /* If the target mode is floating-point, first convert to the
1529          integer mode of that size and then access it as a floating-point
1530          value via a SUBREG.  */
1531       if (GET_MODE_CLASS (tmode) != MODE_INT
1532           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1533         {
1534           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1535                                                    MODE_INT, 0),
1536                                     target, unsignedp);
1537           return gen_lowpart (tmode, target);
1538         }
1539       else
1540         return convert_to_mode (tmode, target, unsignedp);
1541     }
1542   return target;
1543 }
1544 \f
1545 /* Extract a bit field using shifts and boolean operations
1546    Returns an rtx to represent the value.
1547    OP0 addresses a register (word) or memory (byte).
1548    BITPOS says which bit within the word or byte the bit field starts in.
1549    OFFSET says how many bytes farther the bit field starts;
1550     it is 0 if OP0 is a register.
1551    BITSIZE says how many bits long the bit field is.
1552     (If OP0 is a register, it may be narrower than a full word,
1553      but BITPOS still counts within a full word,
1554      which is significant on bigendian machines.)
1555
1556    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1557    If TARGET is nonzero, attempts to store the value there
1558    and return TARGET, but this is not guaranteed.
1559    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1560
1561 static rtx
1562 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1563                          target, unsignedp)
1564      enum machine_mode tmode;
1565      rtx op0, target;
1566      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1567      int unsignedp;
1568 {
1569   unsigned int total_bits = BITS_PER_WORD;
1570   enum machine_mode mode;
1571
1572   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1573     {
1574       /* Special treatment for a bit field split across two registers.  */
1575       if (bitsize + bitpos > BITS_PER_WORD)
1576         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1577     }
1578   else
1579     {
1580       /* Get the proper mode to use for this field.  We want a mode that
1581          includes the entire field.  If such a mode would be larger than
1582          a word, we won't be doing the extraction the normal way.  */
1583
1584       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1585                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1586
1587       if (mode == VOIDmode)
1588         /* The only way this should occur is if the field spans word
1589            boundaries.  */
1590         return extract_split_bit_field (op0, bitsize,
1591                                         bitpos + offset * BITS_PER_UNIT,
1592                                         unsignedp);
1593
1594       total_bits = GET_MODE_BITSIZE (mode);
1595
1596       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1597          be in the range 0 to total_bits-1, and put any excess bytes in
1598          OFFSET.  */
1599       if (bitpos >= total_bits)
1600         {
1601           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1602           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1603                      * BITS_PER_UNIT);
1604         }
1605
1606       /* Get ref to an aligned byte, halfword, or word containing the field.
1607          Adjust BITPOS to be position within a word,
1608          and OFFSET to be the offset of that word.
1609          Then alter OP0 to refer to that word.  */
1610       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1611       offset -= (offset % (total_bits / BITS_PER_UNIT));
1612       op0 = adjust_address (op0, mode, offset);
1613     }
1614
1615   mode = GET_MODE (op0);
1616
1617   if (BYTES_BIG_ENDIAN)
1618     /* BITPOS is the distance between our msb and that of OP0.
1619        Convert it to the distance from the lsb.  */
1620     bitpos = total_bits - bitsize - bitpos;
1621
1622   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1623      We have reduced the big-endian case to the little-endian case.  */
1624
1625   if (unsignedp)
1626     {
1627       if (bitpos)
1628         {
1629           /* If the field does not already start at the lsb,
1630              shift it so it does.  */
1631           tree amount = build_int_2 (bitpos, 0);
1632           /* Maybe propagate the target for the shift.  */
1633           /* But not if we will return it--could confuse integrate.c.  */
1634           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1635                            && !REG_FUNCTION_VALUE_P (target)
1636                            ? target : 0);
1637           if (tmode != mode) subtarget = 0;
1638           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1639         }
1640       /* Convert the value to the desired mode.  */
1641       if (mode != tmode)
1642         op0 = convert_to_mode (tmode, op0, 1);
1643
1644       /* Unless the msb of the field used to be the msb when we shifted,
1645          mask out the upper bits.  */
1646
1647       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1648         return expand_binop (GET_MODE (op0), and_optab, op0,
1649                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1650                              target, 1, OPTAB_LIB_WIDEN);
1651       return op0;
1652     }
1653
1654   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1655      then arithmetic-shift its lsb to the lsb of the word.  */
1656   op0 = force_reg (mode, op0);
1657   if (mode != tmode)
1658     target = 0;
1659
1660   /* Find the narrowest integer mode that contains the field.  */
1661
1662   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1663        mode = GET_MODE_WIDER_MODE (mode))
1664     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1665       {
1666         op0 = convert_to_mode (mode, op0, 0);
1667         break;
1668       }
1669
1670   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1671     {
1672       tree amount
1673         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1674       /* Maybe propagate the target for the shift.  */
1675       /* But not if we will return the result--could confuse integrate.c.  */
1676       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1677                        && ! REG_FUNCTION_VALUE_P (target)
1678                        ? target : 0);
1679       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1680     }
1681
1682   return expand_shift (RSHIFT_EXPR, mode, op0,
1683                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1684                        target, 0);
1685 }
1686 \f
1687 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1688    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1689    complement of that if COMPLEMENT.  The mask is truncated if
1690    necessary to the width of mode MODE.  The mask is zero-extended if
1691    BITSIZE+BITPOS is too small for MODE.  */
1692
1693 static rtx
1694 mask_rtx (mode, bitpos, bitsize, complement)
1695      enum machine_mode mode;
1696      int bitpos, bitsize, complement;
1697 {
1698   HOST_WIDE_INT masklow, maskhigh;
1699
1700   if (bitpos < HOST_BITS_PER_WIDE_INT)
1701     masklow = (HOST_WIDE_INT) -1 << bitpos;
1702   else
1703     masklow = 0;
1704
1705   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1706     masklow &= ((unsigned HOST_WIDE_INT) -1
1707                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1708
1709   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1710     maskhigh = -1;
1711   else
1712     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1713
1714   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1715     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1716                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1717   else
1718     maskhigh = 0;
1719
1720   if (complement)
1721     {
1722       maskhigh = ~maskhigh;
1723       masklow = ~masklow;
1724     }
1725
1726   return immed_double_const (masklow, maskhigh, mode);
1727 }
1728
1729 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1730    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1731
1732 static rtx
1733 lshift_value (mode, value, bitpos, bitsize)
1734      enum machine_mode mode;
1735      rtx value;
1736      int bitpos, bitsize;
1737 {
1738   unsigned HOST_WIDE_INT v = INTVAL (value);
1739   HOST_WIDE_INT low, high;
1740
1741   if (bitsize < HOST_BITS_PER_WIDE_INT)
1742     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1743
1744   if (bitpos < HOST_BITS_PER_WIDE_INT)
1745     {
1746       low = v << bitpos;
1747       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1748     }
1749   else
1750     {
1751       low = 0;
1752       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1753     }
1754
1755   return immed_double_const (low, high, mode);
1756 }
1757 \f
1758 /* Extract a bit field that is split across two words
1759    and return an RTX for the result.
1760
1761    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1762    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1763    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1764
1765 static rtx
1766 extract_split_bit_field (op0, bitsize, bitpos, unsignedp)
1767      rtx op0;
1768      unsigned HOST_WIDE_INT bitsize, bitpos;
1769      int unsignedp;
1770 {
1771   unsigned int unit;
1772   unsigned int bitsdone = 0;
1773   rtx result = NULL_RTX;
1774   int first = 1;
1775
1776   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1777      much at a time.  */
1778   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1779     unit = BITS_PER_WORD;
1780   else
1781     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1782
1783   while (bitsdone < bitsize)
1784     {
1785       unsigned HOST_WIDE_INT thissize;
1786       rtx part, word;
1787       unsigned HOST_WIDE_INT thispos;
1788       unsigned HOST_WIDE_INT offset;
1789
1790       offset = (bitpos + bitsdone) / unit;
1791       thispos = (bitpos + bitsdone) % unit;
1792
1793       /* THISSIZE must not overrun a word boundary.  Otherwise,
1794          extract_fixed_bit_field will call us again, and we will mutually
1795          recurse forever.  */
1796       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1797       thissize = MIN (thissize, unit - thispos);
1798
1799       /* If OP0 is a register, then handle OFFSET here.
1800
1801          When handling multiword bitfields, extract_bit_field may pass
1802          down a word_mode SUBREG of a larger REG for a bitfield that actually
1803          crosses a word boundary.  Thus, for a SUBREG, we must find
1804          the current word starting from the base register.  */
1805       if (GET_CODE (op0) == SUBREG)
1806         {
1807           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1808           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1809                                         GET_MODE (SUBREG_REG (op0)));
1810           offset = 0;
1811         }
1812       else if (GET_CODE (op0) == REG)
1813         {
1814           word = operand_subword_force (op0, offset, GET_MODE (op0));
1815           offset = 0;
1816         }
1817       else
1818         word = op0;
1819
1820       /* Extract the parts in bit-counting order,
1821          whose meaning is determined by BYTES_PER_UNIT.
1822          OFFSET is in UNITs, and UNIT is in bits.
1823          extract_fixed_bit_field wants offset in bytes.  */
1824       part = extract_fixed_bit_field (word_mode, word,
1825                                       offset * unit / BITS_PER_UNIT,
1826                                       thissize, thispos, 0, 1);
1827       bitsdone += thissize;
1828
1829       /* Shift this part into place for the result.  */
1830       if (BYTES_BIG_ENDIAN)
1831         {
1832           if (bitsize != bitsdone)
1833             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1834                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1835         }
1836       else
1837         {
1838           if (bitsdone != thissize)
1839             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1840                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1841         }
1842
1843       if (first)
1844         result = part;
1845       else
1846         /* Combine the parts with bitwise or.  This works
1847            because we extracted each part as an unsigned bit field.  */
1848         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1849                                OPTAB_LIB_WIDEN);
1850
1851       first = 0;
1852     }
1853
1854   /* Unsigned bit field: we are done.  */
1855   if (unsignedp)
1856     return result;
1857   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1858   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1859                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1860                          NULL_RTX, 0);
1861   return expand_shift (RSHIFT_EXPR, word_mode, result,
1862                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1863 }
1864 \f
1865 /* Add INC into TARGET.  */
1866
1867 void
1868 expand_inc (target, inc)
1869      rtx target, inc;
1870 {
1871   rtx value = expand_binop (GET_MODE (target), add_optab,
1872                             target, inc,
1873                             target, 0, OPTAB_LIB_WIDEN);
1874   if (value != target)
1875     emit_move_insn (target, value);
1876 }
1877
1878 /* Subtract DEC from TARGET.  */
1879
1880 void
1881 expand_dec (target, dec)
1882      rtx target, dec;
1883 {
1884   rtx value = expand_binop (GET_MODE (target), sub_optab,
1885                             target, dec,
1886                             target, 0, OPTAB_LIB_WIDEN);
1887   if (value != target)
1888     emit_move_insn (target, value);
1889 }
1890 \f
1891 /* Output a shift instruction for expression code CODE,
1892    with SHIFTED being the rtx for the value to shift,
1893    and AMOUNT the tree for the amount to shift by.
1894    Store the result in the rtx TARGET, if that is convenient.
1895    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1896    Return the rtx for where the value is.  */
1897
1898 rtx
1899 expand_shift (code, mode, shifted, amount, target, unsignedp)
1900      enum tree_code code;
1901      enum machine_mode mode;
1902      rtx shifted;
1903      tree amount;
1904      rtx target;
1905      int unsignedp;
1906 {
1907   rtx op1, temp = 0;
1908   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1909   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1910   int try;
1911
1912   /* Previously detected shift-counts computed by NEGATE_EXPR
1913      and shifted in the other direction; but that does not work
1914      on all machines.  */
1915
1916   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1917
1918 #ifdef SHIFT_COUNT_TRUNCATED
1919   if (SHIFT_COUNT_TRUNCATED)
1920     {
1921       if (GET_CODE (op1) == CONST_INT
1922           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1923               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1924         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1925                        % GET_MODE_BITSIZE (mode));
1926       else if (GET_CODE (op1) == SUBREG
1927                && subreg_lowpart_p (op1))
1928         op1 = SUBREG_REG (op1);
1929     }
1930 #endif
1931
1932   if (op1 == const0_rtx)
1933     return shifted;
1934
1935   for (try = 0; temp == 0 && try < 3; try++)
1936     {
1937       enum optab_methods methods;
1938
1939       if (try == 0)
1940         methods = OPTAB_DIRECT;
1941       else if (try == 1)
1942         methods = OPTAB_WIDEN;
1943       else
1944         methods = OPTAB_LIB_WIDEN;
1945
1946       if (rotate)
1947         {
1948           /* Widening does not work for rotation.  */
1949           if (methods == OPTAB_WIDEN)
1950             continue;
1951           else if (methods == OPTAB_LIB_WIDEN)
1952             {
1953               /* If we have been unable to open-code this by a rotation,
1954                  do it as the IOR of two shifts.  I.e., to rotate A
1955                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1956                  where C is the bitsize of A.
1957
1958                  It is theoretically possible that the target machine might
1959                  not be able to perform either shift and hence we would
1960                  be making two libcalls rather than just the one for the
1961                  shift (similarly if IOR could not be done).  We will allow
1962                  this extremely unlikely lossage to avoid complicating the
1963                  code below.  */
1964
1965               rtx subtarget = target == shifted ? 0 : target;
1966               rtx temp1;
1967               tree type = TREE_TYPE (amount);
1968               tree new_amount = make_tree (type, op1);
1969               tree other_amount
1970                 = fold (build (MINUS_EXPR, type,
1971                                convert (type,
1972                                         build_int_2 (GET_MODE_BITSIZE (mode),
1973                                                      0)),
1974                                amount));
1975
1976               shifted = force_reg (mode, shifted);
1977
1978               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1979                                    mode, shifted, new_amount, subtarget, 1);
1980               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1981                                     mode, shifted, other_amount, 0, 1);
1982               return expand_binop (mode, ior_optab, temp, temp1, target,
1983                                    unsignedp, methods);
1984             }
1985
1986           temp = expand_binop (mode,
1987                                left ? rotl_optab : rotr_optab,
1988                                shifted, op1, target, unsignedp, methods);
1989
1990           /* If we don't have the rotate, but we are rotating by a constant
1991              that is in range, try a rotate in the opposite direction.  */
1992
1993           if (temp == 0 && GET_CODE (op1) == CONST_INT
1994               && INTVAL (op1) > 0
1995               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1996             temp = expand_binop (mode,
1997                                  left ? rotr_optab : rotl_optab,
1998                                  shifted,
1999                                  GEN_INT (GET_MODE_BITSIZE (mode)
2000                                           - INTVAL (op1)),
2001                                  target, unsignedp, methods);
2002         }
2003       else if (unsignedp)
2004         temp = expand_binop (mode,
2005                              left ? ashl_optab : lshr_optab,
2006                              shifted, op1, target, unsignedp, methods);
2007
2008       /* Do arithmetic shifts.
2009          Also, if we are going to widen the operand, we can just as well
2010          use an arithmetic right-shift instead of a logical one.  */
2011       if (temp == 0 && ! rotate
2012           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2013         {
2014           enum optab_methods methods1 = methods;
2015
2016           /* If trying to widen a log shift to an arithmetic shift,
2017              don't accept an arithmetic shift of the same size.  */
2018           if (unsignedp)
2019             methods1 = OPTAB_MUST_WIDEN;
2020
2021           /* Arithmetic shift */
2022
2023           temp = expand_binop (mode,
2024                                left ? ashl_optab : ashr_optab,
2025                                shifted, op1, target, unsignedp, methods1);
2026         }
2027
2028       /* We used to try extzv here for logical right shifts, but that was
2029          only useful for one machine, the VAX, and caused poor code
2030          generation there for lshrdi3, so the code was deleted and a
2031          define_expand for lshrsi3 was added to vax.md.  */
2032     }
2033
2034   if (temp == 0)
2035     abort ();
2036   return temp;
2037 }
2038 \f
2039 enum alg_code { alg_zero, alg_m, alg_shift,
2040                   alg_add_t_m2, alg_sub_t_m2,
2041                   alg_add_factor, alg_sub_factor,
2042                   alg_add_t2_m, alg_sub_t2_m,
2043                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2044
2045 /* This structure records a sequence of operations.
2046    `ops' is the number of operations recorded.
2047    `cost' is their total cost.
2048    The operations are stored in `op' and the corresponding
2049    logarithms of the integer coefficients in `log'.
2050
2051    These are the operations:
2052    alg_zero             total := 0;
2053    alg_m                total := multiplicand;
2054    alg_shift            total := total * coeff
2055    alg_add_t_m2         total := total + multiplicand * coeff;
2056    alg_sub_t_m2         total := total - multiplicand * coeff;
2057    alg_add_factor       total := total * coeff + total;
2058    alg_sub_factor       total := total * coeff - total;
2059    alg_add_t2_m         total := total * coeff + multiplicand;
2060    alg_sub_t2_m         total := total * coeff - multiplicand;
2061
2062    The first operand must be either alg_zero or alg_m.  */
2063
2064 struct algorithm
2065 {
2066   short cost;
2067   short ops;
2068   /* The size of the OP and LOG fields are not directly related to the
2069      word size, but the worst-case algorithms will be if we have few
2070      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2071      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2072      in total wordsize operations.  */
2073   enum alg_code op[MAX_BITS_PER_WORD];
2074   char log[MAX_BITS_PER_WORD];
2075 };
2076
2077 static void synth_mult                  PARAMS ((struct algorithm *,
2078                                                  unsigned HOST_WIDE_INT,
2079                                                  int));
2080 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2081                                                          int, int,
2082                                                          unsigned HOST_WIDE_INT *,
2083                                                          int *, int *));
2084 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2085                                                          int));
2086 /* Compute and return the best algorithm for multiplying by T.
2087    The algorithm must cost less than cost_limit
2088    If retval.cost >= COST_LIMIT, no algorithm was found and all
2089    other field of the returned struct are undefined.  */
2090
2091 static void
2092 synth_mult (alg_out, t, cost_limit)
2093      struct algorithm *alg_out;
2094      unsigned HOST_WIDE_INT t;
2095      int cost_limit;
2096 {
2097   int m;
2098   struct algorithm *alg_in, *best_alg;
2099   int cost;
2100   unsigned HOST_WIDE_INT q;
2101
2102   /* Indicate that no algorithm is yet found.  If no algorithm
2103      is found, this value will be returned and indicate failure.  */
2104   alg_out->cost = cost_limit;
2105
2106   if (cost_limit <= 0)
2107     return;
2108
2109   /* t == 1 can be done in zero cost.  */
2110   if (t == 1)
2111     {
2112       alg_out->ops = 1;
2113       alg_out->cost = 0;
2114       alg_out->op[0] = alg_m;
2115       return;
2116     }
2117
2118   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2119      fail now.  */
2120   if (t == 0)
2121     {
2122       if (zero_cost >= cost_limit)
2123         return;
2124       else
2125         {
2126           alg_out->ops = 1;
2127           alg_out->cost = zero_cost;
2128           alg_out->op[0] = alg_zero;
2129           return;
2130         }
2131     }
2132
2133   /* We'll be needing a couple extra algorithm structures now.  */
2134
2135   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2136   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2137
2138   /* If we have a group of zero bits at the low-order part of T, try
2139      multiplying by the remaining bits and then doing a shift.  */
2140
2141   if ((t & 1) == 0)
2142     {
2143       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2144       if (m < BITS_PER_WORD)
2145         {
2146           q = t >> m;
2147           cost = shift_cost[m];
2148           synth_mult (alg_in, q, cost_limit - cost);
2149
2150           cost += alg_in->cost;
2151           if (cost < cost_limit)
2152             {
2153               struct algorithm *x;
2154               x = alg_in, alg_in = best_alg, best_alg = x;
2155               best_alg->log[best_alg->ops] = m;
2156               best_alg->op[best_alg->ops] = alg_shift;
2157               cost_limit = cost;
2158             }
2159         }
2160     }
2161
2162   /* If we have an odd number, add or subtract one.  */
2163   if ((t & 1) != 0)
2164     {
2165       unsigned HOST_WIDE_INT w;
2166
2167       for (w = 1; (w & t) != 0; w <<= 1)
2168         ;
2169       /* If T was -1, then W will be zero after the loop.  This is another
2170          case where T ends with ...111.  Handling this with (T + 1) and
2171          subtract 1 produces slightly better code and results in algorithm
2172          selection much faster than treating it like the ...0111 case
2173          below.  */
2174       if (w == 0
2175           || (w > 2
2176               /* Reject the case where t is 3.
2177                  Thus we prefer addition in that case.  */
2178               && t != 3))
2179         {
2180           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2181
2182           cost = add_cost;
2183           synth_mult (alg_in, t + 1, cost_limit - cost);
2184
2185           cost += alg_in->cost;
2186           if (cost < cost_limit)
2187             {
2188               struct algorithm *x;
2189               x = alg_in, alg_in = best_alg, best_alg = x;
2190               best_alg->log[best_alg->ops] = 0;
2191               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2192               cost_limit = cost;
2193             }
2194         }
2195       else
2196         {
2197           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2198
2199           cost = add_cost;
2200           synth_mult (alg_in, t - 1, cost_limit - cost);
2201
2202           cost += alg_in->cost;
2203           if (cost < cost_limit)
2204             {
2205               struct algorithm *x;
2206               x = alg_in, alg_in = best_alg, best_alg = x;
2207               best_alg->log[best_alg->ops] = 0;
2208               best_alg->op[best_alg->ops] = alg_add_t_m2;
2209               cost_limit = cost;
2210             }
2211         }
2212     }
2213
2214   /* Look for factors of t of the form
2215      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2216      If we find such a factor, we can multiply by t using an algorithm that
2217      multiplies by q, shift the result by m and add/subtract it to itself.
2218
2219      We search for large factors first and loop down, even if large factors
2220      are less probable than small; if we find a large factor we will find a
2221      good sequence quickly, and therefore be able to prune (by decreasing
2222      COST_LIMIT) the search.  */
2223
2224   for (m = floor_log2 (t - 1); m >= 2; m--)
2225     {
2226       unsigned HOST_WIDE_INT d;
2227
2228       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2229       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2230         {
2231           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2232           synth_mult (alg_in, t / d, cost_limit - cost);
2233
2234           cost += alg_in->cost;
2235           if (cost < cost_limit)
2236             {
2237               struct algorithm *x;
2238               x = alg_in, alg_in = best_alg, best_alg = x;
2239               best_alg->log[best_alg->ops] = m;
2240               best_alg->op[best_alg->ops] = alg_add_factor;
2241               cost_limit = cost;
2242             }
2243           /* Other factors will have been taken care of in the recursion.  */
2244           break;
2245         }
2246
2247       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2248       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2249         {
2250           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2251           synth_mult (alg_in, t / d, cost_limit - cost);
2252
2253           cost += alg_in->cost;
2254           if (cost < cost_limit)
2255             {
2256               struct algorithm *x;
2257               x = alg_in, alg_in = best_alg, best_alg = x;
2258               best_alg->log[best_alg->ops] = m;
2259               best_alg->op[best_alg->ops] = alg_sub_factor;
2260               cost_limit = cost;
2261             }
2262           break;
2263         }
2264     }
2265
2266   /* Try shift-and-add (load effective address) instructions,
2267      i.e. do a*3, a*5, a*9.  */
2268   if ((t & 1) != 0)
2269     {
2270       q = t - 1;
2271       q = q & -q;
2272       m = exact_log2 (q);
2273       if (m >= 0 && m < BITS_PER_WORD)
2274         {
2275           cost = shiftadd_cost[m];
2276           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2277
2278           cost += alg_in->cost;
2279           if (cost < cost_limit)
2280             {
2281               struct algorithm *x;
2282               x = alg_in, alg_in = best_alg, best_alg = x;
2283               best_alg->log[best_alg->ops] = m;
2284               best_alg->op[best_alg->ops] = alg_add_t2_m;
2285               cost_limit = cost;
2286             }
2287         }
2288
2289       q = t + 1;
2290       q = q & -q;
2291       m = exact_log2 (q);
2292       if (m >= 0 && m < BITS_PER_WORD)
2293         {
2294           cost = shiftsub_cost[m];
2295           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2296
2297           cost += alg_in->cost;
2298           if (cost < cost_limit)
2299             {
2300               struct algorithm *x;
2301               x = alg_in, alg_in = best_alg, best_alg = x;
2302               best_alg->log[best_alg->ops] = m;
2303               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2304               cost_limit = cost;
2305             }
2306         }
2307     }
2308
2309   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2310      we have not found any algorithm.  */
2311   if (cost_limit == alg_out->cost)
2312     return;
2313
2314   /* If we are getting a too long sequence for `struct algorithm'
2315      to record, make this search fail.  */
2316   if (best_alg->ops == MAX_BITS_PER_WORD)
2317     return;
2318
2319   /* Copy the algorithm from temporary space to the space at alg_out.
2320      We avoid using structure assignment because the majority of
2321      best_alg is normally undefined, and this is a critical function.  */
2322   alg_out->ops = best_alg->ops + 1;
2323   alg_out->cost = cost_limit;
2324   memcpy (alg_out->op, best_alg->op,
2325           alg_out->ops * sizeof *alg_out->op);
2326   memcpy (alg_out->log, best_alg->log,
2327           alg_out->ops * sizeof *alg_out->log);
2328 }
2329 \f
2330 /* Perform a multiplication and return an rtx for the result.
2331    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2332    TARGET is a suggestion for where to store the result (an rtx).
2333
2334    We check specially for a constant integer as OP1.
2335    If you want this check for OP0 as well, then before calling
2336    you should swap the two operands if OP0 would be constant.  */
2337
2338 rtx
2339 expand_mult (mode, op0, op1, target, unsignedp)
2340      enum machine_mode mode;
2341      rtx op0, op1, target;
2342      int unsignedp;
2343 {
2344   rtx const_op1 = op1;
2345
2346   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2347      less than or equal in size to `unsigned int' this doesn't matter.
2348      If the mode is larger than `unsigned int', then synth_mult works only
2349      if the constant value exactly fits in an `unsigned int' without any
2350      truncation.  This means that multiplying by negative values does
2351      not work; results are off by 2^32 on a 32 bit machine.  */
2352
2353   /* If we are multiplying in DImode, it may still be a win
2354      to try to work with shifts and adds.  */
2355   if (GET_CODE (op1) == CONST_DOUBLE
2356       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2357       && HOST_BITS_PER_INT >= BITS_PER_WORD
2358       && CONST_DOUBLE_HIGH (op1) == 0)
2359     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2360   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2361            && GET_CODE (op1) == CONST_INT
2362            && INTVAL (op1) < 0)
2363     const_op1 = 0;
2364
2365   /* We used to test optimize here, on the grounds that it's better to
2366      produce a smaller program when -O is not used.
2367      But this causes such a terrible slowdown sometimes
2368      that it seems better to use synth_mult always.  */
2369
2370   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2371       && (unsignedp || ! flag_trapv))
2372     {
2373       struct algorithm alg;
2374       struct algorithm alg2;
2375       HOST_WIDE_INT val = INTVAL (op1);
2376       HOST_WIDE_INT val_so_far;
2377       rtx insn;
2378       int mult_cost;
2379       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2380
2381       /* op0 must be register to make mult_cost match the precomputed
2382          shiftadd_cost array.  */
2383       op0 = force_reg (mode, op0);
2384
2385       /* Try to do the computation three ways: multiply by the negative of OP1
2386          and then negate, do the multiplication directly, or do multiplication
2387          by OP1 - 1.  */
2388
2389       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2390       mult_cost = MIN (12 * add_cost, mult_cost);
2391
2392       synth_mult (&alg, val, mult_cost);
2393
2394       /* This works only if the inverted value actually fits in an
2395          `unsigned int' */
2396       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2397         {
2398           synth_mult (&alg2, - val,
2399                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2400           if (alg2.cost + negate_cost < alg.cost)
2401             alg = alg2, variant = negate_variant;
2402         }
2403
2404       /* This proves very useful for division-by-constant.  */
2405       synth_mult (&alg2, val - 1,
2406                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2407       if (alg2.cost + add_cost < alg.cost)
2408         alg = alg2, variant = add_variant;
2409
2410       if (alg.cost < mult_cost)
2411         {
2412           /* We found something cheaper than a multiply insn.  */
2413           int opno;
2414           rtx accum, tem;
2415           enum machine_mode nmode;
2416
2417           op0 = protect_from_queue (op0, 0);
2418
2419           /* Avoid referencing memory over and over.
2420              For speed, but also for correctness when mem is volatile.  */
2421           if (GET_CODE (op0) == MEM)
2422             op0 = force_reg (mode, op0);
2423
2424           /* ACCUM starts out either as OP0 or as a zero, depending on
2425              the first operation.  */
2426
2427           if (alg.op[0] == alg_zero)
2428             {
2429               accum = copy_to_mode_reg (mode, const0_rtx);
2430               val_so_far = 0;
2431             }
2432           else if (alg.op[0] == alg_m)
2433             {
2434               accum = copy_to_mode_reg (mode, op0);
2435               val_so_far = 1;
2436             }
2437           else
2438             abort ();
2439
2440           for (opno = 1; opno < alg.ops; opno++)
2441             {
2442               int log = alg.log[opno];
2443               int preserve = preserve_subexpressions_p ();
2444               rtx shift_subtarget = preserve ? 0 : accum;
2445               rtx add_target
2446                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2447                    && ! preserve)
2448                   ? target : 0;
2449               rtx accum_target = preserve ? 0 : accum;
2450
2451               switch (alg.op[opno])
2452                 {
2453                 case alg_shift:
2454                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2455                                         build_int_2 (log, 0), NULL_RTX, 0);
2456                   val_so_far <<= log;
2457                   break;
2458
2459                 case alg_add_t_m2:
2460                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2461                                       build_int_2 (log, 0), NULL_RTX, 0);
2462                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2463                                          add_target
2464                                          ? add_target : accum_target);
2465                   val_so_far += (HOST_WIDE_INT) 1 << log;
2466                   break;
2467
2468                 case alg_sub_t_m2:
2469                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2470                                       build_int_2 (log, 0), NULL_RTX, 0);
2471                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2472                                          add_target
2473                                          ? add_target : accum_target);
2474                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2475                   break;
2476
2477                 case alg_add_t2_m:
2478                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2479                                         build_int_2 (log, 0), shift_subtarget,
2480                                         0);
2481                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2482                                          add_target
2483                                          ? add_target : accum_target);
2484                   val_so_far = (val_so_far << log) + 1;
2485                   break;
2486
2487                 case alg_sub_t2_m:
2488                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2489                                         build_int_2 (log, 0), shift_subtarget,
2490                                         0);
2491                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2492                                          add_target
2493                                          ? add_target : accum_target);
2494                   val_so_far = (val_so_far << log) - 1;
2495                   break;
2496
2497                 case alg_add_factor:
2498                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2499                                       build_int_2 (log, 0), NULL_RTX, 0);
2500                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2501                                          add_target
2502                                          ? add_target : accum_target);
2503                   val_so_far += val_so_far << log;
2504                   break;
2505
2506                 case alg_sub_factor:
2507                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2508                                       build_int_2 (log, 0), NULL_RTX, 0);
2509                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2510                                          (add_target ? add_target
2511                                           : preserve ? 0 : tem));
2512                   val_so_far = (val_so_far << log) - val_so_far;
2513                   break;
2514
2515                 default:
2516                   abort ();
2517                 }
2518
2519               /* Write a REG_EQUAL note on the last insn so that we can cse
2520                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2521                  we've set the inner register and must properly indicate
2522                  that.  */
2523
2524               tem = op0, nmode = mode;
2525               if (GET_CODE (accum) == SUBREG)
2526                 {
2527                   nmode = GET_MODE (SUBREG_REG (accum));
2528                   tem = gen_lowpart (nmode, op0);
2529                 }
2530
2531               insn = get_last_insn ();
2532               set_unique_reg_note (insn,
2533                                    REG_EQUAL,
2534                                    gen_rtx_MULT (nmode, tem,
2535                                                  GEN_INT (val_so_far)));
2536             }
2537
2538           if (variant == negate_variant)
2539             {
2540               val_so_far = - val_so_far;
2541               accum = expand_unop (mode, neg_optab, accum, target, 0);
2542             }
2543           else if (variant == add_variant)
2544             {
2545               val_so_far = val_so_far + 1;
2546               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2547             }
2548
2549           if (val != val_so_far)
2550             abort ();
2551
2552           return accum;
2553         }
2554     }
2555
2556   /* This used to use umul_optab if unsigned, but for non-widening multiply
2557      there is no difference between signed and unsigned.  */
2558   op0 = expand_binop (mode,
2559                       ! unsignedp
2560                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2561                       ? smulv_optab : smul_optab,
2562                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2563   if (op0 == 0)
2564     abort ();
2565   return op0;
2566 }
2567 \f
2568 /* Return the smallest n such that 2**n >= X.  */
2569
2570 int
2571 ceil_log2 (x)
2572      unsigned HOST_WIDE_INT x;
2573 {
2574   return floor_log2 (x - 1) + 1;
2575 }
2576
2577 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2578    replace division by D, and put the least significant N bits of the result
2579    in *MULTIPLIER_PTR and return the most significant bit.
2580
2581    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2582    needed precision is in PRECISION (should be <= N).
2583
2584    PRECISION should be as small as possible so this function can choose
2585    multiplier more freely.
2586
2587    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2588    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2589
2590    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2591    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2592
2593 static
2594 unsigned HOST_WIDE_INT
2595 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2596      unsigned HOST_WIDE_INT d;
2597      int n;
2598      int precision;
2599      unsigned HOST_WIDE_INT *multiplier_ptr;
2600      int *post_shift_ptr;
2601      int *lgup_ptr;
2602 {
2603   HOST_WIDE_INT mhigh_hi, mlow_hi;
2604   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2605   int lgup, post_shift;
2606   int pow, pow2;
2607   unsigned HOST_WIDE_INT nl, dummy1;
2608   HOST_WIDE_INT nh, dummy2;
2609
2610   /* lgup = ceil(log2(divisor)); */
2611   lgup = ceil_log2 (d);
2612
2613   if (lgup > n)
2614     abort ();
2615
2616   pow = n + lgup;
2617   pow2 = n + lgup - precision;
2618
2619   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2620     {
2621       /* We could handle this with some effort, but this case is much better
2622          handled directly with a scc insn, so rely on caller using that.  */
2623       abort ();
2624     }
2625
2626   /* mlow = 2^(N + lgup)/d */
2627  if (pow >= HOST_BITS_PER_WIDE_INT)
2628     {
2629       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2630       nl = 0;
2631     }
2632   else
2633     {
2634       nh = 0;
2635       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2636     }
2637   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2638                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2639
2640   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2641   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2642     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2643   else
2644     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2645   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2646                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2647
2648   if (mhigh_hi && nh - d >= d)
2649     abort ();
2650   if (mhigh_hi > 1 || mlow_hi > 1)
2651     abort ();
2652   /* assert that mlow < mhigh.  */
2653   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2654     abort ();
2655
2656   /* If precision == N, then mlow, mhigh exceed 2^N
2657      (but they do not exceed 2^(N+1)).  */
2658
2659   /* Reduce to lowest terms */
2660   for (post_shift = lgup; post_shift > 0; post_shift--)
2661     {
2662       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2663       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2664       if (ml_lo >= mh_lo)
2665         break;
2666
2667       mlow_hi = 0;
2668       mlow_lo = ml_lo;
2669       mhigh_hi = 0;
2670       mhigh_lo = mh_lo;
2671     }
2672
2673   *post_shift_ptr = post_shift;
2674   *lgup_ptr = lgup;
2675   if (n < HOST_BITS_PER_WIDE_INT)
2676     {
2677       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2678       *multiplier_ptr = mhigh_lo & mask;
2679       return mhigh_lo >= mask;
2680     }
2681   else
2682     {
2683       *multiplier_ptr = mhigh_lo;
2684       return mhigh_hi;
2685     }
2686 }
2687
2688 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2689    congruent to 1 (mod 2**N).  */
2690
2691 static unsigned HOST_WIDE_INT
2692 invert_mod2n (x, n)
2693      unsigned HOST_WIDE_INT x;
2694      int n;
2695 {
2696   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2697
2698   /* The algorithm notes that the choice y = x satisfies
2699      x*y == 1 mod 2^3, since x is assumed odd.
2700      Each iteration doubles the number of bits of significance in y.  */
2701
2702   unsigned HOST_WIDE_INT mask;
2703   unsigned HOST_WIDE_INT y = x;
2704   int nbit = 3;
2705
2706   mask = (n == HOST_BITS_PER_WIDE_INT
2707           ? ~(unsigned HOST_WIDE_INT) 0
2708           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2709
2710   while (nbit < n)
2711     {
2712       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2713       nbit *= 2;
2714     }
2715   return y;
2716 }
2717
2718 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2719    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2720    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2721    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2722    become signed.
2723
2724    The result is put in TARGET if that is convenient.
2725
2726    MODE is the mode of operation.  */
2727
2728 rtx
2729 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2730      enum machine_mode mode;
2731      rtx adj_operand, op0, op1, target;
2732      int unsignedp;
2733 {
2734   rtx tem;
2735   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2736
2737   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2738                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2739                       NULL_RTX, 0);
2740   tem = expand_and (mode, tem, op1, NULL_RTX);
2741   adj_operand
2742     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2743                      adj_operand);
2744
2745   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2746                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2747                       NULL_RTX, 0);
2748   tem = expand_and (mode, tem, op0, NULL_RTX);
2749   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2750                           target);
2751
2752   return target;
2753 }
2754
2755 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2756    in TARGET if that is convenient, and return where the result is.  If the
2757    operation can not be performed, 0 is returned.
2758
2759    MODE is the mode of operation and result.
2760
2761    UNSIGNEDP nonzero means unsigned multiply.
2762
2763    MAX_COST is the total allowed cost for the expanded RTL.  */
2764
2765 rtx
2766 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2767      enum machine_mode mode;
2768      rtx op0, target;
2769      unsigned HOST_WIDE_INT cnst1;
2770      int unsignedp;
2771      int max_cost;
2772 {
2773   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2774   optab mul_highpart_optab;
2775   optab moptab;
2776   rtx tem;
2777   int size = GET_MODE_BITSIZE (mode);
2778   rtx op1, wide_op1;
2779
2780   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2781   if (size > HOST_BITS_PER_WIDE_INT)
2782     abort ();
2783
2784   op1 = gen_int_mode (cnst1, mode);
2785
2786   wide_op1
2787     = immed_double_const (cnst1,
2788                           (unsignedp
2789                            ? (HOST_WIDE_INT) 0
2790                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2791                           wider_mode);
2792
2793   /* expand_mult handles constant multiplication of word_mode
2794      or narrower.  It does a poor job for large modes.  */
2795   if (size < BITS_PER_WORD
2796       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2797     {
2798       /* We have to do this, since expand_binop doesn't do conversion for
2799          multiply.  Maybe change expand_binop to handle widening multiply?  */
2800       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2801
2802       /* We know that this can't have signed overflow, so pretend this is
2803          an unsigned multiply.  */
2804       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2805       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2806                           build_int_2 (size, 0), NULL_RTX, 1);
2807       return convert_modes (mode, wider_mode, tem, unsignedp);
2808     }
2809
2810   if (target == 0)
2811     target = gen_reg_rtx (mode);
2812
2813   /* Firstly, try using a multiplication insn that only generates the needed
2814      high part of the product, and in the sign flavor of unsignedp.  */
2815   if (mul_highpart_cost[(int) mode] < max_cost)
2816     {
2817       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2818       target = expand_binop (mode, mul_highpart_optab,
2819                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2820       if (target)
2821         return target;
2822     }
2823
2824   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2825      Need to adjust the result after the multiplication.  */
2826   if (size - 1 < BITS_PER_WORD
2827       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2828           < max_cost))
2829     {
2830       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2831       target = expand_binop (mode, mul_highpart_optab,
2832                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2833       if (target)
2834         /* We used the wrong signedness.  Adjust the result.  */
2835         return expand_mult_highpart_adjust (mode, target, op0,
2836                                             op1, target, unsignedp);
2837     }
2838
2839   /* Try widening multiplication.  */
2840   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2841   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2842       && mul_widen_cost[(int) wider_mode] < max_cost)
2843     {
2844       op1 = force_reg (mode, op1);
2845       goto try;
2846     }
2847
2848   /* Try widening the mode and perform a non-widening multiplication.  */
2849   moptab = smul_optab;
2850   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2851       && size - 1 < BITS_PER_WORD
2852       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2853     {
2854       op1 = wide_op1;
2855       goto try;
2856     }
2857
2858   /* Try widening multiplication of opposite signedness, and adjust.  */
2859   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2860   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2861       && size - 1 < BITS_PER_WORD
2862       && (mul_widen_cost[(int) wider_mode]
2863           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2864     {
2865       rtx regop1 = force_reg (mode, op1);
2866       tem = expand_binop (wider_mode, moptab, op0, regop1,
2867                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2868       if (tem != 0)
2869         {
2870           /* Extract the high half of the just generated product.  */
2871           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2872                               build_int_2 (size, 0), NULL_RTX, 1);
2873           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2874           /* We used the wrong signedness.  Adjust the result.  */
2875           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2876                                               target, unsignedp);
2877         }
2878     }
2879
2880   return 0;
2881
2882  try:
2883   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2884   tem = expand_binop (wider_mode, moptab, op0, op1,
2885                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2886   if (tem == 0)
2887     return 0;
2888
2889   /* Extract the high half of the just generated product.  */
2890   if (mode == word_mode)
2891     {
2892       return gen_highpart (mode, tem);
2893     }
2894   else
2895     {
2896       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2897                           build_int_2 (size, 0), NULL_RTX, 1);
2898       return convert_modes (mode, wider_mode, tem, unsignedp);
2899     }
2900 }
2901 \f
2902 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2903    if that is convenient, and returning where the result is.
2904    You may request either the quotient or the remainder as the result;
2905    specify REM_FLAG nonzero to get the remainder.
2906
2907    CODE is the expression code for which kind of division this is;
2908    it controls how rounding is done.  MODE is the machine mode to use.
2909    UNSIGNEDP nonzero means do unsigned division.  */
2910
2911 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2912    and then correct it by or'ing in missing high bits
2913    if result of ANDI is nonzero.
2914    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2915    This could optimize to a bfexts instruction.
2916    But C doesn't use these operations, so their optimizations are
2917    left for later.  */
2918 /* ??? For modulo, we don't actually need the highpart of the first product,
2919    the low part will do nicely.  And for small divisors, the second multiply
2920    can also be a low-part only multiply or even be completely left out.
2921    E.g. to calculate the remainder of a division by 3 with a 32 bit
2922    multiply, multiply with 0x55555556 and extract the upper two bits;
2923    the result is exact for inputs up to 0x1fffffff.
2924    The input range can be reduced by using cross-sum rules.
2925    For odd divisors >= 3, the following table gives right shift counts
2926    so that if an number is shifted by an integer multiple of the given
2927    amount, the remainder stays the same:
2928    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2929    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2930    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2931    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2932    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2933
2934    Cross-sum rules for even numbers can be derived by leaving as many bits
2935    to the right alone as the divisor has zeros to the right.
2936    E.g. if x is an unsigned 32 bit number:
2937    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2938    */
2939
2940 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2941
2942 rtx
2943 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2944      int rem_flag;
2945      enum tree_code code;
2946      enum machine_mode mode;
2947      rtx op0, op1, target;
2948      int unsignedp;
2949 {
2950   enum machine_mode compute_mode;
2951   rtx tquotient;
2952   rtx quotient = 0, remainder = 0;
2953   rtx last;
2954   int size;
2955   rtx insn, set;
2956   optab optab1, optab2;
2957   int op1_is_constant, op1_is_pow2;
2958   int max_cost, extra_cost;
2959   static HOST_WIDE_INT last_div_const = 0;
2960
2961   op1_is_constant = GET_CODE (op1) == CONST_INT;
2962   op1_is_pow2 = (op1_is_constant
2963                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2964                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2965
2966   /*
2967      This is the structure of expand_divmod:
2968
2969      First comes code to fix up the operands so we can perform the operations
2970      correctly and efficiently.
2971
2972      Second comes a switch statement with code specific for each rounding mode.
2973      For some special operands this code emits all RTL for the desired
2974      operation, for other cases, it generates only a quotient and stores it in
2975      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2976      to indicate that it has not done anything.
2977
2978      Last comes code that finishes the operation.  If QUOTIENT is set and
2979      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2980      QUOTIENT is not set, it is computed using trunc rounding.
2981
2982      We try to generate special code for division and remainder when OP1 is a
2983      constant.  If |OP1| = 2**n we can use shifts and some other fast
2984      operations.  For other values of OP1, we compute a carefully selected
2985      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2986      by m.
2987
2988      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2989      half of the product.  Different strategies for generating the product are
2990      implemented in expand_mult_highpart.
2991
2992      If what we actually want is the remainder, we generate that by another
2993      by-constant multiplication and a subtraction.  */
2994
2995   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2996      code below will malfunction if we are, so check here and handle
2997      the special case if so.  */
2998   if (op1 == const1_rtx)
2999     return rem_flag ? const0_rtx : op0;
3000
3001     /* When dividing by -1, we could get an overflow.
3002      negv_optab can handle overflows.  */
3003   if (! unsignedp && op1 == constm1_rtx)
3004     {
3005       if (rem_flag)
3006         return const0_rtx;
3007       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3008                           ? negv_optab : neg_optab, op0, target, 0);
3009     }
3010
3011   if (target
3012       /* Don't use the function value register as a target
3013          since we have to read it as well as write it,
3014          and function-inlining gets confused by this.  */
3015       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3016           /* Don't clobber an operand while doing a multi-step calculation.  */
3017           || ((rem_flag || op1_is_constant)
3018               && (reg_mentioned_p (target, op0)
3019                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3020           || reg_mentioned_p (target, op1)
3021           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3022     target = 0;
3023
3024   /* Get the mode in which to perform this computation.  Normally it will
3025      be MODE, but sometimes we can't do the desired operation in MODE.
3026      If so, pick a wider mode in which we can do the operation.  Convert
3027      to that mode at the start to avoid repeated conversions.
3028
3029      First see what operations we need.  These depend on the expression
3030      we are evaluating.  (We assume that divxx3 insns exist under the
3031      same conditions that modxx3 insns and that these insns don't normally
3032      fail.  If these assumptions are not correct, we may generate less
3033      efficient code in some cases.)
3034
3035      Then see if we find a mode in which we can open-code that operation
3036      (either a division, modulus, or shift).  Finally, check for the smallest
3037      mode for which we can do the operation with a library call.  */
3038
3039   /* We might want to refine this now that we have division-by-constant
3040      optimization.  Since expand_mult_highpart tries so many variants, it is
3041      not straightforward to generalize this.  Maybe we should make an array
3042      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3043
3044   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3045             ? (unsignedp ? lshr_optab : ashr_optab)
3046             : (unsignedp ? udiv_optab : sdiv_optab));
3047   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3048             ? optab1
3049             : (unsignedp ? udivmod_optab : sdivmod_optab));
3050
3051   for (compute_mode = mode; compute_mode != VOIDmode;
3052        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3053     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3054         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3055       break;
3056
3057   if (compute_mode == VOIDmode)
3058     for (compute_mode = mode; compute_mode != VOIDmode;
3059          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3060       if (optab1->handlers[(int) compute_mode].libfunc
3061           || optab2->handlers[(int) compute_mode].libfunc)
3062         break;
3063
3064   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3065      in expand_binop.  */
3066   if (compute_mode == VOIDmode)
3067     compute_mode = mode;
3068
3069   if (target && GET_MODE (target) == compute_mode)
3070     tquotient = target;
3071   else
3072     tquotient = gen_reg_rtx (compute_mode);
3073
3074   size = GET_MODE_BITSIZE (compute_mode);
3075 #if 0
3076   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3077      (mode), and thereby get better code when OP1 is a constant.  Do that
3078      later.  It will require going over all usages of SIZE below.  */
3079   size = GET_MODE_BITSIZE (mode);
3080 #endif
3081
3082   /* Only deduct something for a REM if the last divide done was
3083      for a different constant.   Then set the constant of the last
3084      divide.  */
3085   max_cost = div_cost[(int) compute_mode]
3086     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3087                       && INTVAL (op1) == last_div_const)
3088        ? mul_cost[(int) compute_mode] + add_cost : 0);
3089
3090   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3091
3092   /* Now convert to the best mode to use.  */
3093   if (compute_mode != mode)
3094     {
3095       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3096       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3097
3098       /* convert_modes may have placed op1 into a register, so we
3099          must recompute the following.  */
3100       op1_is_constant = GET_CODE (op1) == CONST_INT;
3101       op1_is_pow2 = (op1_is_constant
3102                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3103                           || (! unsignedp
3104                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3105     }
3106
3107   /* If one of the operands is a volatile MEM, copy it into a register.  */
3108
3109   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3110     op0 = force_reg (compute_mode, op0);
3111   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3112     op1 = force_reg (compute_mode, op1);
3113
3114   /* If we need the remainder or if OP1 is constant, we need to
3115      put OP0 in a register in case it has any queued subexpressions.  */
3116   if (rem_flag || op1_is_constant)
3117     op0 = force_reg (compute_mode, op0);
3118
3119   last = get_last_insn ();
3120
3121   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3122   if (unsignedp)
3123     {
3124       if (code == FLOOR_DIV_EXPR)
3125         code = TRUNC_DIV_EXPR;
3126       if (code == FLOOR_MOD_EXPR)
3127         code = TRUNC_MOD_EXPR;
3128       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3129         code = TRUNC_DIV_EXPR;
3130     }
3131
3132   if (op1 != const0_rtx)
3133     switch (code)
3134       {
3135       case TRUNC_MOD_EXPR:
3136       case TRUNC_DIV_EXPR:
3137         if (op1_is_constant)
3138           {
3139             if (unsignedp)
3140               {
3141                 unsigned HOST_WIDE_INT mh, ml;
3142                 int pre_shift, post_shift;
3143                 int dummy;
3144                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3145
3146                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3147                   {
3148                     pre_shift = floor_log2 (d);
3149                     if (rem_flag)
3150                       {
3151                         remainder
3152                           = expand_binop (compute_mode, and_optab, op0,
3153                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3154                                           remainder, 1,
3155                                           OPTAB_LIB_WIDEN);
3156                         if (remainder)
3157                           return gen_lowpart (mode, remainder);
3158                       }
3159                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3160                                              build_int_2 (pre_shift, 0),
3161                                              tquotient, 1);
3162                   }
3163                 else if (size <= HOST_BITS_PER_WIDE_INT)
3164                   {
3165                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3166                       {
3167                         /* Most significant bit of divisor is set; emit an scc
3168                            insn.  */
3169                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3170                                                     compute_mode, 1, 1);
3171                         if (quotient == 0)
3172                           goto fail1;
3173                       }
3174                     else
3175                       {
3176                         /* Find a suitable multiplier and right shift count
3177                            instead of multiplying with D.  */
3178
3179                         mh = choose_multiplier (d, size, size,
3180                                                 &ml, &post_shift, &dummy);
3181
3182                         /* If the suggested multiplier is more than SIZE bits,
3183                            we can do better for even divisors, using an
3184                            initial right shift.  */
3185                         if (mh != 0 && (d & 1) == 0)
3186                           {
3187                             pre_shift = floor_log2 (d & -d);
3188                             mh = choose_multiplier (d >> pre_shift, size,
3189                                                     size - pre_shift,
3190                                                     &ml, &post_shift, &dummy);
3191                             if (mh)
3192                               abort ();
3193                           }
3194                         else
3195                           pre_shift = 0;
3196
3197                         if (mh != 0)
3198                           {
3199                             rtx t1, t2, t3, t4;
3200
3201                             if (post_shift - 1 >= BITS_PER_WORD)
3202                               goto fail1;
3203
3204                             extra_cost = (shift_cost[post_shift - 1]
3205                                           + shift_cost[1] + 2 * add_cost);
3206                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3207                                                        NULL_RTX, 1,
3208                                                        max_cost - extra_cost);
3209                             if (t1 == 0)
3210                               goto fail1;
3211                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3212                                                                op0, t1),
3213                                                 NULL_RTX);
3214                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3215                                                build_int_2 (1, 0), NULL_RTX,1);
3216                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3217                                                               t1, t3),
3218                                                 NULL_RTX);
3219                             quotient
3220                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3221                                               build_int_2 (post_shift - 1, 0),
3222                                               tquotient, 1);
3223                           }
3224                         else
3225                           {
3226                             rtx t1, t2;
3227
3228                             if (pre_shift >= BITS_PER_WORD
3229                                 || post_shift >= BITS_PER_WORD)
3230                               goto fail1;
3231
3232                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3233                                                build_int_2 (pre_shift, 0),
3234                                                NULL_RTX, 1);
3235                             extra_cost = (shift_cost[pre_shift]
3236                                           + shift_cost[post_shift]);
3237                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3238                                                        NULL_RTX, 1,
3239                                                        max_cost - extra_cost);
3240                             if (t2 == 0)
3241                               goto fail1;
3242                             quotient
3243                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3244                                               build_int_2 (post_shift, 0),
3245                                               tquotient, 1);
3246                           }
3247                       }
3248                   }
3249                 else            /* Too wide mode to use tricky code */
3250                   break;
3251
3252                 insn = get_last_insn ();
3253                 if (insn != last
3254                     && (set = single_set (insn)) != 0
3255                     && SET_DEST (set) == quotient)
3256                   set_unique_reg_note (insn,
3257                                        REG_EQUAL,
3258                                        gen_rtx_UDIV (compute_mode, op0, op1));
3259               }
3260             else                /* TRUNC_DIV, signed */
3261               {
3262                 unsigned HOST_WIDE_INT ml;
3263                 int lgup, post_shift;
3264                 HOST_WIDE_INT d = INTVAL (op1);
3265                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3266
3267                 /* n rem d = n rem -d */
3268                 if (rem_flag && d < 0)
3269                   {
3270                     d = abs_d;
3271                     op1 = gen_int_mode (abs_d, compute_mode);
3272                   }
3273
3274                 if (d == 1)
3275                   quotient = op0;
3276                 else if (d == -1)
3277                   quotient = expand_unop (compute_mode, neg_optab, op0,
3278                                           tquotient, 0);
3279                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3280                   {
3281                     /* This case is not handled correctly below.  */
3282                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3283                                                 compute_mode, 1, 1);
3284                     if (quotient == 0)
3285                       goto fail1;
3286                   }
3287                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3288                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3289                          /* ??? The cheap metric is computed only for
3290                             word_mode.  If this operation is wider, this may
3291                             not be so.  Assume true if the optab has an
3292                             expander for this mode.  */
3293                          && (((rem_flag ? smod_optab : sdiv_optab)
3294                               ->handlers[(int) compute_mode].insn_code
3295                               != CODE_FOR_nothing)
3296                              || (sdivmod_optab->handlers[(int) compute_mode]
3297                                  .insn_code != CODE_FOR_nothing)))
3298                   ;
3299                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3300                   {
3301                     lgup = floor_log2 (abs_d);
3302                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3303                       {
3304                         rtx label = gen_label_rtx ();
3305                         rtx t1;
3306
3307                         t1 = copy_to_mode_reg (compute_mode, op0);
3308                         do_cmp_and_jump (t1, const0_rtx, GE,
3309                                          compute_mode, label);
3310                         expand_inc (t1, gen_int_mode (abs_d - 1,
3311                                                       compute_mode));
3312                         emit_label (label);
3313                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3314                                                  build_int_2 (lgup, 0),
3315                                                  tquotient, 0);
3316                       }
3317                     else
3318                       {
3319                         rtx t1, t2, t3;
3320                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3321                                            build_int_2 (size - 1, 0),
3322                                            NULL_RTX, 0);
3323                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3324                                            build_int_2 (size - lgup, 0),
3325                                            NULL_RTX, 1);
3326                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3327                                                           op0, t2),
3328                                             NULL_RTX);
3329                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3330                                                  build_int_2 (lgup, 0),
3331                                                  tquotient, 0);
3332                       }
3333
3334                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3335                        the quotient.  */
3336                     if (d < 0)
3337                       {
3338                         insn = get_last_insn ();
3339                         if (insn != last
3340                             && (set = single_set (insn)) != 0
3341                             && SET_DEST (set) == quotient
3342                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3343                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3344                           set_unique_reg_note (insn,
3345                                                REG_EQUAL,
3346                                                gen_rtx_DIV (compute_mode,
3347                                                             op0,
3348                                                             GEN_INT
3349                                                             (trunc_int_for_mode
3350                                                              (abs_d,
3351                                                               compute_mode))));
3352
3353                         quotient = expand_unop (compute_mode, neg_optab,
3354                                                 quotient, quotient, 0);
3355                       }
3356                   }
3357                 else if (size <= HOST_BITS_PER_WIDE_INT)
3358                   {
3359                     choose_multiplier (abs_d, size, size - 1,
3360                                        &ml, &post_shift, &lgup);
3361                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3362                       {
3363                         rtx t1, t2, t3;
3364
3365                         if (post_shift >= BITS_PER_WORD
3366                             || size - 1 >= BITS_PER_WORD)
3367                           goto fail1;
3368
3369                         extra_cost = (shift_cost[post_shift]
3370                                       + shift_cost[size - 1] + add_cost);
3371                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3372                                                    NULL_RTX, 0,
3373                                                    max_cost - extra_cost);
3374                         if (t1 == 0)
3375                           goto fail1;
3376                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3377                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3378                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3379                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3380                         if (d < 0)
3381                           quotient
3382                             = force_operand (gen_rtx_MINUS (compute_mode,
3383                                                             t3, t2),
3384                                              tquotient);
3385                         else
3386                           quotient
3387                             = force_operand (gen_rtx_MINUS (compute_mode,
3388                                                             t2, t3),
3389                                              tquotient);
3390                       }
3391                     else
3392                       {
3393                         rtx t1, t2, t3, t4;
3394
3395                         if (post_shift >= BITS_PER_WORD
3396                             || size - 1 >= BITS_PER_WORD)
3397                           goto fail1;
3398
3399                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3400                         extra_cost = (shift_cost[post_shift]
3401                                       + shift_cost[size - 1] + 2 * add_cost);
3402                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3403                                                    NULL_RTX, 0,
3404                                                    max_cost - extra_cost);
3405                         if (t1 == 0)
3406                           goto fail1;
3407                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3408                                                           t1, op0),
3409                                             NULL_RTX);
3410                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3411                                            build_int_2 (post_shift, 0),
3412                                            NULL_RTX, 0);
3413                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3414                                            build_int_2 (size - 1, 0),
3415                                            NULL_RTX, 0);
3416                         if (d < 0)
3417                           quotient
3418                             = force_operand (gen_rtx_MINUS (compute_mode,
3419                                                             t4, t3),
3420                                              tquotient);
3421                         else
3422                           quotient
3423                             = force_operand (gen_rtx_MINUS (compute_mode,
3424                                                             t3, t4),
3425                                              tquotient);
3426                       }
3427                   }
3428                 else            /* Too wide mode to use tricky code */
3429                   break;
3430
3431                 insn = get_last_insn ();
3432                 if (insn != last
3433                     && (set = single_set (insn)) != 0
3434                     && SET_DEST (set) == quotient)
3435                   set_unique_reg_note (insn,
3436                                        REG_EQUAL,
3437                                        gen_rtx_DIV (compute_mode, op0, op1));
3438               }
3439             break;
3440           }
3441       fail1:
3442         delete_insns_since (last);
3443         break;
3444
3445       case FLOOR_DIV_EXPR:
3446       case FLOOR_MOD_EXPR:
3447       /* We will come here only for signed operations.  */
3448         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3449           {
3450             unsigned HOST_WIDE_INT mh, ml;
3451             int pre_shift, lgup, post_shift;
3452             HOST_WIDE_INT d = INTVAL (op1);
3453
3454             if (d > 0)
3455               {
3456                 /* We could just as easily deal with negative constants here,
3457                    but it does not seem worth the trouble for GCC 2.6.  */
3458                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3459                   {
3460                     pre_shift = floor_log2 (d);
3461                     if (rem_flag)
3462                       {
3463                         remainder = expand_binop (compute_mode, and_optab, op0,
3464                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3465                                                   remainder, 0, OPTAB_LIB_WIDEN);
3466                         if (remainder)
3467                           return gen_lowpart (mode, remainder);
3468                       }
3469                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3470                                              build_int_2 (pre_shift, 0),
3471                                              tquotient, 0);
3472                   }
3473                 else
3474                   {
3475                     rtx t1, t2, t3, t4;
3476
3477                     mh = choose_multiplier (d, size, size - 1,
3478                                             &ml, &post_shift, &lgup);
3479                     if (mh)
3480                       abort ();
3481
3482                     if (post_shift < BITS_PER_WORD
3483                         && size - 1 < BITS_PER_WORD)
3484                       {
3485                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3486                                            build_int_2 (size - 1, 0),
3487                                            NULL_RTX, 0);
3488                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3489                                            NULL_RTX, 0, OPTAB_WIDEN);
3490                         extra_cost = (shift_cost[post_shift]
3491                                       + shift_cost[size - 1] + 2 * add_cost);
3492                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3493                                                    NULL_RTX, 1,
3494                                                    max_cost - extra_cost);
3495                         if (t3 != 0)
3496                           {
3497                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3498                                                build_int_2 (post_shift, 0),
3499                                                NULL_RTX, 1);
3500                             quotient = expand_binop (compute_mode, xor_optab,
3501                                                      t4, t1, tquotient, 0,
3502                                                      OPTAB_WIDEN);
3503                           }
3504                       }
3505                   }
3506               }
3507             else
3508               {
3509                 rtx nsign, t1, t2, t3, t4;
3510                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3511                                                   op0, constm1_rtx), NULL_RTX);
3512                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3513                                    0, OPTAB_WIDEN);
3514                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3515                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3516                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3517                                     NULL_RTX);
3518                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3519                                     NULL_RTX, 0);
3520                 if (t4)
3521                   {
3522                     rtx t5;
3523                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3524                                       NULL_RTX, 0);
3525                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3526                                                             t4, t5),
3527                                               tquotient);
3528                   }
3529               }
3530           }
3531
3532         if (quotient != 0)
3533           break;
3534         delete_insns_since (last);
3535
3536         /* Try using an instruction that produces both the quotient and
3537            remainder, using truncation.  We can easily compensate the quotient
3538            or remainder to get floor rounding, once we have the remainder.
3539            Notice that we compute also the final remainder value here,
3540            and return the result right away.  */
3541         if (target == 0 || GET_MODE (target) != compute_mode)
3542           target = gen_reg_rtx (compute_mode);
3543
3544         if (rem_flag)
3545           {
3546             remainder
3547               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3548             quotient = gen_reg_rtx (compute_mode);
3549           }
3550         else
3551           {
3552             quotient
3553               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3554             remainder = gen_reg_rtx (compute_mode);
3555           }
3556
3557         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3558                                  quotient, remainder, 0))
3559           {
3560             /* This could be computed with a branch-less sequence.
3561                Save that for later.  */
3562             rtx tem;
3563             rtx label = gen_label_rtx ();
3564             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3565             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3566                                 NULL_RTX, 0, OPTAB_WIDEN);
3567             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3568             expand_dec (quotient, const1_rtx);
3569             expand_inc (remainder, op1);
3570             emit_label (label);
3571             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3572           }
3573
3574         /* No luck with division elimination or divmod.  Have to do it
3575            by conditionally adjusting op0 *and* the result.  */
3576         {
3577           rtx label1, label2, label3, label4, label5;
3578           rtx adjusted_op0;
3579           rtx tem;
3580
3581           quotient = gen_reg_rtx (compute_mode);
3582           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3583           label1 = gen_label_rtx ();
3584           label2 = gen_label_rtx ();
3585           label3 = gen_label_rtx ();
3586           label4 = gen_label_rtx ();
3587           label5 = gen_label_rtx ();
3588           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3589           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3590           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3591                               quotient, 0, OPTAB_LIB_WIDEN);
3592           if (tem != quotient)
3593             emit_move_insn (quotient, tem);
3594           emit_jump_insn (gen_jump (label5));
3595           emit_barrier ();
3596           emit_label (label1);
3597           expand_inc (adjusted_op0, const1_rtx);
3598           emit_jump_insn (gen_jump (label4));
3599           emit_barrier ();
3600           emit_label (label2);
3601           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3602           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3603                               quotient, 0, OPTAB_LIB_WIDEN);
3604           if (tem != quotient)
3605             emit_move_insn (quotient, tem);
3606           emit_jump_insn (gen_jump (label5));
3607           emit_barrier ();
3608           emit_label (label3);
3609           expand_dec (adjusted_op0, const1_rtx);
3610           emit_label (label4);
3611           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3612                               quotient, 0, OPTAB_LIB_WIDEN);
3613           if (tem != quotient)
3614             emit_move_insn (quotient, tem);
3615           expand_dec (quotient, const1_rtx);
3616           emit_label (label5);
3617         }
3618         break;
3619
3620       case CEIL_DIV_EXPR:
3621       case CEIL_MOD_EXPR:
3622         if (unsignedp)
3623           {
3624             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3625               {
3626                 rtx t1, t2, t3;
3627                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3628                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3629                                    build_int_2 (floor_log2 (d), 0),
3630                                    tquotient, 1);
3631                 t2 = expand_binop (compute_mode, and_optab, op0,
3632                                    GEN_INT (d - 1),
3633                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634                 t3 = gen_reg_rtx (compute_mode);
3635                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3636                                       compute_mode, 1, 1);
3637                 if (t3 == 0)
3638                   {
3639                     rtx lab;
3640                     lab = gen_label_rtx ();
3641                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3642                     expand_inc (t1, const1_rtx);
3643                     emit_label (lab);
3644                     quotient = t1;
3645                   }
3646                 else
3647                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3648                                                           t1, t3),
3649                                             tquotient);
3650                 break;
3651               }
3652
3653             /* Try using an instruction that produces both the quotient and
3654                remainder, using truncation.  We can easily compensate the
3655                quotient or remainder to get ceiling rounding, once we have the
3656                remainder.  Notice that we compute also the final remainder
3657                value here, and return the result right away.  */
3658             if (target == 0 || GET_MODE (target) != compute_mode)
3659               target = gen_reg_rtx (compute_mode);
3660
3661             if (rem_flag)
3662               {
3663                 remainder = (GET_CODE (target) == REG
3664                              ? target : gen_reg_rtx (compute_mode));
3665                 quotient = gen_reg_rtx (compute_mode);
3666               }
3667             else
3668               {
3669                 quotient = (GET_CODE (target) == REG
3670                             ? target : gen_reg_rtx (compute_mode));
3671                 remainder = gen_reg_rtx (compute_mode);
3672               }
3673
3674             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3675                                      remainder, 1))
3676               {
3677                 /* This could be computed with a branch-less sequence.
3678                    Save that for later.  */
3679                 rtx label = gen_label_rtx ();
3680                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3681                                  compute_mode, label);
3682                 expand_inc (quotient, const1_rtx);
3683                 expand_dec (remainder, op1);
3684                 emit_label (label);
3685                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3686               }
3687
3688             /* No luck with division elimination or divmod.  Have to do it
3689                by conditionally adjusting op0 *and* the result.  */
3690             {
3691               rtx label1, label2;
3692               rtx adjusted_op0, tem;
3693
3694               quotient = gen_reg_rtx (compute_mode);
3695               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3696               label1 = gen_label_rtx ();
3697               label2 = gen_label_rtx ();
3698               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3699                                compute_mode, label1);
3700               emit_move_insn  (quotient, const0_rtx);
3701               emit_jump_insn (gen_jump (label2));
3702               emit_barrier ();
3703               emit_label (label1);
3704               expand_dec (adjusted_op0, const1_rtx);
3705               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3706                                   quotient, 1, OPTAB_LIB_WIDEN);
3707               if (tem != quotient)
3708                 emit_move_insn (quotient, tem);
3709               expand_inc (quotient, const1_rtx);
3710               emit_label (label2);
3711             }
3712           }
3713         else /* signed */
3714           {
3715             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3716                 && INTVAL (op1) >= 0)
3717               {
3718                 /* This is extremely similar to the code for the unsigned case
3719                    above.  For 2.7 we should merge these variants, but for
3720                    2.6.1 I don't want to touch the code for unsigned since that
3721                    get used in C.  The signed case will only be used by other
3722                    languages (Ada).  */
3723
3724                 rtx t1, t2, t3;
3725                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3726                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3727                                    build_int_2 (floor_log2 (d), 0),
3728                                    tquotient, 0);
3729                 t2 = expand_binop (compute_mode, and_optab, op0,
3730                                    GEN_INT (d - 1),
3731                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3732                 t3 = gen_reg_rtx (compute_mode);
3733                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3734                                       compute_mode, 1, 1);
3735                 if (t3 == 0)
3736                   {
3737                     rtx lab;
3738                     lab = gen_label_rtx ();
3739                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3740                     expand_inc (t1, const1_rtx);
3741                     emit_label (lab);
3742                     quotient = t1;
3743                   }
3744                 else
3745                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3746                                                           t1, t3),
3747                                             tquotient);
3748                 break;
3749               }
3750
3751             /* Try using an instruction that produces both the quotient and
3752                remainder, using truncation.  We can easily compensate the
3753                quotient or remainder to get ceiling rounding, once we have the
3754                remainder.  Notice that we compute also the final remainder
3755                value here, and return the result right away.  */
3756             if (target == 0 || GET_MODE (target) != compute_mode)
3757               target = gen_reg_rtx (compute_mode);
3758             if (rem_flag)
3759               {
3760                 remainder= (GET_CODE (target) == REG
3761                             ? target : gen_reg_rtx (compute_mode));
3762                 quotient = gen_reg_rtx (compute_mode);
3763               }
3764             else
3765               {
3766                 quotient = (GET_CODE (target) == REG
3767                             ? target : gen_reg_rtx (compute_mode));
3768                 remainder = gen_reg_rtx (compute_mode);
3769               }
3770
3771             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3772                                      remainder, 0))
3773               {
3774                 /* This could be computed with a branch-less sequence.
3775                    Save that for later.  */
3776                 rtx tem;
3777                 rtx label = gen_label_rtx ();
3778                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3779                                  compute_mode, label);
3780                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3781                                     NULL_RTX, 0, OPTAB_WIDEN);
3782                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3783                 expand_inc (quotient, const1_rtx);
3784                 expand_dec (remainder, op1);
3785                 emit_label (label);
3786                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3787               }
3788
3789             /* No luck with division elimination or divmod.  Have to do it
3790                by conditionally adjusting op0 *and* the result.  */
3791             {
3792               rtx label1, label2, label3, label4, label5;
3793               rtx adjusted_op0;
3794               rtx tem;
3795
3796               quotient = gen_reg_rtx (compute_mode);
3797               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3798               label1 = gen_label_rtx ();
3799               label2 = gen_label_rtx ();
3800               label3 = gen_label_rtx ();
3801               label4 = gen_label_rtx ();
3802               label5 = gen_label_rtx ();
3803               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3804               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3805                                compute_mode, label1);
3806               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3807                                   quotient, 0, OPTAB_LIB_WIDEN);
3808               if (tem != quotient)
3809                 emit_move_insn (quotient, tem);
3810               emit_jump_insn (gen_jump (label5));
3811               emit_barrier ();
3812               emit_label (label1);
3813               expand_dec (adjusted_op0, const1_rtx);
3814               emit_jump_insn (gen_jump (label4));
3815               emit_barrier ();
3816               emit_label (label2);
3817               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3818                                compute_mode, label3);
3819               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3820                                   quotient, 0, OPTAB_LIB_WIDEN);
3821               if (tem != quotient)
3822                 emit_move_insn (quotient, tem);
3823               emit_jump_insn (gen_jump (label5));
3824               emit_barrier ();
3825               emit_label (label3);
3826               expand_inc (adjusted_op0, const1_rtx);
3827               emit_label (label4);
3828               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3829                                   quotient, 0, OPTAB_LIB_WIDEN);
3830               if (tem != quotient)
3831                 emit_move_insn (quotient, tem);
3832               expand_inc (quotient, const1_rtx);
3833               emit_label (label5);
3834             }
3835           }
3836         break;
3837
3838       case EXACT_DIV_EXPR:
3839         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3840           {
3841             HOST_WIDE_INT d = INTVAL (op1);
3842             unsigned HOST_WIDE_INT ml;
3843             int pre_shift;
3844             rtx t1;
3845
3846             pre_shift = floor_log2 (d & -d);
3847             ml = invert_mod2n (d >> pre_shift, size);
3848             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3849                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3850             quotient = expand_mult (compute_mode, t1,
3851                                     gen_int_mode (ml, compute_mode),
3852                                     NULL_RTX, 0);
3853
3854             insn = get_last_insn ();
3855             set_unique_reg_note (insn,
3856                                  REG_EQUAL,
3857                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3858                                                  compute_mode,
3859                                                  op0, op1));
3860           }
3861         break;
3862
3863       case ROUND_DIV_EXPR:
3864       case ROUND_MOD_EXPR:
3865         if (unsignedp)
3866           {
3867             rtx tem;
3868             rtx label;
3869             label = gen_label_rtx ();
3870             quotient = gen_reg_rtx (compute_mode);
3871             remainder = gen_reg_rtx (compute_mode);
3872             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3873               {
3874                 rtx tem;
3875                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3876                                          quotient, 1, OPTAB_LIB_WIDEN);
3877                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3878                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3879                                           remainder, 1, OPTAB_LIB_WIDEN);
3880               }
3881             tem = plus_constant (op1, -1);
3882             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3883                                 build_int_2 (1, 0), NULL_RTX, 1);
3884             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3885             expand_inc (quotient, const1_rtx);
3886             expand_dec (remainder, op1);
3887             emit_label (label);
3888           }
3889         else
3890           {
3891             rtx abs_rem, abs_op1, tem, mask;
3892             rtx label;
3893             label = gen_label_rtx ();
3894             quotient = gen_reg_rtx (compute_mode);
3895             remainder = gen_reg_rtx (compute_mode);
3896             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3897               {
3898                 rtx tem;
3899                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3900                                          quotient, 0, OPTAB_LIB_WIDEN);
3901                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3902                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3903                                           remainder, 0, OPTAB_LIB_WIDEN);
3904               }
3905             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3906             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3907             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3908                                 build_int_2 (1, 0), NULL_RTX, 1);
3909             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3910             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3911                                 NULL_RTX, 0, OPTAB_WIDEN);
3912             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3913                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3914             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3915                                 NULL_RTX, 0, OPTAB_WIDEN);
3916             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3917                                 NULL_RTX, 0, OPTAB_WIDEN);
3918             expand_inc (quotient, tem);
3919             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3920                                 NULL_RTX, 0, OPTAB_WIDEN);
3921             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3922                                 NULL_RTX, 0, OPTAB_WIDEN);
3923             expand_dec (remainder, tem);
3924             emit_label (label);
3925           }
3926         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3927
3928       default:
3929         abort ();
3930       }
3931
3932   if (quotient == 0)
3933     {
3934       if (target && GET_MODE (target) != compute_mode)
3935         target = 0;
3936
3937       if (rem_flag)
3938         {
3939           /* Try to produce the remainder without producing the quotient.
3940              If we seem to have a divmod pattern that does not require widening,
3941              don't try widening here.  We should really have an WIDEN argument
3942              to expand_twoval_binop, since what we'd really like to do here is
3943              1) try a mod insn in compute_mode
3944              2) try a divmod insn in compute_mode
3945              3) try a div insn in compute_mode and multiply-subtract to get
3946                 remainder
3947              4) try the same things with widening allowed.  */
3948           remainder
3949             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3950                                  op0, op1, target,
3951                                  unsignedp,
3952                                  ((optab2->handlers[(int) compute_mode].insn_code
3953                                    != CODE_FOR_nothing)
3954                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3955           if (remainder == 0)
3956             {
3957               /* No luck there.  Can we do remainder and divide at once
3958                  without a library call?  */
3959               remainder = gen_reg_rtx (compute_mode);
3960               if (! expand_twoval_binop ((unsignedp
3961                                           ? udivmod_optab
3962                                           : sdivmod_optab),
3963                                          op0, op1,
3964                                          NULL_RTX, remainder, unsignedp))
3965                 remainder = 0;
3966             }
3967
3968           if (remainder)
3969             return gen_lowpart (mode, remainder);
3970         }
3971
3972       /* Produce the quotient.  Try a quotient insn, but not a library call.
3973          If we have a divmod in this mode, use it in preference to widening
3974          the div (for this test we assume it will not fail). Note that optab2
3975          is set to the one of the two optabs that the call below will use.  */
3976       quotient
3977         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3978                              op0, op1, rem_flag ? NULL_RTX : target,
3979                              unsignedp,
3980                              ((optab2->handlers[(int) compute_mode].insn_code
3981                                != CODE_FOR_nothing)
3982                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3983
3984       if (quotient == 0)
3985         {
3986           /* No luck there.  Try a quotient-and-remainder insn,
3987              keeping the quotient alone.  */
3988           quotient = gen_reg_rtx (compute_mode);
3989           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3990                                      op0, op1,
3991                                      quotient, NULL_RTX, unsignedp))
3992             {
3993               quotient = 0;
3994               if (! rem_flag)
3995                 /* Still no luck.  If we are not computing the remainder,
3996                    use a library call for the quotient.  */
3997                 quotient = sign_expand_binop (compute_mode,
3998                                               udiv_optab, sdiv_optab,
3999                                               op0, op1, target,
4000                                               unsignedp, OPTAB_LIB_WIDEN);
4001             }
4002         }
4003     }
4004
4005   if (rem_flag)
4006     {
4007       if (target && GET_MODE (target) != compute_mode)
4008         target = 0;
4009
4010       if (quotient == 0)
4011         /* No divide instruction either.  Use library for remainder.  */
4012         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4013                                        op0, op1, target,
4014                                        unsignedp, OPTAB_LIB_WIDEN);
4015       else
4016         {
4017           /* We divided.  Now finish doing X - Y * (X / Y).  */
4018           remainder = expand_mult (compute_mode, quotient, op1,
4019                                    NULL_RTX, unsignedp);
4020           remainder = expand_binop (compute_mode, sub_optab, op0,
4021                                     remainder, target, unsignedp,
4022                                     OPTAB_LIB_WIDEN);
4023         }
4024     }
4025
4026   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4027 }
4028 \f
4029 /* Return a tree node with data type TYPE, describing the value of X.
4030    Usually this is an RTL_EXPR, if there is no obvious better choice.
4031    X may be an expression, however we only support those expressions
4032    generated by loop.c.  */
4033
4034 tree
4035 make_tree (type, x)
4036      tree type;
4037      rtx x;
4038 {
4039   tree t;
4040
4041   switch (GET_CODE (x))
4042     {
4043     case CONST_INT:
4044       t = build_int_2 (INTVAL (x),
4045                        (TREE_UNSIGNED (type)
4046                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4047                        || INTVAL (x) >= 0 ? 0 : -1);
4048       TREE_TYPE (t) = type;
4049       return t;
4050
4051     case CONST_DOUBLE:
4052       if (GET_MODE (x) == VOIDmode)
4053         {
4054           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4055           TREE_TYPE (t) = type;
4056         }
4057       else
4058         {
4059           REAL_VALUE_TYPE d;
4060
4061           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4062           t = build_real (type, d);
4063         }
4064
4065       return t;
4066
4067     case CONST_VECTOR:
4068       {
4069         int i, units;
4070         rtx elt;
4071         tree t = NULL_TREE;
4072
4073         units = CONST_VECTOR_NUNITS (x);
4074
4075         /* Build a tree with vector elements.  */
4076         for (i = units - 1; i >= 0; --i)
4077           {
4078             elt = CONST_VECTOR_ELT (x, i);
4079             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4080           }
4081
4082         return build_vector (type, t);
4083       }
4084
4085     case PLUS:
4086       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4087                           make_tree (type, XEXP (x, 1))));
4088
4089     case MINUS:
4090       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4091                           make_tree (type, XEXP (x, 1))));
4092
4093     case NEG:
4094       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4095
4096     case MULT:
4097       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4098                           make_tree (type, XEXP (x, 1))));
4099
4100     case ASHIFT:
4101       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4102                           make_tree (type, XEXP (x, 1))));
4103
4104     case LSHIFTRT:
4105       t = (*lang_hooks.types.unsigned_type) (type);
4106       return fold (convert (type,
4107                             build (RSHIFT_EXPR, t,
4108                                    make_tree (t, XEXP (x, 0)),
4109                                    make_tree (type, XEXP (x, 1)))));
4110
4111     case ASHIFTRT:
4112       t = (*lang_hooks.types.signed_type) (type);
4113       return fold (convert (type,
4114                             build (RSHIFT_EXPR, t,
4115                                    make_tree (t, XEXP (x, 0)),
4116                                    make_tree (type, XEXP (x, 1)))));
4117
4118     case DIV:
4119       if (TREE_CODE (type) != REAL_TYPE)
4120         t = (*lang_hooks.types.signed_type) (type);
4121       else
4122         t = type;
4123
4124       return fold (convert (type,
4125                             build (TRUNC_DIV_EXPR, t,
4126                                    make_tree (t, XEXP (x, 0)),
4127                                    make_tree (t, XEXP (x, 1)))));
4128     case UDIV:
4129       t = (*lang_hooks.types.unsigned_type) (type);
4130       return fold (convert (type,
4131                             build (TRUNC_DIV_EXPR, t,
4132                                    make_tree (t, XEXP (x, 0)),
4133                                    make_tree (t, XEXP (x, 1)))));
4134
4135     case SIGN_EXTEND:
4136     case ZERO_EXTEND:
4137       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4138                                              GET_CODE (x) == ZERO_EXTEND);
4139       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4140
4141    default:
4142       t = make_node (RTL_EXPR);
4143       TREE_TYPE (t) = type;
4144
4145 #ifdef POINTERS_EXTEND_UNSIGNED
4146       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4147          ptr_mode.  So convert.  */
4148       if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4149         x = convert_memory_address (TYPE_MODE (type), x);
4150 #endif
4151
4152       RTL_EXPR_RTL (t) = x;
4153       /* There are no insns to be output
4154          when this rtl_expr is used.  */
4155       RTL_EXPR_SEQUENCE (t) = 0;
4156       return t;
4157     }
4158 }
4159
4160 /* Check whether the multiplication X * MULT + ADD overflows.
4161    X, MULT and ADD must be CONST_*.
4162    MODE is the machine mode for the computation.
4163    X and MULT must have mode MODE.  ADD may have a different mode.
4164    So can X (defaults to same as MODE).
4165    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4166
4167 bool
4168 const_mult_add_overflow_p (x, mult, add, mode, unsignedp)
4169      rtx x, mult, add;
4170      enum machine_mode mode;
4171      int unsignedp;
4172 {
4173   tree type, mult_type, add_type, result;
4174
4175   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4176
4177   /* In order to get a proper overflow indication from an unsigned
4178      type, we have to pretend that it's a sizetype.  */
4179   mult_type = type;
4180   if (unsignedp)
4181     {
4182       mult_type = copy_node (type);
4183       TYPE_IS_SIZETYPE (mult_type) = 1;
4184     }
4185
4186   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4187               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4188
4189   result = fold (build (PLUS_EXPR, mult_type,
4190                         fold (build (MULT_EXPR, mult_type,
4191                                      make_tree (mult_type, x),
4192                                      make_tree (mult_type, mult))),
4193                         make_tree (add_type, add)));
4194
4195   return TREE_CONSTANT_OVERFLOW (result);
4196 }
4197
4198 /* Return an rtx representing the value of X * MULT + ADD.
4199    TARGET is a suggestion for where to store the result (an rtx).
4200    MODE is the machine mode for the computation.
4201    X and MULT must have mode MODE.  ADD may have a different mode.
4202    So can X (defaults to same as MODE).
4203    UNSIGNEDP is nonzero to do unsigned multiplication.
4204    This may emit insns.  */
4205
4206 rtx
4207 expand_mult_add (x, target, mult, add, mode, unsignedp)
4208      rtx x, target, mult, add;
4209      enum machine_mode mode;
4210      int unsignedp;
4211 {
4212   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4213   tree add_type = (GET_MODE (add) == VOIDmode
4214                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4215                                                               unsignedp));
4216   tree result =  fold (build (PLUS_EXPR, type,
4217                               fold (build (MULT_EXPR, type,
4218                                            make_tree (type, x),
4219                                            make_tree (type, mult))),
4220                               make_tree (add_type, add)));
4221
4222   return expand_expr (result, target, VOIDmode, 0);
4223 }
4224 \f
4225 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4226    and returning TARGET.
4227
4228    If TARGET is 0, a pseudo-register or constant is returned.  */
4229
4230 rtx
4231 expand_and (mode, op0, op1, target)
4232      enum machine_mode mode;
4233      rtx op0, op1, target;
4234 {
4235   rtx tem = 0;
4236
4237   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4238     tem = simplify_binary_operation (AND, mode, op0, op1);
4239   if (tem == 0)
4240     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4241
4242   if (target == 0)
4243     target = tem;
4244   else if (tem != target)
4245     emit_move_insn (target, tem);
4246   return target;
4247 }
4248 \f
4249 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4250    and storing in TARGET.  Normally return TARGET.
4251    Return 0 if that cannot be done.
4252
4253    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4254    it is VOIDmode, they cannot both be CONST_INT.
4255
4256    UNSIGNEDP is for the case where we have to widen the operands
4257    to perform the operation.  It says to use zero-extension.
4258
4259    NORMALIZEP is 1 if we should convert the result to be either zero
4260    or one.  Normalize is -1 if we should convert the result to be
4261    either zero or -1.  If NORMALIZEP is zero, the result will be left
4262    "raw" out of the scc insn.  */
4263
4264 rtx
4265 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4266      rtx target;
4267      enum rtx_code code;
4268      rtx op0, op1;
4269      enum machine_mode mode;
4270      int unsignedp;
4271      int normalizep;
4272 {
4273   rtx subtarget;
4274   enum insn_code icode;
4275   enum machine_mode compare_mode;
4276   enum machine_mode target_mode = GET_MODE (target);
4277   rtx tem;
4278   rtx last = get_last_insn ();
4279   rtx pattern, comparison;
4280
4281   /* ??? Ok to do this and then fail? */
4282   op0 = protect_from_queue (op0, 0);
4283   op1 = protect_from_queue (op1, 0);
4284
4285   if (unsignedp)
4286     code = unsigned_condition (code);
4287
4288   /* If one operand is constant, make it the second one.  Only do this
4289      if the other operand is not constant as well.  */
4290
4291   if (swap_commutative_operands_p (op0, op1))
4292     {
4293       tem = op0;
4294       op0 = op1;
4295       op1 = tem;
4296       code = swap_condition (code);
4297     }
4298
4299   if (mode == VOIDmode)
4300     mode = GET_MODE (op0);
4301
4302   /* For some comparisons with 1 and -1, we can convert this to
4303      comparisons with zero.  This will often produce more opportunities for
4304      store-flag insns.  */
4305
4306   switch (code)
4307     {
4308     case LT:
4309       if (op1 == const1_rtx)
4310         op1 = const0_rtx, code = LE;
4311       break;
4312     case LE:
4313       if (op1 == constm1_rtx)
4314         op1 = const0_rtx, code = LT;
4315       break;
4316     case GE:
4317       if (op1 == const1_rtx)
4318         op1 = const0_rtx, code = GT;
4319       break;
4320     case GT:
4321       if (op1 == constm1_rtx)
4322         op1 = const0_rtx, code = GE;
4323       break;
4324     case GEU:
4325       if (op1 == const1_rtx)
4326         op1 = const0_rtx, code = NE;
4327       break;
4328     case LTU:
4329       if (op1 == const1_rtx)
4330         op1 = const0_rtx, code = EQ;
4331       break;
4332     default:
4333       break;
4334     }
4335
4336   /* If we are comparing a double-word integer with zero, we can convert
4337      the comparison into one involving a single word.  */
4338   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4339       && GET_MODE_CLASS (mode) == MODE_INT
4340       && op1 == const0_rtx
4341       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4342     {
4343       if (code == EQ || code == NE)
4344         {
4345           /* Do a logical OR of the two words and compare the result.  */
4346           rtx op0h = gen_highpart (word_mode, op0);
4347           rtx op0l = gen_lowpart (word_mode, op0);
4348           rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4349                                       NULL_RTX, unsignedp, OPTAB_DIRECT);
4350           if (op0both != 0)
4351             return emit_store_flag (target, code, op0both, op1, word_mode,
4352                                     unsignedp, normalizep);
4353         }
4354       else if (code == LT || code == GE)
4355         /* If testing the sign bit, can just test on high word.  */
4356         return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4357                                 op1, word_mode, unsignedp, normalizep);
4358     }
4359
4360   /* From now on, we won't change CODE, so set ICODE now.  */
4361   icode = setcc_gen_code[(int) code];
4362
4363   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4364      complement of A (for GE) and shifting the sign bit to the low bit.  */
4365   if (op1 == const0_rtx && (code == LT || code == GE)
4366       && GET_MODE_CLASS (mode) == MODE_INT
4367       && (normalizep || STORE_FLAG_VALUE == 1
4368           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4369               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4370                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4371     {
4372       subtarget = target;
4373
4374       /* If the result is to be wider than OP0, it is best to convert it
4375          first.  If it is to be narrower, it is *incorrect* to convert it
4376          first.  */
4377       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4378         {
4379           op0 = protect_from_queue (op0, 0);
4380           op0 = convert_modes (target_mode, mode, op0, 0);
4381           mode = target_mode;
4382         }
4383
4384       if (target_mode != mode)
4385         subtarget = 0;
4386
4387       if (code == GE)
4388         op0 = expand_unop (mode, one_cmpl_optab, op0,
4389                            ((STORE_FLAG_VALUE == 1 || normalizep)
4390                             ? 0 : subtarget), 0);
4391
4392       if (STORE_FLAG_VALUE == 1 || normalizep)
4393         /* If we are supposed to produce a 0/1 value, we want to do
4394            a logical shift from the sign bit to the low-order bit; for
4395            a -1/0 value, we do an arithmetic shift.  */
4396         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4397                             size_int (GET_MODE_BITSIZE (mode) - 1),
4398                             subtarget, normalizep != -1);
4399
4400       if (mode != target_mode)
4401         op0 = convert_modes (target_mode, mode, op0, 0);
4402
4403       return op0;
4404     }
4405
4406   if (icode != CODE_FOR_nothing)
4407     {
4408       insn_operand_predicate_fn pred;
4409
4410       /* We think we may be able to do this with a scc insn.  Emit the
4411          comparison and then the scc insn.
4412
4413          compare_from_rtx may call emit_queue, which would be deleted below
4414          if the scc insn fails.  So call it ourselves before setting LAST.
4415          Likewise for do_pending_stack_adjust.  */
4416
4417       emit_queue ();
4418       do_pending_stack_adjust ();
4419       last = get_last_insn ();
4420
4421       comparison
4422         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4423       if (GET_CODE (comparison) == CONST_INT)
4424         return (comparison == const0_rtx ? const0_rtx
4425                 : normalizep == 1 ? const1_rtx
4426                 : normalizep == -1 ? constm1_rtx
4427                 : const_true_rtx);
4428
4429       /* The code of COMPARISON may not match CODE if compare_from_rtx
4430          decided to swap its operands and reverse the original code.
4431
4432          We know that compare_from_rtx returns either a CONST_INT or
4433          a new comparison code, so it is safe to just extract the
4434          code from COMPARISON.  */
4435       code = GET_CODE (comparison);
4436
4437       /* Get a reference to the target in the proper mode for this insn.  */
4438       compare_mode = insn_data[(int) icode].operand[0].mode;
4439       subtarget = target;
4440       pred = insn_data[(int) icode].operand[0].predicate;
4441       if (preserve_subexpressions_p ()
4442           || ! (*pred) (subtarget, compare_mode))
4443         subtarget = gen_reg_rtx (compare_mode);
4444
4445       pattern = GEN_FCN (icode) (subtarget);
4446       if (pattern)
4447         {
4448           emit_insn (pattern);
4449
4450           /* If we are converting to a wider mode, first convert to
4451              TARGET_MODE, then normalize.  This produces better combining
4452              opportunities on machines that have a SIGN_EXTRACT when we are
4453              testing a single bit.  This mostly benefits the 68k.
4454
4455              If STORE_FLAG_VALUE does not have the sign bit set when
4456              interpreted in COMPARE_MODE, we can do this conversion as
4457              unsigned, which is usually more efficient.  */
4458           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4459             {
4460               convert_move (target, subtarget,
4461                             (GET_MODE_BITSIZE (compare_mode)
4462                              <= HOST_BITS_PER_WIDE_INT)
4463                             && 0 == (STORE_FLAG_VALUE
4464                                      & ((HOST_WIDE_INT) 1
4465                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4466               op0 = target;
4467               compare_mode = target_mode;
4468             }
4469           else
4470             op0 = subtarget;
4471
4472           /* If we want to keep subexpressions around, don't reuse our
4473              last target.  */
4474
4475           if (preserve_subexpressions_p ())
4476             subtarget = 0;
4477
4478           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4479              we don't have to do anything.  */
4480           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4481             ;
4482           /* STORE_FLAG_VALUE might be the most negative number, so write
4483              the comparison this way to avoid a compiler-time warning.  */
4484           else if (- normalizep == STORE_FLAG_VALUE)
4485             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4486
4487           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4488              makes it hard to use a value of just the sign bit due to
4489              ANSI integer constant typing rules.  */
4490           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4491                    && (STORE_FLAG_VALUE
4492                        & ((HOST_WIDE_INT) 1
4493                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4494             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4495                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4496                                 subtarget, normalizep == 1);
4497           else if (STORE_FLAG_VALUE & 1)
4498             {
4499               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4500               if (normalizep == -1)
4501                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4502             }
4503           else
4504             abort ();
4505
4506           /* If we were converting to a smaller mode, do the
4507              conversion now.  */
4508           if (target_mode != compare_mode)
4509             {
4510               convert_move (target, op0, 0);
4511               return target;
4512             }
4513           else
4514             return op0;
4515         }
4516     }
4517
4518   delete_insns_since (last);
4519
4520   /* If expensive optimizations, use different pseudo registers for each
4521      insn, instead of reusing the same pseudo.  This leads to better CSE,
4522      but slows down the compiler, since there are more pseudos */
4523   subtarget = (!flag_expensive_optimizations
4524                && (target_mode == mode)) ? target : NULL_RTX;
4525
4526   /* If we reached here, we can't do this with a scc insn.  However, there
4527      are some comparisons that can be done directly.  For example, if
4528      this is an equality comparison of integers, we can try to exclusive-or
4529      (or subtract) the two operands and use a recursive call to try the
4530      comparison with zero.  Don't do any of these cases if branches are
4531      very cheap.  */
4532
4533   if (BRANCH_COST > 0
4534       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4535       && op1 != const0_rtx)
4536     {
4537       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4538                           OPTAB_WIDEN);
4539
4540       if (tem == 0)
4541         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4542                             OPTAB_WIDEN);
4543       if (tem != 0)
4544         tem = emit_store_flag (target, code, tem, const0_rtx,
4545                                mode, unsignedp, normalizep);
4546       if (tem == 0)
4547         delete_insns_since (last);
4548       return tem;
4549     }
4550
4551   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4552      the constant zero.  Reject all other comparisons at this point.  Only
4553      do LE and GT if branches are expensive since they are expensive on
4554      2-operand machines.  */
4555
4556   if (BRANCH_COST == 0
4557       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4558       || (code != EQ && code != NE
4559           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4560     return 0;
4561
4562   /* See what we need to return.  We can only return a 1, -1, or the
4563      sign bit.  */
4564
4565   if (normalizep == 0)
4566     {
4567       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4568         normalizep = STORE_FLAG_VALUE;
4569
4570       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4571                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4572                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4573         ;
4574       else
4575         return 0;
4576     }
4577
4578   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4579      do the necessary operation below.  */
4580
4581   tem = 0;
4582
4583   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4584      the sign bit set.  */
4585
4586   if (code == LE)
4587     {
4588       /* This is destructive, so SUBTARGET can't be OP0.  */
4589       if (rtx_equal_p (subtarget, op0))
4590         subtarget = 0;
4591
4592       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4593                           OPTAB_WIDEN);
4594       if (tem)
4595         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4596                             OPTAB_WIDEN);
4597     }
4598
4599   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4600      number of bits in the mode of OP0, minus one.  */
4601
4602   if (code == GT)
4603     {
4604       if (rtx_equal_p (subtarget, op0))
4605         subtarget = 0;
4606
4607       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4608                           size_int (GET_MODE_BITSIZE (mode) - 1),
4609                           subtarget, 0);
4610       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4611                           OPTAB_WIDEN);
4612     }
4613
4614   if (code == EQ || code == NE)
4615     {
4616       /* For EQ or NE, one way to do the comparison is to apply an operation
4617          that converts the operand into a positive number if it is nonzero
4618          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4619          for NE we negate.  This puts the result in the sign bit.  Then we
4620          normalize with a shift, if needed.
4621
4622          Two operations that can do the above actions are ABS and FFS, so try
4623          them.  If that doesn't work, and MODE is smaller than a full word,
4624          we can use zero-extension to the wider mode (an unsigned conversion)
4625          as the operation.  */
4626
4627       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4628          that is compensated by the subsequent overflow when subtracting
4629          one / negating.  */
4630
4631       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4632         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4633       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4634         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4635       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4636         {
4637           op0 = protect_from_queue (op0, 0);
4638           tem = convert_modes (word_mode, mode, op0, 1);
4639           mode = word_mode;
4640         }
4641
4642       if (tem != 0)
4643         {
4644           if (code == EQ)
4645             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4646                                 0, OPTAB_WIDEN);
4647           else
4648             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4649         }
4650
4651       /* If we couldn't do it that way, for NE we can "or" the two's complement
4652          of the value with itself.  For EQ, we take the one's complement of
4653          that "or", which is an extra insn, so we only handle EQ if branches
4654          are expensive.  */
4655
4656       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4657         {
4658           if (rtx_equal_p (subtarget, op0))
4659             subtarget = 0;
4660
4661           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4662           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4663                               OPTAB_WIDEN);
4664
4665           if (tem && code == EQ)
4666             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4667         }
4668     }
4669
4670   if (tem && normalizep)
4671     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4672                         size_int (GET_MODE_BITSIZE (mode) - 1),
4673                         subtarget, normalizep == 1);
4674
4675   if (tem)
4676     {
4677       if (GET_MODE (tem) != target_mode)
4678         {
4679           convert_move (target, tem, 0);
4680           tem = target;
4681         }
4682       else if (!subtarget)
4683         {
4684           emit_move_insn (target, tem);
4685           tem = target;
4686         }
4687     }
4688   else
4689     delete_insns_since (last);
4690
4691   return tem;
4692 }
4693
4694 /* Like emit_store_flag, but always succeeds.  */
4695
4696 rtx
4697 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4698      rtx target;
4699      enum rtx_code code;
4700      rtx op0, op1;
4701      enum machine_mode mode;
4702      int unsignedp;
4703      int normalizep;
4704 {
4705   rtx tem, label;
4706
4707   /* First see if emit_store_flag can do the job.  */
4708   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4709   if (tem != 0)
4710     return tem;
4711
4712   if (normalizep == 0)
4713     normalizep = 1;
4714
4715   /* If this failed, we have to do this with set/compare/jump/set code.  */
4716
4717   if (GET_CODE (target) != REG
4718       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4719     target = gen_reg_rtx (GET_MODE (target));
4720
4721   emit_move_insn (target, const1_rtx);
4722   label = gen_label_rtx ();
4723   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4724                            NULL_RTX, label);
4725
4726   emit_move_insn (target, const0_rtx);
4727   emit_label (label);
4728
4729   return target;
4730 }
4731 \f
4732 /* Perform possibly multi-word comparison and conditional jump to LABEL
4733    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4734
4735    The algorithm is based on the code in expr.c:do_jump.
4736
4737    Note that this does not perform a general comparison.  Only variants
4738    generated within expmed.c are correctly handled, others abort (but could
4739    be handled if needed).  */
4740
4741 static void
4742 do_cmp_and_jump (arg1, arg2, op, mode, label)
4743      rtx arg1, arg2, label;
4744      enum rtx_code op;
4745      enum machine_mode mode;
4746 {
4747   /* If this mode is an integer too wide to compare properly,
4748      compare word by word.  Rely on cse to optimize constant cases.  */
4749
4750   if (GET_MODE_CLASS (mode) == MODE_INT
4751       && ! can_compare_p (op, mode, ccp_jump))
4752     {
4753       rtx label2 = gen_label_rtx ();
4754
4755       switch (op)
4756         {
4757         case LTU:
4758           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4759           break;
4760
4761         case LEU:
4762           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4763           break;
4764
4765         case LT:
4766           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4767           break;
4768
4769         case GT:
4770           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4771           break;
4772
4773         case GE:
4774           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4775           break;
4776
4777           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4778              that's the only equality operations we do */
4779         case EQ:
4780           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4781             abort ();
4782           do_jump_by_parts_equality_rtx (arg1, label2, label);
4783           break;
4784
4785         case NE:
4786           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4787             abort ();
4788           do_jump_by_parts_equality_rtx (arg1, label, label2);
4789           break;
4790
4791         default:
4792           abort ();
4793         }
4794
4795       emit_label (label2);
4796     }
4797   else
4798     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4799 }