gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 88, 89, 92, 93, 94, 1995 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include "config.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "flags.h"
  27 #include "insn-flags.h"
  28 #include "insn-codes.h"
  29 #include "insn-config.h"
  30 #include "expr.h"
  31 #include "real.h"
  32 #include "recog.h"
  33
  34 static void store_fixed_bit_field       PROTO((rtx, int, int, int, rtx, int));
  35 static void store_split_bit_field       PROTO((rtx, int, int, rtx, int));
  36 static rtx extract_fixed_bit_field      PROTO((enum machine_mode, rtx, int,
  37                                                int, int, rtx, int, int));
  38 static rtx mask_rtx                     PROTO((enum machine_mode, int,
  39                                                int, int));
  40 static rtx lshift_value                 PROTO((enum machine_mode, rtx,
  41                                                int, int));
  42 static rtx extract_split_bit_field      PROTO((rtx, int, int, int, int));
  43
  44 #define CEIL(x,y) (((x) + (y) - 1) / (y))
  45
  46 /* Non-zero means divides or modulus operations are relatively cheap for
  47    powers of two, so don't use branches; emit the operation instead.
  48    Usually, this will mean that the MD file will emit non-branch
  49    sequences.  */
  50
  51 static int sdiv_pow2_cheap, smod_pow2_cheap;
  52
  53 #ifndef SLOW_UNALIGNED_ACCESS
  54 #define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
  55 #endif
  56
  57 /* For compilers that support multiple targets with different word sizes,
  58    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  59    is the H8/300(H) compiler.  */
  60
  61 #ifndef MAX_BITS_PER_WORD
  62 #define MAX_BITS_PER_WORD BITS_PER_WORD
  63 #endif
  64
  65 /* Cost of various pieces of RTL.  Note that some of these are indexed by shift count,
  66    and some by mode.  */
  67 static int add_cost, negate_cost, zero_cost;
  68 static int shift_cost[MAX_BITS_PER_WORD];
  69 static int shiftadd_cost[MAX_BITS_PER_WORD];
  70 static int shiftsub_cost[MAX_BITS_PER_WORD];
  71 static int mul_cost[NUM_MACHINE_MODES];
  72 static int div_cost[NUM_MACHINE_MODES];
  73 static int mul_widen_cost[NUM_MACHINE_MODES];
  74 static int mul_highpart_cost[NUM_MACHINE_MODES];
  75
  76 void
  77 init_expmed ()
  78 {
  79   char *free_point;
  80   /* This is "some random pseudo register" for purposes of calling recog
  81      to see what insns exist.  */
  82   rtx reg = gen_rtx (REG, word_mode, 10000);
  83   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  84   int dummy;
  85   int m;
  86   enum machine_mode mode, wider_mode;
  87
  88   start_sequence ();
  89
  90   /* Since we are on the permanent obstack, we must be sure we save this
  91      spot AFTER we call start_sequence, since it will reuse the rtl it
  92      makes.  */
  93
  94   free_point = (char *) oballoc (0);
  95
  96   zero_cost = rtx_cost (const0_rtx, 0);
  97   add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
  98
  99   shift_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
 100                                    gen_rtx (ASHIFT, word_mode, reg,
 101                                             const0_rtx)));
 102
 103   shiftadd_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
 104                                       gen_rtx (PLUS, word_mode,
 105                                                gen_rtx (MULT, word_mode,
 106                                                         reg, const0_rtx),
 107                                                reg)));
 108
 109   shiftsub_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
 110                                       gen_rtx (MINUS, word_mode,
 111                                                gen_rtx (MULT, word_mode,
 112                                                          reg, const0_rtx),
 113                                                 reg)));
 114
 115   init_recog ();
 116
 117   shift_cost[0] = 0;
 118   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 119
 120   for (m = 1; m < BITS_PER_WORD; m++)
 121     {
 122       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 123
 124       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 125       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 126         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 127
 128       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 129         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 130       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 131         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 132
 133       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 134         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 135       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 136         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 137     }
 138
 139   negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);
 140
 141   sdiv_pow2_cheap
 142     = (rtx_cost (gen_rtx (DIV, word_mode, reg, GEN_INT (32)), SET)
 143        <= 2 * add_cost);
 144   smod_pow2_cheap
 145     = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
 146        <= 2 * add_cost);
 147
 148   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 149        mode != VOIDmode;
 150        mode = GET_MODE_WIDER_MODE (mode))
 151     {
 152       reg = gen_rtx (REG, mode, 10000);
 153       div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
 154       mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
 155       wider_mode = GET_MODE_WIDER_MODE (mode);
 156       if (wider_mode != VOIDmode)
 157         {
 158           mul_widen_cost[(int) wider_mode]
 159             = rtx_cost (gen_rtx (MULT, wider_mode,
 160                                  gen_rtx (ZERO_EXTEND, wider_mode, reg),
 161                                  gen_rtx (ZERO_EXTEND, wider_mode, reg)),
 162                         SET);
 163           mul_highpart_cost[(int) mode]
 164             = rtx_cost (gen_rtx (TRUNCATE, mode,
 165                                  gen_rtx (LSHIFTRT, wider_mode,
 166                                           gen_rtx (MULT, wider_mode,
 167                                                    gen_rtx (ZERO_EXTEND, wider_mode, reg),
 168                                                    gen_rtx (ZERO_EXTEND, wider_mode, reg)),
 169                                           GEN_INT (GET_MODE_BITSIZE (mode)))),
 170                         SET);
 171         }
 172     }
 173
 174   /* Free the objects we just allocated.  */
 175   end_sequence ();
 176   obfree (free_point);
 177 }
 178
 179 /* Return an rtx representing minus the value of X.
 180    MODE is the intended mode of the result,
 181    useful if X is a CONST_INT.  */
 182
 183 rtx
 184 negate_rtx (mode, x)
 185      enum machine_mode mode;
 186      rtx x;
 187 {
 188   if (GET_CODE (x) == CONST_INT)
 189     {
 190       HOST_WIDE_INT val = - INTVAL (x);
 191       if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT)
 192         {
 193           /* Sign extend the value from the bits that are significant.  */
 194           if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
 195             val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode);
 196           else
 197             val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1;
 198         }
 199       return GEN_INT (val);
 200     }
 201   else
 202     return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0);
 203 }
 204 \f
 205 /* Generate code to store value from rtx VALUE
 206    into a bit-field within structure STR_RTX
 207    containing BITSIZE bits starting at bit BITNUM.
 208    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 209    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
 210    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 211
 212 /* ??? Note that there are two different ideas here for how
 213    to determine the size to count bits within, for a register.
 214    One is BITS_PER_WORD, and the other is the size of operand 3
 215    of the insv pattern.  (The latter assumes that an n-bit machine
 216    will be able to insert bit fields up to n bits wide.)
 217    It isn't certain that either of these is right.
 218    extract_bit_field has the same quandary.  */
 219
 220 rtx
 221 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 222      rtx str_rtx;
 223      register int bitsize;
 224      int bitnum;
 225      enum machine_mode fieldmode;
 226      rtx value;
 227      int align;
 228      int total_size;
 229 {
 230   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 231   register int offset = bitnum / unit;
 232   register int bitpos = bitnum % unit;
 233   register rtx op0 = str_rtx;
 234
 235   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 236     abort ();
 237
 238   /* Discount the part of the structure before the desired byte.
 239      We need to know how many bytes are safe to reference after it.  */
 240   if (total_size >= 0)
 241     total_size -= (bitpos / BIGGEST_ALIGNMENT
 242                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 243
 244   while (GET_CODE (op0) == SUBREG)
 245     {
 246       /* The following line once was done only if WORDS_BIG_ENDIAN,
 247          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 248          meaningful at a much higher level; when structures are copied
 249          between memory and regs, the higher-numbered regs
 250          always get higher addresses.  */
 251       offset += SUBREG_WORD (op0);
 252       /* We used to adjust BITPOS here, but now we do the whole adjustment
 253          right after the loop.  */
 254       op0 = SUBREG_REG (op0);
 255     }
 256
 257   /* If OP0 is a register, BITPOS must count within a word.
 258      But as we have it, it counts within whatever size OP0 now has.
 259      On a bigendian machine, these are not the same, so convert.  */
 260   if (BYTES_BIG_ENDIAN
 261       && GET_CODE (op0) != MEM
 262       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 263     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 264
 265   value = protect_from_queue (value, 0);
 266
 267   if (flag_force_mem)
 268     value = force_not_mem (value);
 269
 270   /* Note that the adjustment of BITPOS above has no effect on whether
 271      BITPOS is 0 in a REG bigger than a word.  */
 272   if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 273       && (GET_CODE (op0) != MEM
 274           || ! SLOW_UNALIGNED_ACCESS
 275           || (offset * BITS_PER_UNIT % bitsize == 0
 276               && align % GET_MODE_SIZE (fieldmode) == 0))
 277       && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
 278     {
 279       /* Storing in a full-word or multi-word field in a register
 280          can be done with just SUBREG.  */
 281       if (GET_MODE (op0) != fieldmode)
 282         {
 283           if (GET_CODE (op0) == REG)
 284             op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
 285           else
 286             op0 = change_address (op0, fieldmode,
 287                                   plus_constant (XEXP (op0, 0), offset));
 288         }
 289       emit_move_insn (op0, value);
 290       return value;
 291     }
 292
 293   /* Storing an lsb-aligned field in a register
 294      can be done with a movestrict instruction.  */
 295
 296   if (GET_CODE (op0) != MEM
 297       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 298       && bitsize == GET_MODE_BITSIZE (fieldmode)
 299       && (GET_MODE (op0) == fieldmode
 300           || (movstrict_optab->handlers[(int) fieldmode].insn_code
 301               != CODE_FOR_nothing)))
 302     {
 303       /* Get appropriate low part of the value being stored.  */
 304       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 305         value = gen_lowpart (fieldmode, value);
 306       else if (!(GET_CODE (value) == SYMBOL_REF
 307                  || GET_CODE (value) == LABEL_REF
 308                  || GET_CODE (value) == CONST))
 309         value = convert_to_mode (fieldmode, value, 0);
 310
 311       if (GET_MODE (op0) == fieldmode)
 312         emit_move_insn (op0, value);
 313       else
 314         {
 315           int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 316           if(! (*insn_operand_predicate[icode][1]) (value, fieldmode))
 317             value = copy_to_mode_reg (fieldmode, value);
 318           emit_insn (GEN_FCN (icode)
 319                    (gen_rtx (SUBREG, fieldmode, op0, offset), value));
 320         }
 321       return value;
 322     }
 323
 324   /* Handle fields bigger than a word.  */
 325
 326   if (bitsize > BITS_PER_WORD)
 327     {
 328       /* Here we transfer the words of the field
 329          in the order least significant first.
 330          This is because the most significant word is the one which may
 331          be less than full.
 332          However, only do that if the value is not BLKmode.  */
 333
 334       int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 335
 336       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 337       int i;
 338
 339       /* This is the mode we must force value to, so that there will be enough
 340          subwords to extract.  Note that fieldmode will often (always?) be
 341          VOIDmode, because that is what store_field uses to indicate that this
 342          is a bit field, but passing VOIDmode to operand_subword_force will
 343          result in an abort.  */
 344       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
 345
 346       for (i = 0; i < nwords; i++)
 347         {
 348           /* If I is 0, use the low-order word in both field and target;
 349              if I is 1, use the next to lowest word; and so on.  */
 350           int wordnum = (backwards ? nwords - i - 1 : i);
 351           int bit_offset = (backwards
 352                             ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
 353                             : i * BITS_PER_WORD);
 354           store_bit_field (op0, MIN (BITS_PER_WORD,
 355                                      bitsize - i * BITS_PER_WORD),
 356                            bitnum + bit_offset, word_mode,
 357                            operand_subword_force (value, wordnum,
 358                                                   (GET_MODE (value) == VOIDmode
 359                                                    ? fieldmode
 360                                                    : GET_MODE (value))),
 361                            align, total_size);
 362         }
 363       return value;
 364     }
 365
 366   /* From here on we can assume that the field to be stored in is
 367      a full-word (whatever type that is), since it is shorter than a word.  */
 368
 369   /* OFFSET is the number of words or bytes (UNIT says which)
 370      from STR_RTX to the first word or byte containing part of the field.  */
 371
 372   if (GET_CODE (op0) == REG)
 373     {
 374       if (offset != 0
 375           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 376         op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
 377                        op0, offset);
 378       offset = 0;
 379     }
 380   else
 381     {
 382       op0 = protect_from_queue (op0, 1);
 383     }
 384
 385   /* If VALUE is a floating-point mode, access it as an integer of the
 386      corresponding size.  This can occur on a machine with 64 bit registers
 387      that uses SFmode for float.  This can also occur for unaligned float
 388      structure fields.  */
 389   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 390     {
 391       if (GET_CODE (value) != REG)
 392         value = copy_to_reg (value);
 393       value = gen_rtx (SUBREG, word_mode, value, 0);
 394     }
 395
 396   /* Now OFFSET is nonzero only if OP0 is memory
 397      and is therefore always measured in bytes.  */
 398
 399 #ifdef HAVE_insv
 400   if (HAVE_insv
 401       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 402       /* Ensure insv's size is wide enough for this field.  */
 403       && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3])
 404           >= bitsize)
 405       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 406             && (bitsize + bitpos
 407                 > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]))))
 408     {
 409       int xbitpos = bitpos;
 410       rtx value1;
 411       rtx xop0 = op0;
 412       rtx last = get_last_insn ();
 413       rtx pat;
 414       enum machine_mode maxmode
 415         = insn_operand_mode[(int) CODE_FOR_insv][3];
 416
 417       int save_volatile_ok = volatile_ok;
 418       volatile_ok = 1;
 419
 420       /* If this machine's insv can only insert into a register, or if we
 421          are to force MEMs into a register, copy OP0 into a register and
 422          save it back later.  */
 423       if (GET_CODE (op0) == MEM
 424           && (flag_force_mem
 425               || ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
 426                     (op0, VOIDmode))))
 427         {
 428           rtx tempreg;
 429           enum machine_mode bestmode;
 430
 431           /* Get the mode to use for inserting into this field.  If OP0 is
 432              BLKmode, get the smallest mode consistent with the alignment. If
 433              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 434              mode. Otherwise, use the smallest mode containing the field.  */
 435
 436           if (GET_MODE (op0) == BLKmode
 437               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 438             bestmode
 439               = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
 440                                MEM_VOLATILE_P (op0));
 441           else
 442             bestmode = GET_MODE (op0);
 443
 444           if (bestmode == VOIDmode
 445               || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
 446             goto insv_loses;
 447
 448           /* Adjust address to point to the containing unit of that mode.  */
 449           unit = GET_MODE_BITSIZE (bestmode);
 450           /* Compute offset as multiple of this unit, counting in bytes.  */
 451           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 452           bitpos = bitnum % unit;
 453           op0 = change_address (op0, bestmode,
 454                                 plus_constant (XEXP (op0, 0), offset));
 455
 456           /* Fetch that unit, store the bitfield in it, then store the unit.  */
 457           tempreg = copy_to_reg (op0);
 458           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 459                            align, total_size);
 460           emit_move_insn (op0, tempreg);
 461           return value;
 462         }
 463       volatile_ok = save_volatile_ok;
 464
 465       /* Add OFFSET into OP0's address.  */
 466       if (GET_CODE (xop0) == MEM)
 467         xop0 = change_address (xop0, byte_mode,
 468                                plus_constant (XEXP (xop0, 0), offset));
 469
 470       /* If xop0 is a register, we need it in MAXMODE
 471          to make it acceptable to the format of insv.  */
 472       if (GET_CODE (xop0) == SUBREG)
 473         /* We can't just change the mode, because this might clobber op0,
 474            and we will need the original value of op0 if insv fails.  */
 475         xop0 = gen_rtx (SUBREG, maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
 476       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 477         xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
 478
 479       /* On big-endian machines, we count bits from the most significant.
 480          If the bit field insn does not, we must invert.  */
 481
 482       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 483         xbitpos = unit - bitsize - xbitpos;
 484
 485       /* We have been counting XBITPOS within UNIT.
 486          Count instead within the size of the register.  */
 487       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 488         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 489
 490       unit = GET_MODE_BITSIZE (maxmode);
 491
 492       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 493       value1 = value;
 494       if (GET_MODE (value) != maxmode)
 495         {
 496           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 497             {
 498               /* Optimization: Don't bother really extending VALUE
 499                  if it has all the bits we will actually use.  However,
 500                  if we must narrow it, be sure we do it correctly.  */
 501
 502               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 503                 {
 504                   /* Avoid making subreg of a subreg, or of a mem.  */
 505                   if (GET_CODE (value1) != REG)
 506                 value1 = copy_to_reg (value1);
 507                   value1 = gen_rtx (SUBREG, maxmode, value1, 0);
 508                 }
 509               else
 510                 value1 = gen_lowpart (maxmode, value1);
 511             }
 512           else if (!CONSTANT_P (value))
 513             /* Parse phase is supposed to make VALUE's data type
 514                match that of the component reference, which is a type
 515                at least as wide as the field; so VALUE should have
 516                a mode that corresponds to that type.  */
 517             abort ();
 518         }
 519
 520       /* If this machine's insv insists on a register,
 521          get VALUE1 into a register.  */
 522       if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
 523              (value1, maxmode)))
 524         value1 = force_reg (maxmode, value1);
 525
 526       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 527       if (pat)
 528         emit_insn (pat);
 529       else
 530         {
 531           delete_insns_since (last);
 532           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 533         }
 534     }
 535   else
 536     insv_loses:
 537 #endif
 538     /* Insv is not available; store using shifts and boolean ops.  */
 539     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 540   return value;
 541 }
 542 \f
 543 /* Use shifts and boolean operations to store VALUE
 544    into a bit field of width BITSIZE
 545    in a memory location specified by OP0 except offset by OFFSET bytes.
 546      (OFFSET must be 0 if OP0 is a register.)
 547    The field starts at position BITPOS within the byte.
 548     (If OP0 is a register, it may be a full word or a narrower mode,
 549      but BITPOS still counts within a full word,
 550      which is significant on bigendian machines.)
 551    STRUCT_ALIGN is the alignment the structure is known to have (in bytes).
 552
 553    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 554
 555 static void
 556 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 557      register rtx op0;
 558      register int offset, bitsize, bitpos;
 559      register rtx value;
 560      int struct_align;
 561 {
 562   register enum machine_mode mode;
 563   int total_bits = BITS_PER_WORD;
 564   rtx subtarget, temp;
 565   int all_zero = 0;
 566   int all_one = 0;
 567
 568   /* There is a case not handled here:
 569      a structure with a known alignment of just a halfword
 570      and a field split across two aligned halfwords within the structure.
 571      Or likewise a structure with a known alignment of just a byte
 572      and a field split across two bytes.
 573      Such cases are not supposed to be able to occur.  */
 574
 575   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 576     {
 577       if (offset != 0)
 578         abort ();
 579       /* Special treatment for a bit field split across two registers.  */
 580       if (bitsize + bitpos > BITS_PER_WORD)
 581         {
 582           store_split_bit_field (op0, bitsize, bitpos,
 583                                  value, BITS_PER_WORD);
 584           return;
 585         }
 586     }
 587   else
 588     {
 589       /* Get the proper mode to use for this field.  We want a mode that
 590          includes the entire field.  If such a mode would be larger than
 591          a word, we won't be doing the extraction the normal way.  */
 592
 593       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 594                             struct_align * BITS_PER_UNIT, word_mode,
 595                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 596
 597       if (mode == VOIDmode)
 598         {
 599           /* The only way this should occur is if the field spans word
 600              boundaries.  */
 601           store_split_bit_field (op0,
 602                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 603                                  value, struct_align);
 604           return;
 605         }
 606
 607       total_bits = GET_MODE_BITSIZE (mode);
 608
 609       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 610          be be in the range 0 to total_bits-1, and put any excess bytes in
 611          OFFSET.  */
 612       if (bitpos >= total_bits)
 613         {
 614           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 615           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 616                      * BITS_PER_UNIT);
 617         }
 618
 619       /* Get ref to an aligned byte, halfword, or word containing the field.
 620          Adjust BITPOS to be position within a word,
 621          and OFFSET to be the offset of that word.
 622          Then alter OP0 to refer to that word.  */
 623       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 624       offset -= (offset % (total_bits / BITS_PER_UNIT));
 625       op0 = change_address (op0, mode,
 626                             plus_constant (XEXP (op0, 0), offset));
 627     }
 628
 629   mode = GET_MODE (op0);
 630
 631   /* Now MODE is either some integral mode for a MEM as OP0,
 632      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 633      The bit field is contained entirely within OP0.
 634      BITPOS is the starting bit number within OP0.
 635      (OP0's mode may actually be narrower than MODE.)  */
 636
 637   if (BYTES_BIG_ENDIAN)
 638       /* BITPOS is the distance between our msb
 639          and that of the containing datum.
 640          Convert it to the distance from the lsb.  */
 641       bitpos = total_bits - bitsize - bitpos;
 642
 643   /* Now BITPOS is always the distance between our lsb
 644      and that of OP0.  */
 645
 646   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 647      we must first convert its mode to MODE.  */
 648
 649   if (GET_CODE (value) == CONST_INT)
 650     {
 651       register HOST_WIDE_INT v = INTVAL (value);
 652
 653       if (bitsize < HOST_BITS_PER_WIDE_INT)
 654         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 655
 656       if (v == 0)
 657         all_zero = 1;
 658       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 659                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 660                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 661         all_one = 1;
 662
 663       value = lshift_value (mode, value, bitpos, bitsize);
 664     }
 665   else
 666     {
 667       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 668                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 669
 670       if (GET_MODE (value) != mode)
 671         {
 672           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 673               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 674             value = gen_lowpart (mode, value);
 675           else
 676             value = convert_to_mode (mode, value, 1);
 677         }
 678
 679       if (must_and)
 680         value = expand_binop (mode, and_optab, value,
 681                               mask_rtx (mode, 0, bitsize, 0),
 682                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 683       if (bitpos > 0)
 684         value = expand_shift (LSHIFT_EXPR, mode, value,
 685                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 686     }
 687
 688   /* Now clear the chosen bits in OP0,
 689      except that if VALUE is -1 we need not bother.  */
 690
 691   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 692
 693   if (! all_one)
 694     {
 695       temp = expand_binop (mode, and_optab, op0,
 696                            mask_rtx (mode, bitpos, bitsize, 1),
 697                            subtarget, 1, OPTAB_LIB_WIDEN);
 698       subtarget = temp;
 699     }
 700   else
 701     temp = op0;
 702
 703   /* Now logical-or VALUE into OP0, unless it is zero.  */
 704
 705   if (! all_zero)
 706     temp = expand_binop (mode, ior_optab, temp, value,
 707                          subtarget, 1, OPTAB_LIB_WIDEN);
 708   if (op0 != temp)
 709     emit_move_insn (op0, temp);
 710 }
 711 \f
 712 /* Store a bit field that is split across multiple accessible memory objects.
 713
 714    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 715    BITSIZE is the field width; BITPOS the position of its first bit
 716    (within the word).
 717    VALUE is the value to store.
 718    ALIGN is the known alignment of OP0, measured in bytes.
 719    This is also the size of the memory objects to be used.
 720
 721    This does not yet handle fields wider than BITS_PER_WORD.  */
 722
 723 static void
 724 store_split_bit_field (op0, bitsize, bitpos, value, align)
 725      rtx op0;
 726      int bitsize, bitpos;
 727      rtx value;
 728      int align;
 729 {
 730   int unit;
 731   int bitsdone = 0;
 732
 733   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 734      much at a time.  */
 735   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 736     unit = BITS_PER_WORD;
 737   else
 738     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
 739
 740   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 741      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 742      that VALUE might be a floating-point constant.  */
 743   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 744     {
 745       rtx word = gen_lowpart_common (word_mode, value);
 746
 747       if (word && (value != word))
 748         value = word;
 749       else
 750         value = gen_lowpart_common (word_mode,
 751                                     force_reg (GET_MODE (value) != VOIDmode
 752                                                ? GET_MODE (value)
 753                                                : word_mode, value));
 754     }
 755
 756   while (bitsdone < bitsize)
 757     {
 758       int thissize;
 759       rtx part, word;
 760       int thispos;
 761       int offset;
 762
 763       offset = (bitpos + bitsdone) / unit;
 764       thispos = (bitpos + bitsdone) % unit;
 765
 766       /* THISSIZE must not overrun a word boundary.  Otherwise,
 767          store_fixed_bit_field will call us again, and we will mutually
 768          recurse forever.  */
 769       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 770       thissize = MIN (thissize, unit - thispos);
 771
 772       if (BYTES_BIG_ENDIAN)
 773         {
 774           int total_bits;
 775
 776           /* We must do an endian conversion exactly the same way as it is
 777              done in extract_bit_field, so that the two calls to
 778              extract_fixed_bit_field will have comparable arguments.  */
 779           if (GET_CODE (value) != MEM)
 780             total_bits = BITS_PER_WORD;
 781           else
 782             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 783
 784           /* Fetch successively less significant portions.  */
 785           if (GET_CODE (value) == CONST_INT)
 786             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 787                              >> (bitsize - bitsdone - thissize))
 788                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 789           else
 790             /* The args are chosen so that the last part includes the
 791                lsb.  Give extract_bit_field the value it needs (with
 792                endianness compensation) to fetch the piece we want.  */
 793             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 794                                             total_bits - bitsize + bitsdone,
 795                                             NULL_RTX, 1, align);
 796         }
 797       else
 798         {
 799           /* Fetch successively more significant portions.  */
 800           if (GET_CODE (value) == CONST_INT)
 801             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 802                              >> bitsdone)
 803                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 804           else
 805             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 806                                             bitsdone, NULL_RTX, 1, align);
 807         }
 808
 809       /* If OP0 is a register, then handle OFFSET here.
 810
 811          When handling multiword bitfields, extract_bit_field may pass
 812          down a word_mode SUBREG of a larger REG for a bitfield that actually
 813          crosses a word boundary.  Thus, for a SUBREG, we must find
 814          the current word starting from the base register.  */
 815       if (GET_CODE (op0) == SUBREG)
 816         {
 817           word = operand_subword_force (SUBREG_REG (op0),
 818                                         SUBREG_WORD (op0) + offset,
 819                                         GET_MODE (SUBREG_REG (op0)));
 820           offset = 0;
 821         }
 822       else if (GET_CODE (op0) == REG)
 823         {
 824           word = operand_subword_force (op0, offset, GET_MODE (op0));
 825           offset = 0;
 826         }
 827       else
 828         word = op0;
 829
 830       /* OFFSET is in UNITs, and UNIT is in bits.
 831          store_fixed_bit_field wants offset in bytes.  */
 832       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 833                              thissize, thispos, part, align);
 834       bitsdone += thissize;
 835     }
 836 }
 837 \f
 838 /* Generate code to extract a byte-field from STR_RTX
 839    containing BITSIZE bits, starting at BITNUM,
 840    and put it in TARGET if possible (if TARGET is nonzero).
 841    Regardless of TARGET, we return the rtx for where the value is placed.
 842    It may be a QUEUED.
 843
 844    STR_RTX is the structure containing the byte (a REG or MEM).
 845    UNSIGNEDP is nonzero if this is an unsigned bit field.
 846    MODE is the natural mode of the field value once extracted.
 847    TMODE is the mode the caller would like the value to have;
 848    but the value may be returned with type MODE instead.
 849
 850    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
 851    TOTAL_SIZE is the size in bytes of the containing structure,
 852    or -1 if varying.
 853
 854    If a TARGET is specified and we can store in it at no extra cost,
 855    we do so, and return TARGET.
 856    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 857    if they are equally easy.  */
 858
 859 rtx
 860 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 861                    target, mode, tmode, align, total_size)
 862      rtx str_rtx;
 863      register int bitsize;
 864      int bitnum;
 865      int unsignedp;
 866      rtx target;
 867      enum machine_mode mode, tmode;
 868      int align;
 869      int total_size;
 870 {
 871   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 872   register int offset = bitnum / unit;
 873   register int bitpos = bitnum % unit;
 874   register rtx op0 = str_rtx;
 875   rtx spec_target = target;
 876   rtx spec_target_subreg = 0;
 877
 878   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 879     abort ();
 880
 881   /* Discount the part of the structure before the desired byte.
 882      We need to know how many bytes are safe to reference after it.  */
 883   if (total_size >= 0)
 884     total_size -= (bitpos / BIGGEST_ALIGNMENT
 885                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 886
 887   if (tmode == VOIDmode)
 888     tmode = mode;
 889   while (GET_CODE (op0) == SUBREG)
 890     {
 891       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
 892       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
 893
 894       offset += SUBREG_WORD (op0);
 895
 896       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
 897         {
 898           bitpos += inner_size - outer_size;
 899           if (bitpos > unit)
 900             {
 901               offset += (bitpos / unit);
 902               bitpos %= unit;
 903             }
 904         }
 905
 906       op0 = SUBREG_REG (op0);
 907     }
 908
 909   /* ??? We currently assume TARGET is at least as big as BITSIZE.
 910      If that's wrong, the solution is to test for it and set TARGET to 0
 911      if needed.  */
 912
 913   /* If OP0 is a register, BITPOS must count within a word.
 914      But as we have it, it counts within whatever size OP0 now has.
 915      On a bigendian machine, these are not the same, so convert.  */
 916   if (BYTES_BIG_ENDIAN &&
 917       GET_CODE (op0) != MEM
 918       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 919     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 920
 921   /* Extracting a full-word or multi-word value
 922      from a structure in a register or aligned memory.
 923      This can be done with just SUBREG.
 924      So too extracting a subword value in
 925      the least significant part of the register.  */
 926
 927   if ((GET_CODE (op0) == REG
 928        || (GET_CODE (op0) == MEM
 929            && (! SLOW_UNALIGNED_ACCESS
 930                || (offset * BITS_PER_UNIT % bitsize == 0
 931                    && align * BITS_PER_UNIT % bitsize == 0))))
 932       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
 933            && bitpos % BITS_PER_WORD == 0)
 934           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
 935               && (BYTES_BIG_ENDIAN
 936                   ? bitpos + bitsize == BITS_PER_WORD
 937                   : bitpos == 0))))
 938     {
 939       enum machine_mode mode1
 940         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
 941
 942       if (mode1 != GET_MODE (op0))
 943         {
 944           if (GET_CODE (op0) == REG)
 945             op0 = gen_rtx (SUBREG, mode1, op0, offset);
 946           else
 947             op0 = change_address (op0, mode1,
 948                                   plus_constant (XEXP (op0, 0), offset));
 949         }
 950       if (mode1 != mode)
 951         return convert_to_mode (tmode, op0, unsignedp);
 952       return op0;
 953     }
 954
 955   /* Handle fields bigger than a word.  */
 956
 957   if (bitsize > BITS_PER_WORD)
 958     {
 959       /* Here we transfer the words of the field
 960          in the order least significant first.
 961          This is because the most significant word is the one which may
 962          be less than full.  */
 963
 964       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 965       int i;
 966
 967       if (target == 0 || GET_CODE (target) != REG)
 968         target = gen_reg_rtx (mode);
 969
 970       for (i = 0; i < nwords; i++)
 971         {
 972           /* If I is 0, use the low-order word in both field and target;
 973              if I is 1, use the next to lowest word; and so on.  */
 974           /* Word number in TARGET to use.  */
 975           int wordnum = (WORDS_BIG_ENDIAN
 976                          ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
 977                          : i);
 978           /* Offset from start of field in OP0.  */
 979           int bit_offset = (WORDS_BIG_ENDIAN
 980                             ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
 981                             : i * BITS_PER_WORD);
 982           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
 983           rtx result_part
 984             = extract_bit_field (op0, MIN (BITS_PER_WORD,
 985                                            bitsize - i * BITS_PER_WORD),
 986                                  bitnum + bit_offset,
 987                                  1, target_part, mode, word_mode,
 988                                  align, total_size);
 989
 990           if (target_part == 0)
 991             abort ();
 992
 993           if (result_part != target_part)
 994             emit_move_insn (target_part, result_part);
 995         }
 996
 997       if (unsignedp)
 998         {
 999           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1000              need to be zero'd out.  */
1001           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1002             {
1003               int i,total_words;
1004
1005               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1006               for (i = nwords; i < total_words; i++)
1007                 {
1008                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1009                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1010                   emit_move_insn (target_part, const0_rtx);
1011                 }
1012             }
1013           return target;
1014         }
1015
1016       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1017       target = expand_shift (LSHIFT_EXPR, mode, target,
1018                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1019                              NULL_RTX, 0);
1020       return expand_shift (RSHIFT_EXPR, mode, target,
1021                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1022                            NULL_RTX, 0);
1023     }
1024
1025   /* From here on we know the desired field is smaller than a word
1026      so we can assume it is an integer.  So we can safely extract it as one
1027      size of integer, if necessary, and then truncate or extend
1028      to the size that is wanted.  */
1029
1030   /* OFFSET is the number of words or bytes (UNIT says which)
1031      from STR_RTX to the first word or byte containing part of the field.  */
1032
1033   if (GET_CODE (op0) == REG)
1034     {
1035       if (offset != 0
1036           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1037         op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
1038                        op0, offset);
1039       offset = 0;
1040     }
1041   else
1042     {
1043       op0 = protect_from_queue (str_rtx, 1);
1044     }
1045
1046   /* Now OFFSET is nonzero only for memory operands.  */
1047
1048   if (unsignedp)
1049     {
1050 #ifdef HAVE_extzv
1051       if (HAVE_extzv
1052           && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0])
1053               >= bitsize)
1054           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1055                 && (bitsize + bitpos
1056                     > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]))))
1057         {
1058           int xbitpos = bitpos, xoffset = offset;
1059           rtx bitsize_rtx, bitpos_rtx;
1060           rtx last = get_last_insn();
1061           rtx xop0 = op0;
1062           rtx xtarget = target;
1063           rtx xspec_target = spec_target;
1064           rtx xspec_target_subreg = spec_target_subreg;
1065           rtx pat;
1066           enum machine_mode maxmode
1067             = insn_operand_mode[(int) CODE_FOR_extzv][0];
1068
1069           if (GET_CODE (xop0) == MEM)
1070             {
1071               int save_volatile_ok = volatile_ok;
1072               volatile_ok = 1;
1073
1074               /* Is the memory operand acceptable?  */
1075               if (flag_force_mem
1076                   || ! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
1077                         (xop0, GET_MODE (xop0))))
1078                 {
1079                   /* No, load into a reg and extract from there.  */
1080                   enum machine_mode bestmode;
1081
1082                   /* Get the mode to use for inserting into this field.  If
1083                      OP0 is BLKmode, get the smallest mode consistent with the
1084                      alignment. If OP0 is a non-BLKmode object that is no
1085                      wider than MAXMODE, use its mode. Otherwise, use the
1086                      smallest mode containing the field.  */
1087
1088                   if (GET_MODE (xop0) == BLKmode
1089                       || (GET_MODE_SIZE (GET_MODE (op0))
1090                           > GET_MODE_SIZE (maxmode)))
1091                     bestmode = get_best_mode (bitsize, bitnum,
1092                                               align * BITS_PER_UNIT, maxmode,
1093                                               MEM_VOLATILE_P (xop0));
1094                   else
1095                     bestmode = GET_MODE (xop0);
1096
1097                   if (bestmode == VOIDmode
1098                       || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1099                     goto extzv_loses;
1100
1101                   /* Compute offset as multiple of this unit,
1102                      counting in bytes.  */
1103                   unit = GET_MODE_BITSIZE (bestmode);
1104                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1105                   xbitpos = bitnum % unit;
1106                   xop0 = change_address (xop0, bestmode,
1107                                          plus_constant (XEXP (xop0, 0),
1108                                                         xoffset));
1109                   /* Fetch it to a register in that size.  */
1110                   xop0 = force_reg (bestmode, xop0);
1111
1112                   /* XBITPOS counts within UNIT, which is what is expected.  */
1113                 }
1114               else
1115                 /* Get ref to first byte containing part of the field.  */
1116                 xop0 = change_address (xop0, byte_mode,
1117                                        plus_constant (XEXP (xop0, 0), xoffset));
1118
1119               volatile_ok = save_volatile_ok;
1120             }
1121
1122           /* If op0 is a register, we need it in MAXMODE (which is usually
1123              SImode). to make it acceptable to the format of extzv.  */
1124           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1125             abort ();
1126           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1127             xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
1128
1129           /* On big-endian machines, we count bits from the most significant.
1130              If the bit field insn does not, we must invert.  */
1131           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1132             xbitpos = unit - bitsize - xbitpos;
1133
1134           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1135           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1136             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1137
1138           unit = GET_MODE_BITSIZE (maxmode);
1139
1140           if (xtarget == 0
1141               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1142             xtarget = xspec_target = gen_reg_rtx (tmode);
1143
1144           if (GET_MODE (xtarget) != maxmode)
1145             {
1146               if (GET_CODE (xtarget) == REG)
1147                 {
1148                   int wider = (GET_MODE_SIZE (maxmode)
1149                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1150                   xtarget = gen_lowpart (maxmode, xtarget);
1151                   if (wider)
1152                     xspec_target_subreg = xtarget;
1153                 }
1154               else
1155                 xtarget = gen_reg_rtx (maxmode);
1156             }
1157
1158           /* If this machine's extzv insists on a register target,
1159              make sure we have one.  */
1160           if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
1161                  (xtarget, maxmode)))
1162             xtarget = gen_reg_rtx (maxmode);
1163
1164           bitsize_rtx = GEN_INT (bitsize);
1165           bitpos_rtx = GEN_INT (xbitpos);
1166
1167           pat = gen_extzv (protect_from_queue (xtarget, 1),
1168                            xop0, bitsize_rtx, bitpos_rtx);
1169           if (pat)
1170             {
1171               emit_insn (pat);
1172               target = xtarget;
1173               spec_target = xspec_target;
1174               spec_target_subreg = xspec_target_subreg;
1175             }
1176           else
1177             {
1178               delete_insns_since (last);
1179               target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1180                                                 bitpos, target, 1, align);
1181             }
1182         }
1183       else
1184         extzv_loses:
1185 #endif
1186         target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1187                                           target, 1, align);
1188     }
1189   else
1190     {
1191 #ifdef HAVE_extv
1192       if (HAVE_extv
1193           && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0])
1194               >= bitsize)
1195           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1196                 && (bitsize + bitpos
1197                     > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]))))
1198         {
1199           int xbitpos = bitpos, xoffset = offset;
1200           rtx bitsize_rtx, bitpos_rtx;
1201           rtx last = get_last_insn();
1202           rtx xop0 = op0, xtarget = target;
1203           rtx xspec_target = spec_target;
1204           rtx xspec_target_subreg = spec_target_subreg;
1205           rtx pat;
1206           enum machine_mode maxmode
1207             = insn_operand_mode[(int) CODE_FOR_extv][0];
1208
1209           if (GET_CODE (xop0) == MEM)
1210             {
1211               /* Is the memory operand acceptable?  */
1212               if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
1213                      (xop0, GET_MODE (xop0))))
1214                 {
1215                   /* No, load into a reg and extract from there.  */
1216                   enum machine_mode bestmode;
1217
1218                   /* Get the mode to use for inserting into this field.  If
1219                      OP0 is BLKmode, get the smallest mode consistent with the
1220                      alignment. If OP0 is a non-BLKmode object that is no
1221                      wider than MAXMODE, use its mode. Otherwise, use the
1222                      smallest mode containing the field.  */
1223
1224                   if (GET_MODE (xop0) == BLKmode
1225                       || (GET_MODE_SIZE (GET_MODE (op0))
1226                           > GET_MODE_SIZE (maxmode)))
1227                     bestmode = get_best_mode (bitsize, bitnum,
1228                                               align * BITS_PER_UNIT, maxmode,
1229                                               MEM_VOLATILE_P (xop0));
1230                   else
1231                     bestmode = GET_MODE (xop0);
1232
1233                   if (bestmode == VOIDmode
1234                       || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1235                     goto extv_loses;
1236
1237                   /* Compute offset as multiple of this unit,
1238                      counting in bytes.  */
1239                   unit = GET_MODE_BITSIZE (bestmode);
1240                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1241                   xbitpos = bitnum % unit;
1242                   xop0 = change_address (xop0, bestmode,
1243                                          plus_constant (XEXP (xop0, 0),
1244                                                         xoffset));
1245                   /* Fetch it to a register in that size.  */
1246                   xop0 = force_reg (bestmode, xop0);
1247
1248                   /* XBITPOS counts within UNIT, which is what is expected.  */
1249                 }
1250               else
1251                 /* Get ref to first byte containing part of the field.  */
1252                 xop0 = change_address (xop0, byte_mode,
1253                                        plus_constant (XEXP (xop0, 0), xoffset));
1254             }
1255
1256           /* If op0 is a register, we need it in MAXMODE (which is usually
1257              SImode) to make it acceptable to the format of extv.  */
1258           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1259             abort ();
1260           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1261             xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
1262
1263           /* On big-endian machines, we count bits from the most significant.
1264              If the bit field insn does not, we must invert.  */
1265           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1266             xbitpos = unit - bitsize - xbitpos;
1267
1268           /* XBITPOS counts within a size of UNIT.
1269              Adjust to count within a size of MAXMODE.  */
1270           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1271             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1272
1273           unit = GET_MODE_BITSIZE (maxmode);
1274
1275           if (xtarget == 0
1276               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1277             xtarget = xspec_target = gen_reg_rtx (tmode);
1278
1279           if (GET_MODE (xtarget) != maxmode)
1280             {
1281               if (GET_CODE (xtarget) == REG)
1282                 {
1283                   int wider = (GET_MODE_SIZE (maxmode)
1284                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1285                   xtarget = gen_lowpart (maxmode, xtarget);
1286                   if (wider)
1287                     xspec_target_subreg = xtarget;
1288                 }
1289               else
1290                 xtarget = gen_reg_rtx (maxmode);
1291             }
1292
1293           /* If this machine's extv insists on a register target,
1294              make sure we have one.  */
1295           if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
1296                  (xtarget, maxmode)))
1297             xtarget = gen_reg_rtx (maxmode);
1298
1299           bitsize_rtx = GEN_INT (bitsize);
1300           bitpos_rtx = GEN_INT (xbitpos);
1301
1302           pat = gen_extv (protect_from_queue (xtarget, 1),
1303                           xop0, bitsize_rtx, bitpos_rtx);
1304           if (pat)
1305             {
1306               emit_insn (pat);
1307               target = xtarget;
1308               spec_target = xspec_target;
1309               spec_target_subreg = xspec_target_subreg;
1310             }
1311           else
1312             {
1313               delete_insns_since (last);
1314               target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1315                                                 bitpos, target, 0, align);
1316             }
1317         }
1318       else
1319         extv_loses:
1320 #endif
1321         target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1322                                           target, 0, align);
1323     }
1324   if (target == spec_target)
1325     return target;
1326   if (target == spec_target_subreg)
1327     return spec_target;
1328   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1329     {
1330       /* If the target mode is floating-point, first convert to the
1331          integer mode of that size and then access it as a floating-point
1332          value via a SUBREG.  */
1333       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1334         {
1335           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1336                                                    MODE_INT, 0),
1337                                     target, unsignedp);
1338           if (GET_CODE (target) != REG)
1339             target = copy_to_reg (target);
1340           return gen_rtx (SUBREG, tmode, target, 0);
1341         }
1342       else
1343         return convert_to_mode (tmode, target, unsignedp);
1344     }
1345   return target;
1346 }
1347 \f
1348 /* Extract a bit field using shifts and boolean operations
1349    Returns an rtx to represent the value.
1350    OP0 addresses a register (word) or memory (byte).
1351    BITPOS says which bit within the word or byte the bit field starts in.
1352    OFFSET says how many bytes farther the bit field starts;
1353     it is 0 if OP0 is a register.
1354    BITSIZE says how many bits long the bit field is.
1355     (If OP0 is a register, it may be narrower than a full word,
1356      but BITPOS still counts within a full word,
1357      which is significant on bigendian machines.)
1358
1359    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1360    If TARGET is nonzero, attempts to store the value there
1361    and return TARGET, but this is not guaranteed.
1362    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1363
1364    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.  */
1365
1366 static rtx
1367 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1368                          target, unsignedp, align)
1369      enum machine_mode tmode;
1370      register rtx op0, target;
1371      register int offset, bitsize, bitpos;
1372      int unsignedp;
1373      int align;
1374 {
1375   int total_bits = BITS_PER_WORD;
1376   enum machine_mode mode;
1377
1378   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1379     {
1380       /* Special treatment for a bit field split across two registers.  */
1381       if (bitsize + bitpos > BITS_PER_WORD)
1382         return extract_split_bit_field (op0, bitsize, bitpos,
1383                                         unsignedp, align);
1384     }
1385   else
1386     {
1387       /* Get the proper mode to use for this field.  We want a mode that
1388          includes the entire field.  If such a mode would be larger than
1389          a word, we won't be doing the extraction the normal way.  */
1390
1391       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1392                             align * BITS_PER_UNIT, word_mode,
1393                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1394
1395       if (mode == VOIDmode)
1396         /* The only way this should occur is if the field spans word
1397            boundaries.  */
1398         return extract_split_bit_field (op0, bitsize,
1399                                         bitpos + offset * BITS_PER_UNIT,
1400                                         unsignedp, align);
1401
1402       total_bits = GET_MODE_BITSIZE (mode);
1403
1404       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1405          be be in the range 0 to total_bits-1, and put any excess bytes in
1406          OFFSET.  */
1407       if (bitpos >= total_bits)
1408         {
1409           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1410           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1411                      * BITS_PER_UNIT);
1412         }
1413
1414       /* Get ref to an aligned byte, halfword, or word containing the field.
1415          Adjust BITPOS to be position within a word,
1416          and OFFSET to be the offset of that word.
1417          Then alter OP0 to refer to that word.  */
1418       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1419       offset -= (offset % (total_bits / BITS_PER_UNIT));
1420       op0 = change_address (op0, mode,
1421                             plus_constant (XEXP (op0, 0), offset));
1422     }
1423
1424   mode = GET_MODE (op0);
1425
1426   if (BYTES_BIG_ENDIAN)
1427     {
1428       /* BITPOS is the distance between our msb and that of OP0.
1429          Convert it to the distance from the lsb.  */
1430
1431       bitpos = total_bits - bitsize - bitpos;
1432     }
1433
1434   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1435      We have reduced the big-endian case to the little-endian case.  */
1436
1437   if (unsignedp)
1438     {
1439       if (bitpos)
1440         {
1441           /* If the field does not already start at the lsb,
1442              shift it so it does.  */
1443           tree amount = build_int_2 (bitpos, 0);
1444           /* Maybe propagate the target for the shift.  */
1445           /* But not if we will return it--could confuse integrate.c.  */
1446           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1447                            && !REG_FUNCTION_VALUE_P (target)
1448                            ? target : 0);
1449           if (tmode != mode) subtarget = 0;
1450           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1451         }
1452       /* Convert the value to the desired mode.  */
1453       if (mode != tmode)
1454         op0 = convert_to_mode (tmode, op0, 1);
1455
1456       /* Unless the msb of the field used to be the msb when we shifted,
1457          mask out the upper bits.  */
1458
1459       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1460 #if 0
1461 #ifdef SLOW_ZERO_EXTEND
1462           /* Always generate an `and' if
1463              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1464              will combine fruitfully with the zero-extend. */
1465           || tmode != mode
1466 #endif
1467 #endif
1468           )
1469         return expand_binop (GET_MODE (op0), and_optab, op0,
1470                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1471                              target, 1, OPTAB_LIB_WIDEN);
1472       return op0;
1473     }
1474
1475   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1476      then arithmetic-shift its lsb to the lsb of the word.  */
1477   op0 = force_reg (mode, op0);
1478   if (mode != tmode)
1479     target = 0;
1480
1481   /* Find the narrowest integer mode that contains the field.  */
1482
1483   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1484        mode = GET_MODE_WIDER_MODE (mode))
1485     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1486       {
1487         op0 = convert_to_mode (mode, op0, 0);
1488         break;
1489       }
1490
1491   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1492     {
1493       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1494       /* Maybe propagate the target for the shift.  */
1495       /* But not if we will return the result--could confuse integrate.c.  */
1496       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1497                        && ! REG_FUNCTION_VALUE_P (target)
1498                        ? target : 0);
1499       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1500     }
1501
1502   return expand_shift (RSHIFT_EXPR, mode, op0,
1503                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1504                        target, 0);
1505 }
1506 \f
1507 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1508    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1509    complement of that if COMPLEMENT.  The mask is truncated if
1510    necessary to the width of mode MODE.  The mask is zero-extended if
1511    BITSIZE+BITPOS is too small for MODE.  */
1512
1513 static rtx
1514 mask_rtx (mode, bitpos, bitsize, complement)
1515      enum machine_mode mode;
1516      int bitpos, bitsize, complement;
1517 {
1518   HOST_WIDE_INT masklow, maskhigh;
1519
1520   if (bitpos < HOST_BITS_PER_WIDE_INT)
1521     masklow = (HOST_WIDE_INT) -1 << bitpos;
1522   else
1523     masklow = 0;
1524
1525   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1526     masklow &= ((unsigned HOST_WIDE_INT) -1
1527                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1528
1529   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1530     maskhigh = -1;
1531   else
1532     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1533
1534   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1535     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1536                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1537   else
1538     maskhigh = 0;
1539
1540   if (complement)
1541     {
1542       maskhigh = ~maskhigh;
1543       masklow = ~masklow;
1544     }
1545
1546   return immed_double_const (masklow, maskhigh, mode);
1547 }
1548
1549 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1550    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1551
1552 static rtx
1553 lshift_value (mode, value, bitpos, bitsize)
1554      enum machine_mode mode;
1555      rtx value;
1556      int bitpos, bitsize;
1557 {
1558   unsigned HOST_WIDE_INT v = INTVAL (value);
1559   HOST_WIDE_INT low, high;
1560
1561   if (bitsize < HOST_BITS_PER_WIDE_INT)
1562     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1563
1564   if (bitpos < HOST_BITS_PER_WIDE_INT)
1565     {
1566       low = v << bitpos;
1567       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1568     }
1569   else
1570     {
1571       low = 0;
1572       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1573     }
1574
1575   return immed_double_const (low, high, mode);
1576 }
1577 \f
1578 /* Extract a bit field that is split across two words
1579    and return an RTX for the result.
1580
1581    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1582    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1583    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1584
1585    ALIGN is the known alignment of OP0, measured in bytes.
1586    This is also the size of the memory objects to be used.  */
1587
1588 static rtx
1589 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1590      rtx op0;
1591      int bitsize, bitpos, unsignedp, align;
1592 {
1593   int unit;
1594   int bitsdone = 0;
1595   rtx result;
1596   int first = 1;
1597
1598   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1599      much at a time.  */
1600   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1601     unit = BITS_PER_WORD;
1602   else
1603     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
1604
1605   while (bitsdone < bitsize)
1606     {
1607       int thissize;
1608       rtx part, word;
1609       int thispos;
1610       int offset;
1611
1612       offset = (bitpos + bitsdone) / unit;
1613       thispos = (bitpos + bitsdone) % unit;
1614
1615       /* THISSIZE must not overrun a word boundary.  Otherwise,
1616          extract_fixed_bit_field will call us again, and we will mutually
1617          recurse forever.  */
1618       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1619       thissize = MIN (thissize, unit - thispos);
1620
1621       /* If OP0 is a register, then handle OFFSET here.
1622
1623          When handling multiword bitfields, extract_bit_field may pass
1624          down a word_mode SUBREG of a larger REG for a bitfield that actually
1625          crosses a word boundary.  Thus, for a SUBREG, we must find
1626          the current word starting from the base register.  */
1627       if (GET_CODE (op0) == SUBREG)
1628         {
1629           word = operand_subword_force (SUBREG_REG (op0),
1630                                         SUBREG_WORD (op0) + offset,
1631                                         GET_MODE (SUBREG_REG (op0)));
1632           offset = 0;
1633         }
1634       else if (GET_CODE (op0) == REG)
1635         {
1636           word = operand_subword_force (op0, offset, GET_MODE (op0));
1637           offset = 0;
1638         }
1639       else
1640         word = op0;
1641
1642       /* Extract the parts in bit-counting order,
1643          whose meaning is determined by BYTES_PER_UNIT.
1644          OFFSET is in UNITs, and UNIT is in bits.
1645          extract_fixed_bit_field wants offset in bytes.  */
1646       part = extract_fixed_bit_field (word_mode, word,
1647                                       offset * unit / BITS_PER_UNIT,
1648                                       thissize, thispos, 0, 1, align);
1649       bitsdone += thissize;
1650
1651       /* Shift this part into place for the result.  */
1652       if (BYTES_BIG_ENDIAN)
1653         {
1654           if (bitsize != bitsdone)
1655             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1656                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1657         }
1658       else
1659         {
1660           if (bitsdone != thissize)
1661             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1662                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1663         }
1664
1665       if (first)
1666         result = part;
1667       else
1668         /* Combine the parts with bitwise or.  This works
1669            because we extracted each part as an unsigned bit field.  */
1670         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1671                                OPTAB_LIB_WIDEN);
1672
1673       first = 0;
1674     }
1675
1676   /* Unsigned bit field: we are done.  */
1677   if (unsignedp)
1678     return result;
1679   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1680   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1681                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1682                          NULL_RTX, 0);
1683   return expand_shift (RSHIFT_EXPR, word_mode, result,
1684                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1685 }
1686 \f
1687 /* Add INC into TARGET.  */
1688
1689 void
1690 expand_inc (target, inc)
1691      rtx target, inc;
1692 {
1693   rtx value = expand_binop (GET_MODE (target), add_optab,
1694                             target, inc,
1695                             target, 0, OPTAB_LIB_WIDEN);
1696   if (value != target)
1697     emit_move_insn (target, value);
1698 }
1699
1700 /* Subtract DEC from TARGET.  */
1701
1702 void
1703 expand_dec (target, dec)
1704      rtx target, dec;
1705 {
1706   rtx value = expand_binop (GET_MODE (target), sub_optab,
1707                             target, dec,
1708                             target, 0, OPTAB_LIB_WIDEN);
1709   if (value != target)
1710     emit_move_insn (target, value);
1711 }
1712 \f
1713 /* Output a shift instruction for expression code CODE,
1714    with SHIFTED being the rtx for the value to shift,
1715    and AMOUNT the tree for the amount to shift by.
1716    Store the result in the rtx TARGET, if that is convenient.
1717    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1718    Return the rtx for where the value is.  */
1719
1720 rtx
1721 expand_shift (code, mode, shifted, amount, target, unsignedp)
1722      enum tree_code code;
1723      register enum machine_mode mode;
1724      rtx shifted;
1725      tree amount;
1726      register rtx target;
1727      int unsignedp;
1728 {
1729   register rtx op1, temp = 0;
1730   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1731   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1732   int try;
1733
1734   /* Previously detected shift-counts computed by NEGATE_EXPR
1735      and shifted in the other direction; but that does not work
1736      on all machines.  */
1737
1738   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1739
1740 #ifdef SHIFT_COUNT_TRUNCATED
1741   if (SHIFT_COUNT_TRUNCATED
1742       && GET_CODE (op1) == CONST_INT
1743       && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
1744     op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1745                    % GET_MODE_BITSIZE (mode));
1746 #endif
1747
1748   if (op1 == const0_rtx)
1749     return shifted;
1750
1751   for (try = 0; temp == 0 && try < 3; try++)
1752     {
1753       enum optab_methods methods;
1754
1755       if (try == 0)
1756         methods = OPTAB_DIRECT;
1757       else if (try == 1)
1758         methods = OPTAB_WIDEN;
1759       else
1760         methods = OPTAB_LIB_WIDEN;
1761
1762       if (rotate)
1763         {
1764           /* Widening does not work for rotation.  */
1765           if (methods == OPTAB_WIDEN)
1766             continue;
1767           else if (methods == OPTAB_LIB_WIDEN)
1768             {
1769               /* If we have been unable to open-code this by a rotation,
1770                  do it as the IOR of two shifts.  I.e., to rotate A
1771                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1772                  where C is the bitsize of A.
1773
1774                  It is theoretically possible that the target machine might
1775                  not be able to perform either shift and hence we would
1776                  be making two libcalls rather than just the one for the
1777                  shift (similarly if IOR could not be done).  We will allow
1778                  this extremely unlikely lossage to avoid complicating the
1779                  code below.  */
1780
1781               rtx subtarget = target == shifted ? 0 : target;
1782               rtx temp1;
1783               tree type = TREE_TYPE (amount);
1784               tree new_amount = make_tree (type, op1);
1785               tree other_amount
1786                 = fold (build (MINUS_EXPR, type,
1787                                convert (type,
1788                                         build_int_2 (GET_MODE_BITSIZE (mode),
1789                                                      0)),
1790                                amount));
1791
1792               shifted = force_reg (mode, shifted);
1793
1794               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1795                                    mode, shifted, new_amount, subtarget, 1);
1796               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1797                                     mode, shifted, other_amount, 0, 1);
1798               return expand_binop (mode, ior_optab, temp, temp1, target,
1799                                    unsignedp, methods);
1800             }
1801
1802           temp = expand_binop (mode,
1803                                left ? rotl_optab : rotr_optab,
1804                                shifted, op1, target, unsignedp, methods);
1805
1806           /* If we don't have the rotate, but we are rotating by a constant
1807              that is in range, try a rotate in the opposite direction.  */
1808
1809           if (temp == 0 && GET_CODE (op1) == CONST_INT
1810               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1811             temp = expand_binop (mode,
1812                                  left ? rotr_optab : rotl_optab,
1813                                  shifted,
1814                                  GEN_INT (GET_MODE_BITSIZE (mode)
1815                                           - INTVAL (op1)),
1816                                  target, unsignedp, methods);
1817         }
1818       else if (unsignedp)
1819         temp = expand_binop (mode,
1820                              left ? ashl_optab : lshr_optab,
1821                              shifted, op1, target, unsignedp, methods);
1822
1823       /* Do arithmetic shifts.
1824          Also, if we are going to widen the operand, we can just as well
1825          use an arithmetic right-shift instead of a logical one.  */
1826       if (temp == 0 && ! rotate
1827           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1828         {
1829           enum optab_methods methods1 = methods;
1830
1831           /* If trying to widen a log shift to an arithmetic shift,
1832              don't accept an arithmetic shift of the same size.  */
1833           if (unsignedp)
1834             methods1 = OPTAB_MUST_WIDEN;
1835
1836           /* Arithmetic shift */
1837
1838           temp = expand_binop (mode,
1839                                left ? ashl_optab : ashr_optab,
1840                                shifted, op1, target, unsignedp, methods1);
1841         }
1842
1843       /* We used to try extzv here for logical right shifts, but that was
1844          only useful for one machine, the VAX, and caused poor code
1845          generation there for lshrdi3, so the code was deleted and a
1846          define_expand for lshrsi3 was added to vax.md.  */
1847     }
1848
1849   if (temp == 0)
1850     abort ();
1851   return temp;
1852 }
1853 \f
1854 enum alg_code { alg_zero, alg_m, alg_shift,
1855                   alg_add_t_m2, alg_sub_t_m2,
1856                   alg_add_factor, alg_sub_factor,
1857                   alg_add_t2_m, alg_sub_t2_m,
1858                   alg_add, alg_subtract, alg_factor, alg_shiftop };
1859
1860 /* This structure records a sequence of operations.
1861    `ops' is the number of operations recorded.
1862    `cost' is their total cost.
1863    The operations are stored in `op' and the corresponding
1864    logarithms of the integer coefficients in `log'.
1865
1866    These are the operations:
1867    alg_zero             total := 0;
1868    alg_m                total := multiplicand;
1869    alg_shift            total := total * coeff
1870    alg_add_t_m2         total := total + multiplicand * coeff;
1871    alg_sub_t_m2         total := total - multiplicand * coeff;
1872    alg_add_factor       total := total * coeff + total;
1873    alg_sub_factor       total := total * coeff - total;
1874    alg_add_t2_m         total := total * coeff + multiplicand;
1875    alg_sub_t2_m         total := total * coeff - multiplicand;
1876
1877    The first operand must be either alg_zero or alg_m.  */
1878
1879 struct algorithm
1880 {
1881   short cost;
1882   short ops;
1883   /* The size of the OP and LOG fields are not directly related to the
1884      word size, but the worst-case algorithms will be if we have few
1885      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
1886      In that case we will generate shift-by-2, add, shift-by-2, add,...,
1887      in total wordsize operations.  */
1888   enum alg_code op[MAX_BITS_PER_WORD];
1889   char log[MAX_BITS_PER_WORD];
1890 };
1891
1892 /* Compute and return the best algorithm for multiplying by T.
1893    The algorithm must cost less than cost_limit
1894    If retval.cost >= COST_LIMIT, no algorithm was found and all
1895    other field of the returned struct are undefined.  */
1896
1897 static void
1898 synth_mult (alg_out, t, cost_limit)
1899      struct algorithm *alg_out;
1900      unsigned HOST_WIDE_INT t;
1901      int cost_limit;
1902 {
1903   int m;
1904   struct algorithm *alg_in, *best_alg;
1905   unsigned int cost;
1906   unsigned HOST_WIDE_INT q;
1907
1908   /* Indicate that no algorithm is yet found.  If no algorithm
1909      is found, this value will be returned and indicate failure.  */
1910   alg_out->cost = cost_limit;
1911
1912   if (cost_limit <= 0)
1913     return;
1914
1915   /* t == 1 can be done in zero cost.  */
1916   if (t == 1)
1917     {
1918       alg_out->ops = 1;
1919       alg_out->cost = 0;
1920       alg_out->op[0] = alg_m;
1921       return;
1922     }
1923
1924   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
1925      fail now.  */
1926   if (t == 0)
1927     {
1928       if (zero_cost >= cost_limit)
1929         return;
1930       else
1931         {
1932           alg_out->ops = 1;
1933           alg_out->cost = zero_cost;
1934           alg_out->op[0] = alg_zero;
1935           return;
1936         }
1937     }
1938
1939   /* We'll be needing a couple extra algorithm structures now.  */
1940
1941   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
1942   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
1943
1944   /* If we have a group of zero bits at the low-order part of T, try
1945      multiplying by the remaining bits and then doing a shift.  */
1946
1947   if ((t & 1) == 0)
1948     {
1949       m = floor_log2 (t & -t);  /* m = number of low zero bits */
1950       q = t >> m;
1951       cost = shift_cost[m];
1952       synth_mult (alg_in, q, cost_limit - cost);
1953
1954       cost += alg_in->cost;
1955       if (cost < cost_limit)
1956         {
1957           struct algorithm *x;
1958           x = alg_in, alg_in = best_alg, best_alg = x;
1959           best_alg->log[best_alg->ops] = m;
1960           best_alg->op[best_alg->ops] = alg_shift;
1961           cost_limit = cost;
1962         }
1963     }
1964
1965   /* If we have an odd number, add or subtract one.  */
1966   if ((t & 1) != 0)
1967     {
1968       unsigned HOST_WIDE_INT w;
1969
1970       for (w = 1; (w & t) != 0; w <<= 1)
1971         ;
1972       if (w > 2
1973           /* Reject the case where t is 3.
1974              Thus we prefer addition in that case.  */
1975           && t != 3)
1976         {
1977           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
1978
1979           cost = add_cost;
1980           synth_mult (alg_in, t + 1, cost_limit - cost);
1981
1982           cost += alg_in->cost;
1983           if (cost < cost_limit)
1984             {
1985               struct algorithm *x;
1986               x = alg_in, alg_in = best_alg, best_alg = x;
1987               best_alg->log[best_alg->ops] = 0;
1988               best_alg->op[best_alg->ops] = alg_sub_t_m2;
1989               cost_limit = cost;
1990             }
1991         }
1992       else
1993         {
1994           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
1995
1996           cost = add_cost;
1997           synth_mult (alg_in, t - 1, cost_limit - cost);
1998
1999           cost += alg_in->cost;
2000           if (cost < cost_limit)
2001             {
2002               struct algorithm *x;
2003               x = alg_in, alg_in = best_alg, best_alg = x;
2004               best_alg->log[best_alg->ops] = 0;
2005               best_alg->op[best_alg->ops] = alg_add_t_m2;
2006               cost_limit = cost;
2007             }
2008         }
2009     }
2010
2011   /* Look for factors of t of the form
2012      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2013      If we find such a factor, we can multiply by t using an algorithm that
2014      multiplies by q, shift the result by m and add/subtract it to itself.
2015
2016      We search for large factors first and loop down, even if large factors
2017      are less probable than small; if we find a large factor we will find a
2018      good sequence quickly, and therefore be able to prune (by decreasing
2019      COST_LIMIT) the search.  */
2020
2021   for (m = floor_log2 (t - 1); m >= 2; m--)
2022     {
2023       unsigned HOST_WIDE_INT d;
2024
2025       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2026       if (t % d == 0 && t > d)
2027         {
2028           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2029           synth_mult (alg_in, t / d, cost_limit - cost);
2030
2031           cost += alg_in->cost;
2032           if (cost < cost_limit)
2033             {
2034               struct algorithm *x;
2035               x = alg_in, alg_in = best_alg, best_alg = x;
2036               best_alg->log[best_alg->ops] = m;
2037               best_alg->op[best_alg->ops] = alg_add_factor;
2038               cost_limit = cost;
2039             }
2040           /* Other factors will have been taken care of in the recursion.  */
2041           break;
2042         }
2043
2044       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2045       if (t % d == 0 && t > d)
2046         {
2047           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2048           synth_mult (alg_in, t / d, cost_limit - cost);
2049
2050           cost += alg_in->cost;
2051           if (cost < cost_limit)
2052             {
2053               struct algorithm *x;
2054               x = alg_in, alg_in = best_alg, best_alg = x;
2055               best_alg->log[best_alg->ops] = m;
2056               best_alg->op[best_alg->ops] = alg_sub_factor;
2057               cost_limit = cost;
2058             }
2059           break;
2060         }
2061     }
2062
2063   /* Try shift-and-add (load effective address) instructions,
2064      i.e. do a*3, a*5, a*9.  */
2065   if ((t & 1) != 0)
2066     {
2067       q = t - 1;
2068       q = q & -q;
2069       m = exact_log2 (q);
2070       if (m >= 0)
2071         {
2072           cost = shiftadd_cost[m];
2073           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2074
2075           cost += alg_in->cost;
2076           if (cost < cost_limit)
2077             {
2078               struct algorithm *x;
2079               x = alg_in, alg_in = best_alg, best_alg = x;
2080               best_alg->log[best_alg->ops] = m;
2081               best_alg->op[best_alg->ops] = alg_add_t2_m;
2082               cost_limit = cost;
2083             }
2084         }
2085
2086       q = t + 1;
2087       q = q & -q;
2088       m = exact_log2 (q);
2089       if (m >= 0)
2090         {
2091           cost = shiftsub_cost[m];
2092           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2093
2094           cost += alg_in->cost;
2095           if (cost < cost_limit)
2096             {
2097               struct algorithm *x;
2098               x = alg_in, alg_in = best_alg, best_alg = x;
2099               best_alg->log[best_alg->ops] = m;
2100               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2101               cost_limit = cost;
2102             }
2103         }
2104     }
2105
2106   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2107      we have not found any algorithm.  */
2108   if (cost_limit == alg_out->cost)
2109     return;
2110
2111   /* If we are getting a too long sequence for `struct algorithm'
2112      to record, make this search fail.  */
2113   if (best_alg->ops == MAX_BITS_PER_WORD)
2114     return;
2115
2116   /* Copy the algorithm from temporary space to the space at alg_out.
2117      We avoid using structure assignment because the majority of
2118      best_alg is normally undefined, and this is a critical function.  */
2119   alg_out->ops = best_alg->ops + 1;
2120   alg_out->cost = cost_limit;
2121   bcopy ((char *) best_alg->op, (char *) alg_out->op,
2122          alg_out->ops * sizeof *alg_out->op);
2123   bcopy ((char *) best_alg->log, (char *) alg_out->log,
2124          alg_out->ops * sizeof *alg_out->log);
2125 }
2126 \f
2127 /* Perform a multiplication and return an rtx for the result.
2128    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2129    TARGET is a suggestion for where to store the result (an rtx).
2130
2131    We check specially for a constant integer as OP1.
2132    If you want this check for OP0 as well, then before calling
2133    you should swap the two operands if OP0 would be constant.  */
2134
2135 rtx
2136 expand_mult (mode, op0, op1, target, unsignedp)
2137      enum machine_mode mode;
2138      register rtx op0, op1, target;
2139      int unsignedp;
2140 {
2141   rtx const_op1 = op1;
2142
2143   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2144      less than or equal in size to `unsigned int' this doesn't matter.
2145      If the mode is larger than `unsigned int', then synth_mult works only
2146      if the constant value exactly fits in an `unsigned int' without any
2147      truncation.  This means that multiplying by negative values does
2148      not work; results are off by 2^32 on a 32 bit machine.  */
2149
2150   /* If we are multiplying in DImode, it may still be a win
2151      to try to work with shifts and adds.  */
2152   if (GET_CODE (op1) == CONST_DOUBLE
2153       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2154       && HOST_BITS_PER_INT >= BITS_PER_WORD
2155       && CONST_DOUBLE_HIGH (op1) == 0)
2156     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2157   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2158            && GET_CODE (op1) == CONST_INT
2159            && INTVAL (op1) < 0)
2160     const_op1 = 0;
2161
2162   /* We used to test optimize here, on the grounds that it's better to
2163      produce a smaller program when -O is not used.
2164      But this causes such a terrible slowdown sometimes
2165      that it seems better to use synth_mult always.  */
2166
2167   if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2168     {
2169       struct algorithm alg;
2170       struct algorithm alg2;
2171       HOST_WIDE_INT val = INTVAL (op1);
2172       HOST_WIDE_INT val_so_far;
2173       rtx insn;
2174       int mult_cost;
2175       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2176
2177       /* Try to do the computation three ways: multiply by the negative of OP1
2178          and then negate, do the multiplication directly, or do multiplication
2179          by OP1 - 1.  */
2180
2181       mult_cost = rtx_cost (gen_rtx (MULT, mode, op0, op1), SET);
2182       mult_cost = MIN (12 * add_cost, mult_cost);
2183
2184       synth_mult (&alg, val, mult_cost);
2185
2186       /* This works only if the inverted value actually fits in an
2187          `unsigned int' */
2188       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2189         {
2190           synth_mult (&alg2, - val,
2191                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2192           if (alg2.cost + negate_cost < alg.cost)
2193             alg = alg2, variant = negate_variant;
2194         }
2195
2196       /* This proves very useful for division-by-constant.  */
2197       synth_mult (&alg2, val - 1,
2198                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2199       if (alg2.cost + add_cost < alg.cost)
2200         alg = alg2, variant = add_variant;
2201
2202       if (alg.cost < mult_cost)
2203         {
2204           /* We found something cheaper than a multiply insn.  */
2205           int opno;
2206           rtx accum, tem;
2207
2208           op0 = protect_from_queue (op0, 0);
2209
2210           /* Avoid referencing memory over and over.
2211              For speed, but also for correctness when mem is volatile.  */
2212           if (GET_CODE (op0) == MEM)
2213             op0 = force_reg (mode, op0);
2214
2215           /* ACCUM starts out either as OP0 or as a zero, depending on
2216              the first operation.  */
2217
2218           if (alg.op[0] == alg_zero)
2219             {
2220               accum = copy_to_mode_reg (mode, const0_rtx);
2221               val_so_far = 0;
2222             }
2223           else if (alg.op[0] == alg_m)
2224             {
2225               accum = copy_to_mode_reg (mode, op0);
2226               val_so_far = 1;
2227             }
2228           else
2229             abort ();
2230
2231           for (opno = 1; opno < alg.ops; opno++)
2232             {
2233               int log = alg.log[opno];
2234               int preserve = preserve_subexpressions_p ();
2235               rtx shift_subtarget = preserve ? 0 : accum;
2236               rtx add_target
2237                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2238                   ? target : 0);
2239               rtx accum_target = preserve ? 0 : accum;
2240
2241               switch (alg.op[opno])
2242                 {
2243                 case alg_shift:
2244                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2245                                         build_int_2 (log, 0), NULL_RTX, 0);
2246                   val_so_far <<= log;
2247                   break;
2248
2249                 case alg_add_t_m2:
2250                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2251                                       build_int_2 (log, 0), NULL_RTX, 0);
2252                   accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2253                                          add_target ? add_target : accum_target);
2254                   val_so_far += (HOST_WIDE_INT) 1 << log;
2255                   break;
2256
2257                 case alg_sub_t_m2:
2258                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2259                                       build_int_2 (log, 0), NULL_RTX, 0);
2260                   accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
2261                                          add_target ? add_target : accum_target);
2262                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2263                   break;
2264
2265                 case alg_add_t2_m:
2266                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2267                                         build_int_2 (log, 0), shift_subtarget,
2268                                         0);
2269                   accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
2270                                          add_target ? add_target : accum_target);
2271                   val_so_far = (val_so_far << log) + 1;
2272                   break;
2273
2274                 case alg_sub_t2_m:
2275                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2276                                         build_int_2 (log, 0), shift_subtarget,
2277                                         0);
2278                   accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
2279                                          add_target ? add_target : accum_target);
2280                   val_so_far = (val_so_far << log) - 1;
2281                   break;
2282
2283                 case alg_add_factor:
2284                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2285                                       build_int_2 (log, 0), NULL_RTX, 0);
2286                   accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2287                                          add_target ? add_target : accum_target);
2288                   val_so_far += val_so_far << log;
2289                   break;
2290
2291                 case alg_sub_factor:
2292                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2293                                       build_int_2 (log, 0), NULL_RTX, 0);
2294                   accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
2295                                          (add_target ? add_target
2296                                           : preserve ? 0 : tem));
2297                   val_so_far = (val_so_far << log) - val_so_far;
2298                   break;
2299
2300                 default:
2301                   abort ();;
2302                 }
2303
2304               /* Write a REG_EQUAL note on the last insn so that we can cse
2305                  multiplication sequences.  */
2306
2307               insn = get_last_insn ();
2308               REG_NOTES (insn)
2309                 = gen_rtx (EXPR_LIST, REG_EQUAL,
2310                            gen_rtx (MULT, mode, op0, GEN_INT (val_so_far)),
2311                            REG_NOTES (insn));
2312             }
2313
2314           if (variant == negate_variant)
2315             {
2316               val_so_far = - val_so_far;
2317               accum = expand_unop (mode, neg_optab, accum, target, 0);
2318             }
2319           else if (variant == add_variant)
2320             {
2321               val_so_far = val_so_far + 1;
2322               accum = force_operand (gen_rtx (PLUS, mode, accum, op0), target);
2323             }
2324
2325           if (val != val_so_far)
2326             abort ();
2327
2328           return accum;
2329         }
2330     }
2331
2332   /* This used to use umul_optab if unsigned, but for non-widening multiply
2333      there is no difference between signed and unsigned.  */
2334   op0 = expand_binop (mode, smul_optab,
2335                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2336   if (op0 == 0)
2337     abort ();
2338   return op0;
2339 }
2340 \f
2341 /* Return the smallest n such that 2**n >= X.  */
2342
2343 int
2344 ceil_log2 (x)
2345      unsigned HOST_WIDE_INT x;
2346 {
2347   return floor_log2 (x - 1) + 1;
2348 }
2349
2350 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2351    replace division by D, and put the least significant N bits of the result
2352    in *MULTIPLIER_PTR and return the most significant bit.
2353
2354    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2355    needed precision is in PRECISION (should be <= N).
2356
2357    PRECISION should be as small as possible so this function can choose
2358    multiplier more freely.
2359
2360    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2361    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2362
2363    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2364    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2365
2366 static
2367 unsigned HOST_WIDE_INT
2368 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2369      unsigned HOST_WIDE_INT d;
2370      int n;
2371      int precision;
2372      unsigned HOST_WIDE_INT *multiplier_ptr;
2373      int *post_shift_ptr;
2374      int *lgup_ptr;
2375 {
2376   unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2377   unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2378   int lgup, post_shift;
2379   int pow, pow2;
2380   unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2381
2382   /* lgup = ceil(log2(divisor)); */
2383   lgup = ceil_log2 (d);
2384
2385   if (lgup > n)
2386     abort ();
2387
2388   pow = n + lgup;
2389   pow2 = n + lgup - precision;
2390
2391   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2392     {
2393       /* We could handle this with some effort, but this case is much better
2394          handled directly with a scc insn, so rely on caller using that.  */
2395       abort ();
2396     }
2397
2398   /* mlow = 2^(N + lgup)/d */
2399  if (pow >= HOST_BITS_PER_WIDE_INT)
2400     {
2401       nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2402       nl = 0;
2403     }
2404   else
2405     {
2406       nh = 0;
2407       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2408     }
2409   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2410                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2411
2412   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2413   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2414     nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2415   else
2416     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2417   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2418                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2419
2420   if (mhigh_hi && nh - d >= d)
2421     abort ();
2422   if (mhigh_hi > 1 || mlow_hi > 1)
2423     abort ();
2424   /* assert that mlow < mhigh.  */
2425   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2426     abort();
2427
2428   /* If precision == N, then mlow, mhigh exceed 2^N
2429      (but they do not exceed 2^(N+1)).  */
2430
2431   /* Reduce to lowest terms */
2432   for (post_shift = lgup; post_shift > 0; post_shift--)
2433     {
2434       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2435       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2436       if (ml_lo >= mh_lo)
2437         break;
2438
2439       mlow_hi = 0;
2440       mlow_lo = ml_lo;
2441       mhigh_hi = 0;
2442       mhigh_lo = mh_lo;
2443     }
2444
2445   *post_shift_ptr = post_shift;
2446   *lgup_ptr = lgup;
2447   if (n < HOST_BITS_PER_WIDE_INT)
2448     {
2449       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2450       *multiplier_ptr = mhigh_lo & mask;
2451       return mhigh_lo >= mask;
2452     }
2453   else
2454     {
2455       *multiplier_ptr = mhigh_lo;
2456       return mhigh_hi;
2457     }
2458 }
2459
2460 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2461    congruent to 1 (mod 2**N).  */
2462
2463 static unsigned HOST_WIDE_INT
2464 invert_mod2n (x, n)
2465      unsigned HOST_WIDE_INT x;
2466      int n;
2467 {
2468   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y. */
2469
2470   /* The algorithm notes that the choice y = x satisfies
2471      x*y == 1 mod 2^3, since x is assumed odd.
2472      Each iteration doubles the number of bits of significance in y.  */
2473
2474   unsigned HOST_WIDE_INT mask;
2475   unsigned HOST_WIDE_INT y = x;
2476   int nbit = 3;
2477
2478   mask = (n == HOST_BITS_PER_WIDE_INT
2479           ? ~(unsigned HOST_WIDE_INT) 0
2480           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2481
2482   while (nbit < n)
2483     {
2484       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2485       nbit *= 2;
2486     }
2487   return y;
2488 }
2489
2490 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2491    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2492    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2493    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2494    become signed.
2495
2496    The result is put in TARGET if that is convenient.
2497
2498    MODE is the mode of operation.  */
2499
2500 rtx
2501 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2502      enum machine_mode mode;
2503      register rtx adj_operand, op0, op1, target;
2504      int unsignedp;
2505 {
2506   rtx tem;
2507   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2508
2509   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2510                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2511                       NULL_RTX, 0);
2512   tem = expand_and (tem, op1, NULL_RTX);
2513   adj_operand = force_operand (gen_rtx (adj_code, mode, adj_operand, tem),
2514                                adj_operand);
2515
2516   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2517                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2518                       NULL_RTX, 0);
2519   tem = expand_and (tem, op0, NULL_RTX);
2520   target = force_operand (gen_rtx (adj_code, mode, adj_operand, tem), target);
2521
2522   return target;
2523 }
2524
2525 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2526    in TARGET if that is convenient, and return where the result is.  If the
2527    operation can not be performed, 0 is returned.
2528
2529    MODE is the mode of operation and result.
2530
2531    UNSIGNEDP nonzero means unsigned multiply.
2532
2533    MAX_COST is the total allowed cost for the expanded RTL.  */
2534
2535 rtx
2536 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2537      enum machine_mode mode;
2538      register rtx op0, target;
2539      unsigned HOST_WIDE_INT cnst1;
2540      int unsignedp;
2541      int max_cost;
2542 {
2543   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2544   optab mul_highpart_optab;
2545   optab moptab;
2546   rtx tem;
2547   int size = GET_MODE_BITSIZE (mode);
2548   rtx op1, wide_op1;
2549
2550   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2551   if (size > HOST_BITS_PER_WIDE_INT)
2552     abort ();
2553
2554   op1 = GEN_INT (cnst1);
2555
2556   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2557     wide_op1 = op1;
2558   else
2559     wide_op1
2560       = immed_double_const (cnst1,
2561                             (unsignedp
2562                              ? (HOST_WIDE_INT) 0
2563                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2564                             wider_mode);
2565
2566   /* expand_mult handles constant multiplication of word_mode
2567      or narrower.  It does a poor job for large modes.  */
2568   if (size < BITS_PER_WORD
2569       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2570     {
2571       /* We have to do this, since expand_binop doesn't do conversion for
2572          multiply.  Maybe change expand_binop to handle widening multiply?  */
2573       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2574
2575       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2576       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2577                           build_int_2 (size, 0), NULL_RTX, 1);
2578       return convert_modes (mode, wider_mode, tem, unsignedp);
2579     }
2580
2581   if (target == 0)
2582     target = gen_reg_rtx (mode);
2583
2584   /* Firstly, try using a multiplication insn that only generates the needed
2585      high part of the product, and in the sign flavor of unsignedp.  */
2586   if (mul_highpart_cost[(int) mode] < max_cost)
2587     {
2588       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2589       target = expand_binop (mode, mul_highpart_optab,
2590                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2591       if (target)
2592         return target;
2593     }
2594
2595   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2596      Need to adjust the result after the multiplication.  */
2597   if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2598     {
2599       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2600       target = expand_binop (mode, mul_highpart_optab,
2601                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2602       if (target)
2603         /* We used the wrong signedness.  Adjust the result.  */
2604         return expand_mult_highpart_adjust (mode, target, op0,
2605                                             op1, target, unsignedp);
2606     }
2607
2608   /* Try widening multiplication.  */
2609   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2610   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2611       && mul_widen_cost[(int) wider_mode] < max_cost)
2612     goto try;
2613
2614   /* Try widening the mode and perform a non-widening multiplication.  */
2615   moptab = smul_optab;
2616   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2617       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2618     goto try;
2619
2620   /* Try widening multiplication of opposite signedness, and adjust.  */
2621   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2622   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2623       && (mul_widen_cost[(int) wider_mode]
2624           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2625     {
2626       tem = expand_binop (wider_mode, moptab, op0, wide_op1,
2627                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2628       if (tem != 0)
2629         {
2630           /* Extract the high half of the just generated product.  */
2631           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2632                               build_int_2 (size, 0), NULL_RTX, 1);
2633           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2634           /* We used the wrong signedness.  Adjust the result.  */
2635           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2636                                               target, unsignedp);
2637         }
2638     }
2639
2640   return 0;
2641
2642  try:
2643   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2644   tem = expand_binop (wider_mode, moptab, op0, wide_op1,
2645                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2646   if (tem == 0)
2647     return 0;
2648
2649   /* Extract the high half of the just generated product.  */
2650   tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2651                       build_int_2 (size, 0), NULL_RTX, 1);
2652   return convert_modes (mode, wider_mode, tem, unsignedp);
2653 }
2654 \f
2655 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2656    if that is convenient, and returning where the result is.
2657    You may request either the quotient or the remainder as the result;
2658    specify REM_FLAG nonzero to get the remainder.
2659
2660    CODE is the expression code for which kind of division this is;
2661    it controls how rounding is done.  MODE is the machine mode to use.
2662    UNSIGNEDP nonzero means do unsigned division.  */
2663
2664 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2665    and then correct it by or'ing in missing high bits
2666    if result of ANDI is nonzero.
2667    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2668    This could optimize to a bfexts instruction.
2669    But C doesn't use these operations, so their optimizations are
2670    left for later.  */
2671
2672 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2673
2674 rtx
2675 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2676      int rem_flag;
2677      enum tree_code code;
2678      enum machine_mode mode;
2679      register rtx op0, op1, target;
2680      int unsignedp;
2681 {
2682   enum machine_mode compute_mode;
2683   register rtx tquotient;
2684   rtx quotient = 0, remainder = 0;
2685   rtx last;
2686   int size;
2687   rtx insn, set;
2688   optab optab1, optab2;
2689   int op1_is_constant, op1_is_pow2;
2690   int max_cost, extra_cost;
2691
2692   op1_is_constant = GET_CODE (op1) == CONST_INT;
2693   op1_is_pow2 = (op1_is_constant
2694                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2695                       || EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))));
2696
2697   /*
2698      This is the structure of expand_divmod:
2699
2700      First comes code to fix up the operands so we can perform the operations
2701      correctly and efficiently.
2702
2703      Second comes a switch statement with code specific for each rounding mode.
2704      For some special operands this code emits all RTL for the desired
2705      operation, for other cases, it generates only a quotient and stores it in
2706      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2707      to indicate that it has not done anything.
2708
2709      Last comes code that finishes the operation.  If QUOTIENT is set and
2710      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2711      QUOTIENT is not set, it is computed using trunc rounding.
2712
2713      We try to generate special code for division and remainder when OP1 is a
2714      constant.  If |OP1| = 2**n we can use shifts and some other fast
2715      operations.  For other values of OP1, we compute a carefully selected
2716      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2717      by m.
2718
2719      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2720      half of the product.  Different strategies for generating the product are
2721      implemented in expand_mult_highpart.
2722
2723      If what we actually want is the remainder, we generate that by another
2724      by-constant multiplication and a subtraction.  */
2725
2726   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2727      code below will malfunction if we are, so check here and handle
2728      the special case if so.  */
2729   if (op1 == const1_rtx)
2730     return rem_flag ? const0_rtx : op0;
2731
2732   if (target
2733       /* Don't use the function value register as a target
2734          since we have to read it as well as write it,
2735          and function-inlining gets confused by this.  */
2736       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2737           /* Don't clobber an operand while doing a multi-step calculation.  */
2738           || ((rem_flag || op1_is_constant)
2739               && (reg_mentioned_p (target, op0)
2740                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2741           || reg_mentioned_p (target, op1)
2742           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2743     target = 0;
2744
2745   /* Get the mode in which to perform this computation.  Normally it will
2746      be MODE, but sometimes we can't do the desired operation in MODE.
2747      If so, pick a wider mode in which we can do the operation.  Convert
2748      to that mode at the start to avoid repeated conversions.
2749
2750      First see what operations we need.  These depend on the expression
2751      we are evaluating.  (We assume that divxx3 insns exist under the
2752      same conditions that modxx3 insns and that these insns don't normally
2753      fail.  If these assumptions are not correct, we may generate less
2754      efficient code in some cases.)
2755
2756      Then see if we find a mode in which we can open-code that operation
2757      (either a division, modulus, or shift).  Finally, check for the smallest
2758      mode for which we can do the operation with a library call.  */
2759
2760   /* We might want to refine this now that we have division-by-constant
2761      optimization.  Since expand_mult_highpart tries so many variants, it is
2762      not straightforward to generalize this.  Maybe we should make an array
2763      of possible modes in init_expmed?  Save this for GCC 2.7.  */
2764
2765   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2766             : (unsignedp ? udiv_optab : sdiv_optab));
2767   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2768
2769   for (compute_mode = mode; compute_mode != VOIDmode;
2770        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2771     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2772         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2773       break;
2774
2775   if (compute_mode == VOIDmode)
2776     for (compute_mode = mode; compute_mode != VOIDmode;
2777          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2778       if (optab1->handlers[(int) compute_mode].libfunc
2779           || optab2->handlers[(int) compute_mode].libfunc)
2780         break;
2781
2782   /* If we still couldn't find a mode, use MODE, but we'll probably abort
2783      in expand_binop.  */
2784   if (compute_mode == VOIDmode)
2785     compute_mode = mode;
2786
2787   if (target && GET_MODE (target) == compute_mode)
2788     tquotient = target;
2789   else
2790     tquotient = gen_reg_rtx (compute_mode);
2791
2792   size = GET_MODE_BITSIZE (compute_mode);
2793 #if 0
2794   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
2795      (mode), and thereby get better code when OP1 is a constant.  Do that
2796      later.  It will require going over all usages of SIZE below.  */
2797   size = GET_MODE_BITSIZE (mode);
2798 #endif
2799
2800   max_cost = div_cost[(int) compute_mode]
2801     - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
2802
2803   /* Now convert to the best mode to use.  */
2804   if (compute_mode != mode)
2805     {
2806       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
2807       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
2808     }
2809
2810   /* If one of the operands is a volatile MEM, copy it into a register.  */
2811
2812   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
2813     op0 = force_reg (compute_mode, op0);
2814   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
2815     op1 = force_reg (compute_mode, op1);
2816
2817   /* If we need the remainder or if OP1 is constant, we need to
2818      put OP0 in a register in case it has any queued subexpressions.  */
2819   if (rem_flag || op1_is_constant)
2820     op0 = force_reg (compute_mode, op0);
2821
2822   last = get_last_insn ();
2823
2824   /* Promote floor rounding to trunc rounding for unsigned operations.  */
2825   if (unsignedp)
2826     {
2827       if (code == FLOOR_DIV_EXPR)
2828         code = TRUNC_DIV_EXPR;
2829       if (code == FLOOR_MOD_EXPR)
2830         code = TRUNC_MOD_EXPR;
2831     }
2832
2833   if (op1 != const0_rtx)
2834     switch (code)
2835       {
2836       case TRUNC_MOD_EXPR:
2837       case TRUNC_DIV_EXPR:
2838         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
2839           {
2840             if (unsignedp)
2841               {
2842                 unsigned HOST_WIDE_INT mh, ml;
2843                 int pre_shift, post_shift;
2844                 int dummy;
2845                 unsigned HOST_WIDE_INT d = INTVAL (op1);
2846
2847                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
2848                   {
2849                     pre_shift = floor_log2 (d);
2850                     if (rem_flag)
2851                       {
2852                         remainder = expand_binop (compute_mode, and_optab, op0,
2853                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
2854                                                   remainder, 1,
2855                                                   OPTAB_LIB_WIDEN);
2856                         if (remainder)
2857                           return gen_lowpart (mode, remainder);
2858                       }
2859                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2860                                              build_int_2 (pre_shift, 0),
2861                                              tquotient, 1);
2862                   }
2863                 else if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
2864                   {
2865                     /* Most significant bit of divisor is set, emit a scc insn.
2866                        emit_store_flag needs to be passed a place for the
2867                        result.  */
2868                     quotient = emit_store_flag (tquotient, GEU, op0, op1,
2869                                                 compute_mode, 1, 1);
2870                     if (quotient == 0)
2871                       goto fail1;
2872                   }
2873                 else
2874                   {
2875                     /* Find a suitable multiplier and right shift count instead
2876                        of multiplying with D.  */
2877
2878                     mh = choose_multiplier (d, size, size,
2879                                             &ml, &post_shift, &dummy);
2880
2881                     /* If the suggested multiplier is more than SIZE bits, we
2882                        can do better for even divisors, using an initial right
2883                        shift.  */
2884                     if (mh != 0 && (d & 1) == 0)
2885                       {
2886                         pre_shift = floor_log2 (d & -d);
2887                         mh = choose_multiplier (d >> pre_shift, size,
2888                                                 size - pre_shift,
2889                                                 &ml, &post_shift, &dummy);
2890                         if (mh)
2891                           abort ();
2892                       }
2893                     else
2894                       pre_shift = 0;
2895
2896                     if (mh != 0)
2897                       {
2898                         rtx t1, t2, t3, t4;
2899
2900                         extra_cost = (shift_cost[post_shift - 1]
2901                                       + shift_cost[1] + 2 * add_cost);
2902                         t1 = expand_mult_highpart (compute_mode, op0, ml,
2903                                                    NULL_RTX, 1,
2904                                                    max_cost - extra_cost);
2905                         if (t1 == 0)
2906                           goto fail1;
2907                         t2 = force_operand (gen_rtx (MINUS, compute_mode,
2908                                                      op0, t1),
2909                                             NULL_RTX);
2910                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
2911                                            build_int_2 (1, 0), NULL_RTX, 1);
2912                         t4 = force_operand (gen_rtx (PLUS, compute_mode,
2913                                                      t1, t3),
2914                                             NULL_RTX);
2915                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t4,
2916                                                  build_int_2 (post_shift - 1,
2917                                                               0),
2918                                                  tquotient, 1);
2919                       }
2920                     else
2921                       {
2922                         rtx t1, t2;
2923
2924                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2925                                            build_int_2 (pre_shift, 0),
2926                                            NULL_RTX, 1);
2927                         extra_cost = (shift_cost[pre_shift]
2928                                       + shift_cost[post_shift]);
2929                         t2 = expand_mult_highpart (compute_mode, t1, ml,
2930                                                    NULL_RTX, 1,
2931                                                    max_cost - extra_cost);
2932                         if (t2 == 0)
2933                           goto fail1;
2934                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
2935                                                  build_int_2 (post_shift, 0),
2936                                                  tquotient, 1);
2937                       }
2938                   }
2939
2940                 insn = get_last_insn ();
2941                 if (insn != last
2942                     && (set = single_set (insn)) != 0
2943                     && SET_DEST (set) == quotient)
2944                   REG_NOTES (insn)
2945                     = gen_rtx (EXPR_LIST, REG_EQUAL,
2946                                gen_rtx (UDIV, compute_mode, op0, op1),
2947                                REG_NOTES (insn));
2948               }
2949             else                /* TRUNC_DIV, signed */
2950               {
2951                 unsigned HOST_WIDE_INT ml;
2952                 int lgup, post_shift;
2953                 HOST_WIDE_INT d = INTVAL (op1);
2954                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
2955
2956                 /* n rem d = n rem -d */
2957                 if (rem_flag && d < 0)
2958                   {
2959                     d = abs_d;
2960                     op1 = GEN_INT (abs_d);
2961                   }
2962
2963                 if (d == 1)
2964                   quotient = op0;
2965                 else if (d == -1)
2966                   quotient = expand_unop (compute_mode, neg_optab, op0,
2967                                           tquotient, 0);
2968                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
2969                   {
2970                     /* This case is not handled correctly below.  */
2971                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
2972                                                 compute_mode, 1, 1);
2973                     if (quotient == 0)
2974                       goto fail1;
2975                   }
2976                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
2977                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
2978                   ;
2979                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
2980                   {
2981                     lgup = floor_log2 (abs_d);
2982                     if (abs_d != 2 && BRANCH_COST < 3)
2983                       {
2984                         rtx label = gen_label_rtx ();
2985                         rtx t1;
2986
2987                         t1 = copy_to_mode_reg (compute_mode, op0);
2988                         emit_cmp_insn (t1, const0_rtx, GE,
2989                                        NULL_RTX, compute_mode, 0, 0);
2990                         emit_jump_insn (gen_bge (label));
2991                         expand_inc (t1, GEN_INT (abs_d - 1));
2992                         emit_label (label);
2993                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
2994                                                  build_int_2 (lgup, 0),
2995                                                  tquotient, 0);
2996                       }
2997                     else
2998                       {
2999                         rtx t1, t2, t3;
3000                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3001                                            build_int_2 (size - 1, 0),
3002                                            NULL_RTX, 0);
3003                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3004                                            build_int_2 (size - lgup, 0),
3005                                            NULL_RTX, 1);
3006                         t3 = force_operand (gen_rtx (PLUS, compute_mode,
3007                                                      op0, t2),
3008                                             NULL_RTX);
3009                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3010                                                  build_int_2 (lgup, 0),
3011                                                  tquotient, 0);
3012                       }
3013
3014                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3015                        the quotient.  */
3016                     if (d < 0)
3017                       {
3018                         insn = get_last_insn ();
3019                         if (insn != last
3020                             && (set = single_set (insn)) != 0
3021                             && SET_DEST (set) == quotient)
3022                           REG_NOTES (insn)
3023                             = gen_rtx (EXPR_LIST, REG_EQUAL,
3024                                        gen_rtx (DIV, compute_mode, op0,
3025                                                 GEN_INT (abs_d)),
3026                                        REG_NOTES (insn));
3027
3028                         quotient = expand_unop (compute_mode, neg_optab,
3029                                                 quotient, quotient, 0);
3030                       }
3031                   }
3032                 else
3033                   {
3034                     choose_multiplier (abs_d, size, size - 1,
3035                                        &ml, &post_shift, &lgup);
3036                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3037                       {
3038                         rtx t1, t2, t3;
3039
3040                         extra_cost = (shift_cost[post_shift]
3041                                       + shift_cost[size - 1] + add_cost);
3042                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3043                                                    NULL_RTX, 0,
3044                                                    max_cost - extra_cost);
3045                         if (t1 == 0)
3046                           goto fail1;
3047                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3048                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3049                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3050                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3051                         if (d < 0)
3052                           quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t2),
3053                                                     tquotient);
3054                         else
3055                           quotient = force_operand (gen_rtx (MINUS, compute_mode, t2, t3),
3056                                                     tquotient);
3057                       }
3058                     else
3059                       {
3060                         rtx t1, t2, t3, t4;
3061
3062                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3063                         extra_cost = (shift_cost[post_shift]
3064                                       + shift_cost[size - 1] + 2 * add_cost);
3065                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3066                                                    NULL_RTX, 0,
3067                                                    max_cost - extra_cost);
3068                         if (t1 == 0)
3069                           goto fail1;
3070                         t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
3071                                             NULL_RTX);
3072                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3073                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3074                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3075                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3076                         if (d < 0)
3077                           quotient = force_operand (gen_rtx (MINUS, compute_mode, t4, t3),
3078                                                     tquotient);
3079                         else
3080                           quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t4),
3081                                                     tquotient);
3082                       }
3083                   }
3084
3085                 insn = get_last_insn ();
3086                 if (insn != last
3087                     && (set = single_set (insn)) != 0
3088                     && SET_DEST (set) == quotient)
3089                   REG_NOTES (insn)
3090                     = gen_rtx (EXPR_LIST, REG_EQUAL,
3091                                gen_rtx (DIV, compute_mode, op0, op1),
3092                                REG_NOTES (insn));
3093               }
3094             break;
3095           }
3096       fail1:
3097         delete_insns_since (last);
3098         break;
3099
3100       case FLOOR_DIV_EXPR:
3101       case FLOOR_MOD_EXPR:
3102       /* We will come here only for signed operations.  */
3103         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3104           {
3105             unsigned HOST_WIDE_INT mh, ml;
3106             int pre_shift, lgup, post_shift;
3107             HOST_WIDE_INT d = INTVAL (op1);
3108
3109             if (d > 0)
3110               {
3111                 /* We could just as easily deal with negative constants here,
3112                    but it does not seem worth the trouble for GCC 2.6.  */
3113                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3114                   {
3115                     pre_shift = floor_log2 (d);
3116                     if (rem_flag)
3117                       {
3118                         remainder = expand_binop (compute_mode, and_optab, op0,
3119                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3120                                                   remainder, 0, OPTAB_LIB_WIDEN);
3121                         if (remainder)
3122                           return gen_lowpart (mode, remainder);
3123                       }
3124                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3125                                              build_int_2 (pre_shift, 0),
3126                                              tquotient, 0);
3127                   }
3128                 else
3129                   {
3130                     rtx t1, t2, t3, t4;
3131
3132                     mh = choose_multiplier (d, size, size - 1,
3133                                             &ml, &post_shift, &lgup);
3134                     if (mh)
3135                       abort ();
3136
3137                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3138                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
3139                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3140                                        NULL_RTX, 0, OPTAB_WIDEN);
3141                     extra_cost = (shift_cost[post_shift]
3142                                   + shift_cost[size - 1] + 2 * add_cost);
3143                     t3 = expand_mult_highpart (compute_mode, t2, ml,
3144                                                NULL_RTX, 1,
3145                                                max_cost - extra_cost);
3146                     if (t3 != 0)
3147                       {
3148                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3149                                            build_int_2 (post_shift, 0),
3150                                            NULL_RTX, 1);
3151                         quotient = expand_binop (compute_mode, xor_optab,
3152                                                  t4, t1, tquotient, 0,
3153                                                  OPTAB_WIDEN);
3154                       }
3155                   }
3156               }
3157             else
3158               {
3159                 rtx nsign, t1, t2, t3, t4;
3160                 t1 = force_operand (gen_rtx (PLUS, compute_mode,
3161                                              op0, constm1_rtx), NULL_RTX);
3162                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3163                                    0, OPTAB_WIDEN);
3164                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3165                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3166                 t3 = force_operand (gen_rtx (MINUS, compute_mode, t1, nsign),
3167                                     NULL_RTX);
3168                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3169                                     NULL_RTX, 0);
3170                 if (t4)
3171                   {
3172                     rtx t5;
3173                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3174                                       NULL_RTX, 0);
3175                     quotient = force_operand (gen_rtx (PLUS, compute_mode,
3176                                                        t4, t5),
3177                                               tquotient);
3178                   }
3179               }
3180           }
3181
3182         if (quotient != 0)
3183           break;
3184         delete_insns_since (last);
3185
3186         /* Try using an instruction that produces both the quotient and
3187            remainder, using truncation.  We can easily compensate the quotient
3188            or remainder to get floor rounding, once we have the remainder.
3189            Notice that we compute also the final remainder value here,
3190            and return the result right away.  */
3191         if (target == 0 || GET_MODE (target) != compute_mode)
3192           target = gen_reg_rtx (compute_mode);
3193
3194         if (rem_flag)
3195           {
3196             remainder
3197               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3198             quotient = gen_reg_rtx (compute_mode);
3199           }
3200         else
3201           {
3202             quotient
3203               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3204             remainder = gen_reg_rtx (compute_mode);
3205           }
3206
3207         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3208                                  quotient, remainder, 0))
3209           {
3210             /* This could be computed with a branch-less sequence.
3211                Save that for later.  */
3212             rtx tem;
3213             rtx label = gen_label_rtx ();
3214             emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3215                            compute_mode, 0, 0);
3216             emit_jump_insn (gen_beq (label));
3217             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3218                                 NULL_RTX, 0, OPTAB_WIDEN);
3219             emit_cmp_insn (tem, const0_rtx, GE, NULL_RTX, compute_mode, 0, 0);
3220             emit_jump_insn (gen_bge (label));
3221             expand_dec (quotient, const1_rtx);
3222             expand_inc (remainder, op1);
3223             emit_label (label);
3224             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3225           }
3226
3227         /* No luck with division elimination or divmod.  Have to do it
3228            by conditionally adjusting op0 *and* the result.  */
3229         {
3230           rtx label1, label2, label3, label4, label5;
3231           rtx adjusted_op0;
3232           rtx tem;
3233
3234           quotient = gen_reg_rtx (compute_mode);
3235           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3236           label1 = gen_label_rtx ();
3237           label2 = gen_label_rtx ();
3238           label3 = gen_label_rtx ();
3239           label4 = gen_label_rtx ();
3240           label5 = gen_label_rtx ();
3241           emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX, compute_mode, 0, 0);
3242           emit_jump_insn (gen_blt (label2));
3243           emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3244                          compute_mode, 0, 0);
3245           emit_jump_insn (gen_blt (label1));
3246           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3247                               quotient, 0, OPTAB_LIB_WIDEN);
3248           if (tem != quotient)
3249             emit_move_insn (quotient, tem);
3250           emit_jump_insn (gen_jump (label5));
3251           emit_barrier ();
3252           emit_label (label1);
3253           expand_inc (adjusted_op0, const1_rtx);
3254           emit_jump_insn (gen_jump (label4));
3255           emit_barrier ();
3256           emit_label (label2);
3257           emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3258                          compute_mode, 0, 0);
3259           emit_jump_insn (gen_bgt (label3));
3260           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3261                               quotient, 0, OPTAB_LIB_WIDEN);
3262           if (tem != quotient)
3263             emit_move_insn (quotient, tem);
3264           emit_jump_insn (gen_jump (label5));
3265           emit_barrier ();
3266           emit_label (label3);
3267           expand_dec (adjusted_op0, const1_rtx);
3268           emit_label (label4);
3269           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3270                               quotient, 0, OPTAB_LIB_WIDEN);
3271           if (tem != quotient)
3272             emit_move_insn (quotient, tem);
3273           expand_dec (quotient, const1_rtx);
3274           emit_label (label5);
3275         }
3276         break;
3277
3278       case CEIL_DIV_EXPR:
3279       case CEIL_MOD_EXPR:
3280         if (unsignedp)
3281           {
3282             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3283               {
3284                 rtx t1, t2, t3;
3285                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3286                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3287                                    build_int_2 (floor_log2 (d), 0),
3288                                    tquotient, 1);
3289                 t2 = expand_binop (compute_mode, and_optab, op0,
3290                                    GEN_INT (d - 1),
3291                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3292                 t3 = gen_reg_rtx (compute_mode);
3293                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3294                                       compute_mode, 1, 1);
3295                 if (t3 == 0)
3296                   {
3297                     rtx lab;
3298                     lab = gen_label_rtx ();
3299                     emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3300                                    compute_mode, 0, 0);
3301                     emit_jump_insn (gen_beq (lab));
3302                     expand_inc (t1, const1_rtx);
3303                     emit_label (lab);
3304                     quotient = t1;
3305                   }
3306                 else
3307                   quotient = force_operand (gen_rtx (PLUS, compute_mode,
3308                                                      t1, t3),
3309                                             tquotient);
3310                 break;
3311               }
3312
3313             /* Try using an instruction that produces both the quotient and
3314                remainder, using truncation.  We can easily compensate the
3315                quotient or remainder to get ceiling rounding, once we have the
3316                remainder.  Notice that we compute also the final remainder
3317                value here, and return the result right away.  */
3318             if (target == 0 || GET_MODE (target) != compute_mode)
3319               target = gen_reg_rtx (compute_mode);
3320
3321             if (rem_flag)
3322               {
3323                 remainder = (GET_CODE (target) == REG
3324                              ? target : gen_reg_rtx (compute_mode));
3325                 quotient = gen_reg_rtx (compute_mode);
3326               }
3327             else
3328               {
3329                 quotient = (GET_CODE (target) == REG
3330                             ? target : gen_reg_rtx (compute_mode));
3331                 remainder = gen_reg_rtx (compute_mode);
3332               }
3333
3334             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3335                                      remainder, 1))
3336               {
3337                 /* This could be computed with a branch-less sequence.
3338                    Save that for later.  */
3339                 rtx label = gen_label_rtx ();
3340                 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3341                                compute_mode, 0, 0);
3342                 emit_jump_insn (gen_beq (label));
3343                 expand_inc (quotient, const1_rtx);
3344                 expand_dec (remainder, op1);
3345                 emit_label (label);
3346                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3347               }
3348
3349             /* No luck with division elimination or divmod.  Have to do it
3350                by conditionally adjusting op0 *and* the result.  */
3351             {
3352               rtx label1, label2;
3353               rtx adjusted_op0, tem;
3354
3355               quotient = gen_reg_rtx (compute_mode);
3356               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3357               label1 = gen_label_rtx ();
3358               label2 = gen_label_rtx ();
3359               emit_cmp_insn (adjusted_op0, const0_rtx, NE, NULL_RTX,
3360                              compute_mode, 0, 0);
3361               emit_jump_insn (gen_bne (label1));
3362               emit_move_insn  (quotient, const0_rtx);
3363               emit_jump_insn (gen_jump (label2));
3364               emit_barrier ();
3365               emit_label (label1);
3366               expand_dec (adjusted_op0, const1_rtx);
3367               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3368                                   quotient, 1, OPTAB_LIB_WIDEN);
3369               if (tem != quotient)
3370                 emit_move_insn (quotient, tem);
3371               expand_inc (quotient, const1_rtx);
3372               emit_label (label2);
3373             }
3374           }
3375         else /* signed */
3376           {
3377             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3378                 && INTVAL (op1) >= 0)
3379               {
3380                 /* This is extremely similar to the code for the unsigned case
3381                    above.  For 2.7 we should merge these variants, but for
3382                    2.6.1 I don't want to touch the code for unsigned since that
3383                    get used in C.  The signed case will only be used by other
3384                    languages (Ada).  */
3385
3386                 rtx t1, t2, t3;
3387                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3388                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3389                                    build_int_2 (floor_log2 (d), 0),
3390                                    tquotient, 0);
3391                 t2 = expand_binop (compute_mode, and_optab, op0,
3392                                    GEN_INT (d - 1),
3393                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3394                 t3 = gen_reg_rtx (compute_mode);
3395                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3396                                       compute_mode, 1, 1);
3397                 if (t3 == 0)
3398                   {
3399                     rtx lab;
3400                     lab = gen_label_rtx ();
3401                     emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3402                                    compute_mode, 0, 0);
3403                     emit_jump_insn (gen_beq (lab));
3404                     expand_inc (t1, const1_rtx);
3405                     emit_label (lab);
3406                     quotient = t1;
3407                   }
3408                 else
3409                   quotient = force_operand (gen_rtx (PLUS, compute_mode,
3410                                                      t1, t3),
3411                                             tquotient);
3412                 break;
3413               }
3414
3415             /* Try using an instruction that produces both the quotient and
3416                remainder, using truncation.  We can easily compensate the
3417                quotient or remainder to get ceiling rounding, once we have the
3418                remainder.  Notice that we compute also the final remainder
3419                value here, and return the result right away.  */
3420             if (target == 0 || GET_MODE (target) != compute_mode)
3421               target = gen_reg_rtx (compute_mode);
3422             if (rem_flag)
3423               {
3424                 remainder= (GET_CODE (target) == REG
3425                             ? target : gen_reg_rtx (compute_mode));
3426                 quotient = gen_reg_rtx (compute_mode);
3427               }
3428             else
3429               {
3430                 quotient = (GET_CODE (target) == REG
3431                             ? target : gen_reg_rtx (compute_mode));
3432                 remainder = gen_reg_rtx (compute_mode);
3433               }
3434
3435             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3436                                      remainder, 0))
3437               {
3438                 /* This could be computed with a branch-less sequence.
3439                    Save that for later.  */
3440                 rtx tem;
3441                 rtx label = gen_label_rtx ();
3442                 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3443                                compute_mode, 0, 0);
3444                 emit_jump_insn (gen_beq (label));
3445                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3446                                     NULL_RTX, 0, OPTAB_WIDEN);
3447                 emit_cmp_insn (tem, const0_rtx, LT, NULL_RTX,
3448                                compute_mode, 0, 0);
3449                 emit_jump_insn (gen_blt (label));
3450                 expand_inc (quotient, const1_rtx);
3451                 expand_dec (remainder, op1);
3452                 emit_label (label);
3453                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3454               }
3455
3456             /* No luck with division elimination or divmod.  Have to do it
3457                by conditionally adjusting op0 *and* the result.  */
3458             {
3459               rtx label1, label2, label3, label4, label5;
3460               rtx adjusted_op0;
3461               rtx tem;
3462
3463               quotient = gen_reg_rtx (compute_mode);
3464               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3465               label1 = gen_label_rtx ();
3466               label2 = gen_label_rtx ();
3467               label3 = gen_label_rtx ();
3468               label4 = gen_label_rtx ();
3469               label5 = gen_label_rtx ();
3470               emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX,
3471                              compute_mode, 0, 0);
3472               emit_jump_insn (gen_blt (label2));
3473               emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3474                              compute_mode, 0, 0);
3475               emit_jump_insn (gen_bgt (label1));
3476               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3477                                   quotient, 0, OPTAB_LIB_WIDEN);
3478               if (tem != quotient)
3479                 emit_move_insn (quotient, tem);
3480               emit_jump_insn (gen_jump (label5));
3481               emit_barrier ();
3482               emit_label (label1);
3483               expand_dec (adjusted_op0, const1_rtx);
3484               emit_jump_insn (gen_jump (label4));
3485               emit_barrier ();
3486               emit_label (label2);
3487               emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3488                              compute_mode, 0, 0);
3489               emit_jump_insn (gen_blt (label3));
3490               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3491                                   quotient, 0, OPTAB_LIB_WIDEN);
3492               if (tem != quotient)
3493                 emit_move_insn (quotient, tem);
3494               emit_jump_insn (gen_jump (label5));
3495               emit_barrier ();
3496               emit_label (label3);
3497               expand_inc (adjusted_op0, const1_rtx);
3498               emit_label (label4);
3499               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3500                                   quotient, 0, OPTAB_LIB_WIDEN);
3501               if (tem != quotient)
3502                 emit_move_insn (quotient, tem);
3503               expand_inc (quotient, const1_rtx);
3504               emit_label (label5);
3505             }
3506           }
3507         break;
3508
3509       case EXACT_DIV_EXPR:
3510         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3511           {
3512             HOST_WIDE_INT d = INTVAL (op1);
3513             unsigned HOST_WIDE_INT ml;
3514             int post_shift;
3515             rtx t1;
3516
3517             post_shift = floor_log2 (d & -d);
3518             ml = invert_mod2n (d >> post_shift, size);
3519             t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3520                               unsignedp);
3521             quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3522                                      build_int_2 (post_shift, 0),
3523                                      NULL_RTX, unsignedp);
3524
3525             insn = get_last_insn ();
3526             REG_NOTES (insn)
3527               = gen_rtx (EXPR_LIST, REG_EQUAL,
3528                          gen_rtx (unsignedp ? UDIV : DIV, compute_mode,
3529                                   op0, op1),
3530                          REG_NOTES (insn));
3531           }
3532         break;
3533
3534       case ROUND_DIV_EXPR:
3535       case ROUND_MOD_EXPR:
3536         if (unsignedp)
3537           {
3538             rtx tem;
3539             rtx label;
3540             label = gen_label_rtx ();
3541             quotient = gen_reg_rtx (compute_mode);
3542             remainder = gen_reg_rtx (compute_mode);
3543             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3544               {
3545                 rtx tem;
3546                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3547                                          quotient, 1, OPTAB_LIB_WIDEN);
3548                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3549                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3550                                           remainder, 1, OPTAB_LIB_WIDEN);
3551               }
3552             tem = plus_constant (op1, -1);
3553             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3554                                 build_int_2 (1, 0), NULL_RTX, 1);
3555             emit_cmp_insn (remainder, tem, LEU, NULL_RTX, compute_mode, 0, 0);
3556             emit_jump_insn (gen_bleu (label));
3557             expand_inc (quotient, const1_rtx);
3558             expand_dec (remainder, op1);
3559             emit_label (label);
3560           }
3561         else
3562           {
3563             rtx abs_rem, abs_op1, tem, mask;
3564             rtx label;
3565             label = gen_label_rtx ();
3566             quotient = gen_reg_rtx (compute_mode);
3567             remainder = gen_reg_rtx (compute_mode);
3568             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3569               {
3570                 rtx tem;
3571                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3572                                          quotient, 0, OPTAB_LIB_WIDEN);
3573                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3574                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3575                                           remainder, 0, OPTAB_LIB_WIDEN);
3576               }
3577             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
3578             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
3579             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3580                                 build_int_2 (1, 0), NULL_RTX, 1);
3581             emit_cmp_insn (tem, abs_op1, LTU, NULL_RTX, compute_mode, 0, 0);
3582             emit_jump_insn (gen_bltu (label));
3583             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3584                                 NULL_RTX, 0, OPTAB_WIDEN);
3585             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3586                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3587             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3588                                 NULL_RTX, 0, OPTAB_WIDEN);
3589             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3590                                 NULL_RTX, 0, OPTAB_WIDEN);
3591             expand_inc (quotient, tem);
3592             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3593                                 NULL_RTX, 0, OPTAB_WIDEN);
3594             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3595                                 NULL_RTX, 0, OPTAB_WIDEN);
3596             expand_dec (remainder, tem);
3597             emit_label (label);
3598           }
3599         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3600       }
3601
3602   if (quotient == 0)
3603     {
3604       if (target && GET_MODE (target) != compute_mode)
3605         target = 0;
3606
3607       if (rem_flag)
3608         {
3609           /* Try to produce the remainder directly without a library call.  */
3610           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3611                                          op0, op1, target,
3612                                          unsignedp, OPTAB_WIDEN);
3613           if (remainder == 0)
3614             {
3615               /* No luck there.  Can we do remainder and divide at once
3616                  without a library call?  */
3617               remainder = gen_reg_rtx (compute_mode);
3618               if (! expand_twoval_binop ((unsignedp
3619                                           ? udivmod_optab
3620                                           : sdivmod_optab),
3621                                          op0, op1,
3622                                          NULL_RTX, remainder, unsignedp))
3623                 remainder = 0;
3624             }
3625
3626           if (remainder)
3627             return gen_lowpart (mode, remainder);
3628         }
3629
3630       /* Produce the quotient.  */
3631       /* Try a quotient insn, but not a library call.  */
3632       quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3633                                     op0, op1, rem_flag ? NULL_RTX : target,
3634                                     unsignedp, OPTAB_WIDEN);
3635       if (quotient == 0)
3636         {
3637           /* No luck there.  Try a quotient-and-remainder insn,
3638              keeping the quotient alone.  */
3639           quotient = gen_reg_rtx (compute_mode);
3640           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3641                                      op0, op1,
3642                                      quotient, NULL_RTX, unsignedp))
3643             {
3644               quotient = 0;
3645               if (! rem_flag)
3646                 /* Still no luck.  If we are not computing the remainder,
3647                    use a library call for the quotient.  */
3648                 quotient = sign_expand_binop (compute_mode,
3649                                               udiv_optab, sdiv_optab,
3650                                               op0, op1, target,
3651                                               unsignedp, OPTAB_LIB_WIDEN);
3652             }
3653         }
3654     }
3655
3656   if (rem_flag)
3657     {
3658       if (target && GET_MODE (target) != compute_mode)
3659         target = 0;
3660
3661       if (quotient == 0)
3662         /* No divide instruction either.  Use library for remainder.  */
3663         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3664                                        op0, op1, target,
3665                                        unsignedp, OPTAB_LIB_WIDEN);
3666       else
3667         {
3668           /* We divided.  Now finish doing X - Y * (X / Y).  */
3669           remainder = expand_mult (compute_mode, quotient, op1,
3670                                    NULL_RTX, unsignedp);
3671           remainder = expand_binop (compute_mode, sub_optab, op0,
3672                                     remainder, target, unsignedp,
3673                                     OPTAB_LIB_WIDEN);
3674         }
3675     }
3676
3677   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3678 }
3679 \f
3680 /* Return a tree node with data type TYPE, describing the value of X.
3681    Usually this is an RTL_EXPR, if there is no obvious better choice.
3682    X may be an expression, however we only support those expressions
3683    generated by loop.c.   */
3684
3685 tree
3686 make_tree (type, x)
3687      tree type;
3688      rtx x;
3689 {
3690   tree t;
3691
3692   switch (GET_CODE (x))
3693     {
3694     case CONST_INT:
3695       t = build_int_2 (INTVAL (x),
3696                        TREE_UNSIGNED (type) || INTVAL (x) >= 0 ? 0 : -1);
3697       TREE_TYPE (t) = type;
3698       return t;
3699
3700     case CONST_DOUBLE:
3701       if (GET_MODE (x) == VOIDmode)
3702         {
3703           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3704           TREE_TYPE (t) = type;
3705         }
3706       else
3707         {
3708           REAL_VALUE_TYPE d;
3709
3710           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3711           t = build_real (type, d);
3712         }
3713
3714       return t;
3715
3716     case PLUS:
3717       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3718                           make_tree (type, XEXP (x, 1))));
3719
3720     case MINUS:
3721       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3722                           make_tree (type, XEXP (x, 1))));
3723
3724     case NEG:
3725       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3726
3727     case MULT:
3728       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3729                           make_tree (type, XEXP (x, 1))));
3730
3731     case ASHIFT:
3732       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3733                           make_tree (type, XEXP (x, 1))));
3734
3735     case LSHIFTRT:
3736       return fold (convert (type,
3737                             build (RSHIFT_EXPR, unsigned_type (type),
3738                                    make_tree (unsigned_type (type),
3739                                               XEXP (x, 0)),
3740                                    make_tree (type, XEXP (x, 1)))));
3741
3742     case ASHIFTRT:
3743       return fold (convert (type,
3744                             build (RSHIFT_EXPR, signed_type (type),
3745                                    make_tree (signed_type (type), XEXP (x, 0)),
3746                                    make_tree (type, XEXP (x, 1)))));
3747
3748     case DIV:
3749       if (TREE_CODE (type) != REAL_TYPE)
3750         t = signed_type (type);
3751       else
3752         t = type;
3753
3754       return fold (convert (type,
3755                             build (TRUNC_DIV_EXPR, t,
3756                                    make_tree (t, XEXP (x, 0)),
3757                                    make_tree (t, XEXP (x, 1)))));
3758     case UDIV:
3759       t = unsigned_type (type);
3760       return fold (convert (type,
3761                             build (TRUNC_DIV_EXPR, t,
3762                                    make_tree (t, XEXP (x, 0)),
3763                                    make_tree (t, XEXP (x, 1)))));
3764    default:
3765       t = make_node (RTL_EXPR);
3766       TREE_TYPE (t) = type;
3767       RTL_EXPR_RTL (t) = x;
3768       /* There are no insns to be output
3769          when this rtl_expr is used.  */
3770       RTL_EXPR_SEQUENCE (t) = 0;
3771       return t;
3772     }
3773 }
3774
3775 /* Return an rtx representing the value of X * MULT + ADD.
3776    TARGET is a suggestion for where to store the result (an rtx).
3777    MODE is the machine mode for the computation.
3778    X and MULT must have mode MODE.  ADD may have a different mode.
3779    So can X (defaults to same as MODE).
3780    UNSIGNEDP is non-zero to do unsigned multiplication.
3781    This may emit insns.  */
3782
3783 rtx
3784 expand_mult_add (x, target, mult, add, mode, unsignedp)
3785      rtx x, target, mult, add;
3786      enum machine_mode mode;
3787      int unsignedp;
3788 {
3789   tree type = type_for_mode (mode, unsignedp);
3790   tree add_type = (GET_MODE (add) == VOIDmode
3791                    ? type : type_for_mode (GET_MODE (add), unsignedp));
3792   tree result =  fold (build (PLUS_EXPR, type,
3793                               fold (build (MULT_EXPR, type,
3794                                            make_tree (type, x),
3795                                            make_tree (type, mult))),
3796                               make_tree (add_type, add)));
3797
3798   return expand_expr (result, target, VOIDmode, 0);
3799 }
3800 \f
3801 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
3802    and returning TARGET.
3803
3804    If TARGET is 0, a pseudo-register or constant is returned.  */
3805
3806 rtx
3807 expand_and (op0, op1, target)
3808      rtx op0, op1, target;
3809 {
3810   enum machine_mode mode = VOIDmode;
3811   rtx tem;
3812
3813   if (GET_MODE (op0) != VOIDmode)
3814     mode = GET_MODE (op0);
3815   else if (GET_MODE (op1) != VOIDmode)
3816     mode = GET_MODE (op1);
3817
3818   if (mode != VOIDmode)
3819     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
3820   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
3821     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
3822   else
3823     abort ();
3824
3825   if (target == 0)
3826     target = tem;
3827   else if (tem != target)
3828     emit_move_insn (target, tem);
3829   return target;
3830 }
3831 \f
3832 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
3833    and storing in TARGET.  Normally return TARGET.
3834    Return 0 if that cannot be done.
3835
3836    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
3837    it is VOIDmode, they cannot both be CONST_INT.
3838
3839    UNSIGNEDP is for the case where we have to widen the operands
3840    to perform the operation.  It says to use zero-extension.
3841
3842    NORMALIZEP is 1 if we should convert the result to be either zero
3843    or one one.  Normalize is -1 if we should convert the result to be
3844    either zero or -1.  If NORMALIZEP is zero, the result will be left
3845    "raw" out of the scc insn.  */
3846
3847 rtx
3848 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
3849      rtx target;
3850      enum rtx_code code;
3851      rtx op0, op1;
3852      enum machine_mode mode;
3853      int unsignedp;
3854      int normalizep;
3855 {
3856   rtx subtarget;
3857   enum insn_code icode;
3858   enum machine_mode compare_mode;
3859   enum machine_mode target_mode = GET_MODE (target);
3860   rtx tem;
3861   rtx last = get_last_insn ();
3862   rtx pattern, comparison;
3863
3864   /* If one operand is constant, make it the second one.  Only do this
3865      if the other operand is not constant as well.  */
3866
3867   if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
3868       || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
3869     {
3870       tem = op0;
3871       op0 = op1;
3872       op1 = tem;
3873       code = swap_condition (code);
3874     }
3875
3876   if (mode == VOIDmode)
3877     mode = GET_MODE (op0);
3878
3879   /* For some comparisons with 1 and -1, we can convert this to
3880      comparisons with zero.  This will often produce more opportunities for
3881      store-flag insns. */
3882
3883   switch (code)
3884     {
3885     case LT:
3886       if (op1 == const1_rtx)
3887         op1 = const0_rtx, code = LE;
3888       break;
3889     case LE:
3890       if (op1 == constm1_rtx)
3891         op1 = const0_rtx, code = LT;
3892       break;
3893     case GE:
3894       if (op1 == const1_rtx)
3895         op1 = const0_rtx, code = GT;
3896       break;
3897     case GT:
3898       if (op1 == constm1_rtx)
3899         op1 = const0_rtx, code = GE;
3900       break;
3901     case GEU:
3902       if (op1 == const1_rtx)
3903         op1 = const0_rtx, code = NE;
3904       break;
3905     case LTU:
3906       if (op1 == const1_rtx)
3907         op1 = const0_rtx, code = EQ;
3908       break;
3909     }
3910
3911   /* From now on, we won't change CODE, so set ICODE now.  */
3912   icode = setcc_gen_code[(int) code];
3913
3914   /* If this is A < 0 or A >= 0, we can do this by taking the ones
3915      complement of A (for GE) and shifting the sign bit to the low bit.  */
3916   if (op1 == const0_rtx && (code == LT || code == GE)
3917       && GET_MODE_CLASS (mode) == MODE_INT
3918       && (normalizep || STORE_FLAG_VALUE == 1
3919           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
3920               && (STORE_FLAG_VALUE
3921                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
3922     {
3923       subtarget = target;
3924
3925       /* If the result is to be wider than OP0, it is best to convert it
3926          first.  If it is to be narrower, it is *incorrect* to convert it
3927          first.  */
3928       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
3929         {
3930           op0 = protect_from_queue (op0, 0);
3931           op0 = convert_modes (target_mode, mode, op0, 0);
3932           mode = target_mode;
3933         }
3934
3935       if (target_mode != mode)
3936         subtarget = 0;
3937
3938       if (code == GE)
3939         op0 = expand_unop (mode, one_cmpl_optab, op0, subtarget, 0);
3940
3941       if (normalizep || STORE_FLAG_VALUE == 1)
3942         /* If we are supposed to produce a 0/1 value, we want to do
3943            a logical shift from the sign bit to the low-order bit; for
3944            a -1/0 value, we do an arithmetic shift.  */
3945         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
3946                             size_int (GET_MODE_BITSIZE (mode) - 1),
3947                             subtarget, normalizep != -1);
3948
3949       if (mode != target_mode)
3950         op0 = convert_modes (target_mode, mode, op0, 0);
3951
3952       return op0;
3953     }
3954
3955   if (icode != CODE_FOR_nothing)
3956     {
3957       /* We think we may be able to do this with a scc insn.  Emit the
3958          comparison and then the scc insn.
3959
3960          compare_from_rtx may call emit_queue, which would be deleted below
3961          if the scc insn fails.  So call it ourselves before setting LAST.  */
3962
3963       emit_queue ();
3964       last = get_last_insn ();
3965
3966       comparison
3967         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
3968       if (GET_CODE (comparison) == CONST_INT)
3969         return (comparison == const0_rtx ? const0_rtx
3970                 : normalizep == 1 ? const1_rtx
3971                 : normalizep == -1 ? constm1_rtx
3972                 : const_true_rtx);
3973
3974       /* If the code of COMPARISON doesn't match CODE, something is
3975          wrong; we can no longer be sure that we have the operation.
3976          We could handle this case, but it should not happen.  */
3977
3978       if (GET_CODE (comparison) != code)
3979         abort ();
3980
3981       /* Get a reference to the target in the proper mode for this insn.  */
3982       compare_mode = insn_operand_mode[(int) icode][0];
3983       subtarget = target;
3984       if (preserve_subexpressions_p ()
3985           || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
3986         subtarget = gen_reg_rtx (compare_mode);
3987
3988       pattern = GEN_FCN (icode) (subtarget);
3989       if (pattern)
3990         {
3991           emit_insn (pattern);
3992
3993           /* If we are converting to a wider mode, first convert to
3994              TARGET_MODE, then normalize.  This produces better combining
3995              opportunities on machines that have a SIGN_EXTRACT when we are
3996              testing a single bit.  This mostly benefits the 68k.
3997
3998              If STORE_FLAG_VALUE does not have the sign bit set when
3999              interpreted in COMPARE_MODE, we can do this conversion as
4000              unsigned, which is usually more efficient.  */
4001           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4002             {
4003               convert_move (target, subtarget,
4004                             (GET_MODE_BITSIZE (compare_mode)
4005                              <= HOST_BITS_PER_WIDE_INT)
4006                             && 0 == (STORE_FLAG_VALUE
4007                                      & ((HOST_WIDE_INT) 1
4008                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4009               op0 = target;
4010               compare_mode = target_mode;
4011             }
4012           else
4013             op0 = subtarget;
4014
4015           /* If we want to keep subexpressions around, don't reuse our
4016              last target.  */
4017
4018           if (preserve_subexpressions_p ())
4019             subtarget = 0;
4020
4021           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4022              we don't have to do anything.  */
4023           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4024             ;
4025           else if (normalizep == - STORE_FLAG_VALUE)
4026             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4027
4028           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4029              makes it hard to use a value of just the sign bit due to
4030              ANSI integer constant typing rules.  */
4031           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4032                    && (STORE_FLAG_VALUE
4033                        & ((HOST_WIDE_INT) 1
4034                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4035             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4036                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4037                                 subtarget, normalizep == 1);
4038           else if (STORE_FLAG_VALUE & 1)
4039             {
4040               op0 = expand_and (op0, const1_rtx, subtarget);
4041               if (normalizep == -1)
4042                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4043             }
4044           else
4045             abort ();
4046
4047           /* If we were converting to a smaller mode, do the
4048              conversion now.  */
4049           if (target_mode != compare_mode)
4050             {
4051               convert_move (target, op0, 0);
4052               return target;
4053             }
4054           else
4055             return op0;
4056         }
4057     }
4058
4059   delete_insns_since (last);
4060
4061   /* If expensive optimizations, use different pseudo registers for each
4062      insn, instead of reusing the same pseudo.  This leads to better CSE,
4063      but slows down the compiler, since there are more pseudos */
4064   subtarget = (!flag_expensive_optimizations
4065                && (target_mode == mode)) ? target : NULL_RTX;
4066
4067   /* If we reached here, we can't do this with a scc insn.  However, there
4068      are some comparisons that can be done directly.  For example, if
4069      this is an equality comparison of integers, we can try to exclusive-or
4070      (or subtract) the two operands and use a recursive call to try the
4071      comparison with zero.  Don't do any of these cases if branches are
4072      very cheap.  */
4073
4074   if (BRANCH_COST > 0
4075       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4076       && op1 != const0_rtx)
4077     {
4078       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4079                           OPTAB_WIDEN);
4080
4081       if (tem == 0)
4082         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4083                             OPTAB_WIDEN);
4084       if (tem != 0)
4085         tem = emit_store_flag (target, code, tem, const0_rtx,
4086                                mode, unsignedp, normalizep);
4087       if (tem == 0)
4088         delete_insns_since (last);
4089       return tem;
4090     }
4091
4092   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4093      the constant zero.  Reject all other comparisons at this point.  Only
4094      do LE and GT if branches are expensive since they are expensive on
4095      2-operand machines.  */
4096
4097   if (BRANCH_COST == 0
4098       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4099       || (code != EQ && code != NE
4100           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4101     return 0;
4102
4103   /* See what we need to return.  We can only return a 1, -1, or the
4104      sign bit.  */
4105
4106   if (normalizep == 0)
4107     {
4108       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4109         normalizep = STORE_FLAG_VALUE;
4110
4111       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4112                && (STORE_FLAG_VALUE
4113                    == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4114         ;
4115       else
4116         return 0;
4117     }
4118
4119   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4120      do the necessary operation below.  */
4121
4122   tem = 0;
4123
4124   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4125      the sign bit set.  */
4126
4127   if (code == LE)
4128     {
4129       /* This is destructive, so SUBTARGET can't be OP0.  */
4130       if (rtx_equal_p (subtarget, op0))
4131         subtarget = 0;
4132
4133       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4134                           OPTAB_WIDEN);
4135       if (tem)
4136         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4137                             OPTAB_WIDEN);
4138     }
4139
4140   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4141      number of bits in the mode of OP0, minus one.  */
4142
4143   if (code == GT)
4144     {
4145       if (rtx_equal_p (subtarget, op0))
4146         subtarget = 0;
4147
4148       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4149                           size_int (GET_MODE_BITSIZE (mode) - 1),
4150                           subtarget, 0);
4151       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4152                           OPTAB_WIDEN);
4153     }
4154
4155   if (code == EQ || code == NE)
4156     {
4157       /* For EQ or NE, one way to do the comparison is to apply an operation
4158          that converts the operand into a positive number if it is non-zero
4159          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4160          for NE we negate.  This puts the result in the sign bit.  Then we
4161          normalize with a shift, if needed.
4162
4163          Two operations that can do the above actions are ABS and FFS, so try
4164          them.  If that doesn't work, and MODE is smaller than a full word,
4165          we can use zero-extension to the wider mode (an unsigned conversion)
4166          as the operation.  */
4167
4168       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4169         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4170       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4171         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4172       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4173         {
4174           op0 = protect_from_queue (op0, 0);
4175           tem = convert_modes (word_mode, mode, op0, 1);
4176           mode = word_mode;
4177         }
4178
4179       if (tem != 0)
4180         {
4181           if (code == EQ)
4182             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4183                                 0, OPTAB_WIDEN);
4184           else
4185             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4186         }
4187
4188       /* If we couldn't do it that way, for NE we can "or" the two's complement
4189          of the value with itself.  For EQ, we take the one's complement of
4190          that "or", which is an extra insn, so we only handle EQ if branches
4191          are expensive.  */
4192
4193       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4194         {
4195           if (rtx_equal_p (subtarget, op0))
4196             subtarget = 0;
4197
4198           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4199           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4200                               OPTAB_WIDEN);
4201
4202           if (tem && code == EQ)
4203             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4204         }
4205     }
4206
4207   if (tem && normalizep)
4208     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4209                         size_int (GET_MODE_BITSIZE (mode) - 1),
4210                         subtarget, normalizep == 1);
4211
4212   if (tem)
4213     {
4214       if (GET_MODE (tem) != target_mode)
4215         {
4216           convert_move (target, tem, 0);
4217           tem = target;
4218         }
4219       else if (!subtarget)
4220         {
4221           emit_move_insn (target, tem);
4222           tem = target;
4223         }
4224     }
4225   else
4226     delete_insns_since (last);
4227
4228   return tem;
4229 }
4230   emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
4231   emit_move_insn (target, const1_rtx);
4232   emit_label (label);
4233
4234   return target;
4235 }