1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 88, 89, 92-5, 1996 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
#include "config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "insn-flags.h"
#include "insn-codes.h"
#include "insn-config.h"
#include "expr.h"
#include "real.h"
#include "recog.h"
34 static void store_fixed_bit_field PROTO((rtx, int, int, int, rtx, int));
35 static void store_split_bit_field PROTO((rtx, int, int, rtx, int));
36 static rtx extract_fixed_bit_field PROTO((enum machine_mode, rtx, int,
37 int, int, rtx, int, int));
static rtx mask_rtx PROTO((enum machine_mode, int, int, int));
static rtx lshift_value PROTO((enum machine_mode, rtx, int, int));
42 static rtx extract_split_bit_field PROTO((rtx, int, int, int, int));
44 #define CEIL(x,y) (((x) + (y) - 1) / (y))
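/* For example, CEIL (10, 4) is 3: ten bits need three 4-bit units.  */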
46 /* Non-zero means divides or modulus operations are relatively cheap for
47 powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */
51 static int sdiv_pow2_cheap, smod_pow2_cheap;
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
#endif
57 /* For compilers that support multiple targets with different word sizes,
58 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
59 is the H8/300(H) compiler. */
#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
67 static int add_cost, negate_cost, zero_cost;
68 static int shift_cost[MAX_BITS_PER_WORD];
69 static int shiftadd_cost[MAX_BITS_PER_WORD];
70 static int shiftsub_cost[MAX_BITS_PER_WORD];
71 static int mul_cost[NUM_MACHINE_MODES];
72 static int div_cost[NUM_MACHINE_MODES];
73 static int mul_widen_cost[NUM_MACHINE_MODES];
74 static int mul_highpart_cost[NUM_MACHINE_MODES];
void
init_expmed ()
{
  /* This is "some random pseudo register" for purposes of calling recog
     to see what insns exist.  */
82 rtx reg = gen_rtx (REG, word_mode, 10000);
83 rtx shift_insn, shiftadd_insn, shiftsub_insn;
86 enum machine_mode mode, wider_mode;
  /* Since we are on the permanent obstack, we must be sure we save this
     spot AFTER we call start_sequence, since it will reuse the rtl it
     makes.  */
94 free_point = (char *) oballoc (0);
96 zero_cost = rtx_cost (const0_rtx, 0);
97 add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
99 shift_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
100 gen_rtx (ASHIFT, word_mode, reg,
103 shiftadd_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
104 gen_rtx (PLUS, word_mode,
105 gen_rtx (MULT, word_mode,
109 shiftsub_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
110 gen_rtx (MINUS, word_mode,
111 gen_rtx (MULT, word_mode,
118 shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
120 for (m = 1; m < BITS_PER_WORD; m++)
122 shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
124 XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
125 if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
126 shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
128 XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
129 = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
131 shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
133 XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
134 = GEN_INT ((HOST_WIDE_INT) 1 << m);
135 if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
136 shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
139 negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);
142 = (rtx_cost (gen_rtx (DIV, word_mode, reg, GEN_INT (32)), SET)
145 = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
148 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
150 mode = GET_MODE_WIDER_MODE (mode))
152 reg = gen_rtx (REG, mode, 10000);
153 div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
154 mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
155 wider_mode = GET_MODE_WIDER_MODE (mode);
156 if (wider_mode != VOIDmode)
158 mul_widen_cost[(int) wider_mode]
159 = rtx_cost (gen_rtx (MULT, wider_mode,
160 gen_rtx (ZERO_EXTEND, wider_mode, reg),
161 gen_rtx (ZERO_EXTEND, wider_mode, reg)),
163 mul_highpart_cost[(int) mode]
164 = rtx_cost (gen_rtx (TRUNCATE, mode,
165 gen_rtx (LSHIFTRT, wider_mode,
166 gen_rtx (MULT, wider_mode,
167 gen_rtx (ZERO_EXTEND, wider_mode, reg),
168 gen_rtx (ZERO_EXTEND, wider_mode, reg)),
169 GEN_INT (GET_MODE_BITSIZE (mode)))),
174 /* Free the objects we just allocated. */
179 /* Return an rtx representing minus the value of X.
180 MODE is the intended mode of the result,
181 useful if X is a CONST_INT. */
rtx
negate_rtx (mode, x)
     enum machine_mode mode;
     rtx x;
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);
191 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
196 /* Generate code to store value from rtx VALUE
197 into a bit-field within structure STR_RTX
198 containing BITSIZE bits starting at bit BITNUM.
199 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
200 ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
201 TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */
203 /* ??? Note that there are two different ideas here for how
204 to determine the size to count bits within, for a register.
205 One is BITS_PER_WORD, and the other is the size of operand 3
206 of the insv pattern. (The latter assumes that an n-bit machine
207 will be able to insert bit fields up to n bits wide.)
208 It isn't certain that either of these is right.
209 extract_bit_field has the same quandary. */
212 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
214 register int bitsize;
216 enum machine_mode fieldmode;
221 int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
222 register int offset = bitnum / unit;
223 register int bitpos = bitnum % unit;
224 register rtx op0 = str_rtx;
226 if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
229 /* Discount the part of the structure before the desired byte.
230 We need to know how many bytes are safe to reference after it. */
232 total_size -= (bitpos / BIGGEST_ALIGNMENT
233 * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
235 while (GET_CODE (op0) == SUBREG)
237 /* The following line once was done only if WORDS_BIG_ENDIAN,
238 but I think that is a mistake. WORDS_BIG_ENDIAN is
239 meaningful at a much higher level; when structures are copied
240 between memory and regs, the higher-numbered regs
241 always get higher addresses. */
242 offset += SUBREG_WORD (op0);
243 /* We used to adjust BITPOS here, but now we do the whole adjustment
244 right after the loop. */
245 op0 = SUBREG_REG (op0);
248 /* If OP0 is a register, BITPOS must count within a word.
249 But as we have it, it counts within whatever size OP0 now has.
250 On a bigendian machine, these are not the same, so convert. */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
253 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
254 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
256 value = protect_from_queue (value, 0);
259 value = force_not_mem (value);
261 /* Note that the adjustment of BITPOS above has no effect on whether
262 BITPOS is 0 in a REG bigger than a word. */
263 if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
264 && (GET_CODE (op0) != MEM
265 || ! SLOW_UNALIGNED_ACCESS
266 || (offset * BITS_PER_UNIT % bitsize == 0
267 && align % GET_MODE_SIZE (fieldmode) == 0))
268 && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
270 /* Storing in a full-word or multi-word field in a register
271 can be done with just SUBREG. */
272 if (GET_MODE (op0) != fieldmode)
274 if (GET_CODE (op0) == REG)
275 op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
277 op0 = change_address (op0, fieldmode,
278 plus_constant (XEXP (op0, 0), offset));
280 emit_move_insn (op0, value);
284 /* Storing an lsb-aligned field in a register
285 can be done with a movestrict instruction. */
287 if (GET_CODE (op0) != MEM
288 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
289 && bitsize == GET_MODE_BITSIZE (fieldmode)
290 && (GET_MODE (op0) == fieldmode
291 || (movstrict_optab->handlers[(int) fieldmode].insn_code
292 != CODE_FOR_nothing)))
294 /* Get appropriate low part of the value being stored. */
295 if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
296 value = gen_lowpart (fieldmode, value);
297 else if (!(GET_CODE (value) == SYMBOL_REF
298 || GET_CODE (value) == LABEL_REF
299 || GET_CODE (value) == CONST))
300 value = convert_to_mode (fieldmode, value, 0);
302 if (GET_MODE (op0) == fieldmode)
303 emit_move_insn (op0, value);
306 int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
	  if (! (*insn_operand_predicate[icode][1]) (value, fieldmode))
308 value = copy_to_mode_reg (fieldmode, value);
309 emit_insn (GEN_FCN (icode)
310 (gen_rtx (SUBREG, fieldmode, op0, offset), value));
315 /* Handle fields bigger than a word. */
317 if (bitsize > BITS_PER_WORD)
319 /* Here we transfer the words of the field
320 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */
325 int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
327 int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
330 /* This is the mode we must force value to, so that there will be enough
331 subwords to extract. Note that fieldmode will often (always?) be
332 VOIDmode, because that is what store_field uses to indicate that this
333 is a bit field, but passing VOIDmode to operand_subword_force will
334 result in an abort. */
335 fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
337 for (i = 0; i < nwords; i++)
339 /* If I is 0, use the low-order word in both field and target;
340 if I is 1, use the next to lowest word; and so on. */
341 int wordnum = (backwards ? nwords - i - 1 : i);
342 int bit_offset = (backwards
343 ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
344 : i * BITS_PER_WORD);
345 store_bit_field (op0, MIN (BITS_PER_WORD,
346 bitsize - i * BITS_PER_WORD),
347 bitnum + bit_offset, word_mode,
348 operand_subword_force (value, wordnum,
349 (GET_MODE (value) == VOIDmode
351 : GET_MODE (value))),
357 /* From here on we can assume that the field to be stored in is
358 a full-word (whatever type that is), since it is shorter than a word. */
360 /* OFFSET is the number of words or bytes (UNIT says which)
361 from STR_RTX to the first word or byte containing part of the field. */
363 if (GET_CODE (op0) == REG)
366 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
367 op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
373 op0 = protect_from_queue (op0, 1);
376 /* If VALUE is a floating-point mode, access it as an integer of the
377 corresponding size. This can occur on a machine with 64 bit registers
378 that uses SFmode for float. This can also occur for unaligned float
380 if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
382 if (GET_CODE (value) != REG)
383 value = copy_to_reg (value);
384 value = gen_rtx (SUBREG, word_mode, value, 0);
387 /* Now OFFSET is nonzero only if OP0 is memory
388 and is therefore always measured in bytes. */
392 && GET_MODE (value) != BLKmode
393 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
394 /* Ensure insv's size is wide enough for this field. */
395 && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3])
397 && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
399 > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]))))
401 int xbitpos = bitpos;
404 rtx last = get_last_insn ();
406 enum machine_mode maxmode
407 = insn_operand_mode[(int) CODE_FOR_insv][3];
409 int save_volatile_ok = volatile_ok;
412 /* If this machine's insv can only insert into a register, copy OP0
413 into a register and save it back later. */
414 /* This used to check flag_force_mem, but that was a serious
415 de-optimization now that flag_force_mem is enabled by -O2. */
416 if (GET_CODE (op0) == MEM
417 && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
421 enum machine_mode bestmode;
423 /* Get the mode to use for inserting into this field. If OP0 is
424 BLKmode, get the smallest mode consistent with the alignment. If
425 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
426 mode. Otherwise, use the smallest mode containing the field. */
428 if (GET_MODE (op0) == BLKmode
429 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
431 = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
432 MEM_VOLATILE_P (op0));
434 bestmode = GET_MODE (op0);
436 if (bestmode == VOIDmode
437 || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
440 /* Adjust address to point to the containing unit of that mode. */
441 unit = GET_MODE_BITSIZE (bestmode);
442 /* Compute offset as multiple of this unit, counting in bytes. */
443 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
444 bitpos = bitnum % unit;
445 op0 = change_address (op0, bestmode,
446 plus_constant (XEXP (op0, 0), offset));
448 /* Fetch that unit, store the bitfield in it, then store the unit. */
449 tempreg = copy_to_reg (op0);
450 store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
452 emit_move_insn (op0, tempreg);
455 volatile_ok = save_volatile_ok;
457 /* Add OFFSET into OP0's address. */
458 if (GET_CODE (xop0) == MEM)
459 xop0 = change_address (xop0, byte_mode,
460 plus_constant (XEXP (xop0, 0), offset));
462 /* If xop0 is a register, we need it in MAXMODE
463 to make it acceptable to the format of insv. */
464 if (GET_CODE (xop0) == SUBREG)
465 /* We can't just change the mode, because this might clobber op0,
466 and we will need the original value of op0 if insv fails. */
467 xop0 = gen_rtx (SUBREG, maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
468 if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
469 xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
471 /* On big-endian machines, we count bits from the most significant.
472 If the bit field insn does not, we must invert. */
474 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
475 xbitpos = unit - bitsize - xbitpos;
477 /* We have been counting XBITPOS within UNIT.
478 Count instead within the size of the register. */
479 if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
480 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
482 unit = GET_MODE_BITSIZE (maxmode);
484 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
486 if (GET_MODE (value) != maxmode)
488 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
490 /* Optimization: Don't bother really extending VALUE
491 if it has all the bits we will actually use. However,
492 if we must narrow it, be sure we do it correctly. */
494 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
496 /* Avoid making subreg of a subreg, or of a mem. */
497 if (GET_CODE (value1) != REG)
498 value1 = copy_to_reg (value1);
499 value1 = gen_rtx (SUBREG, maxmode, value1, 0);
502 value1 = gen_lowpart (maxmode, value1);
504 else if (!CONSTANT_P (value))
505 /* Parse phase is supposed to make VALUE's data type
506 match that of the component reference, which is a type
507 at least as wide as the field; so VALUE should have
508 a mode that corresponds to that type. */
512 /* If this machine's insv insists on a register,
513 get VALUE1 into a register. */
514 if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
516 value1 = force_reg (maxmode, value1);
518 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
523 delete_insns_since (last);
524 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
530 /* Insv is not available; store using shifts and boolean ops. */
531 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
535 /* Use shifts and boolean operations to store VALUE
536 into a bit field of width BITSIZE
537 in a memory location specified by OP0 except offset by OFFSET bytes.
538 (OFFSET must be 0 if OP0 is a register.)
539 The field starts at position BITPOS within the byte.
540 (If OP0 is a register, it may be a full word or a narrower mode,
541 but BITPOS still counts within a full word,
542 which is significant on bigendian machines.)
543 STRUCT_ALIGN is the alignment the structure is known to have (in bytes).
545 Note that protect_from_queue has already been done on OP0 and VALUE. */
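/* In outline (an informal sketch of the steps below, not the exact RTL
   emitted): to store an N-bit VALUE at bit position P within a word W,

	W = (W & ~(M << P)) | ((VALUE & M) << P)

   where M is a mask of N low-order one bits; the clearing step is skipped
   when VALUE is the constant -1, and the final IOR when it is zero.  */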
548 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
550 register int offset, bitsize, bitpos;
554 register enum machine_mode mode;
555 int total_bits = BITS_PER_WORD;
560 /* There is a case not handled here:
561 a structure with a known alignment of just a halfword
562 and a field split across two aligned halfwords within the structure.
563 Or likewise a structure with a known alignment of just a byte
564 and a field split across two bytes.
565 Such cases are not supposed to be able to occur. */
567 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
571 /* Special treatment for a bit field split across two registers. */
572 if (bitsize + bitpos > BITS_PER_WORD)
574 store_split_bit_field (op0, bitsize, bitpos,
575 value, BITS_PER_WORD);
581 /* Get the proper mode to use for this field. We want a mode that
582 includes the entire field. If such a mode would be larger than
583 a word, we won't be doing the extraction the normal way. */
585 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
586 struct_align * BITS_PER_UNIT, word_mode,
587 GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
589 if (mode == VOIDmode)
      /* The only way this should occur is if the field spans word
	 boundaries.  */
593 store_split_bit_field (op0,
594 bitsize, bitpos + offset * BITS_PER_UNIT,
595 value, struct_align);
599 total_bits = GET_MODE_BITSIZE (mode);
  /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
     be in the range 0 to total_bits-1, and put any excess bytes in
     OFFSET.  */
604 if (bitpos >= total_bits)
606 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
607 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
611 /* Get ref to an aligned byte, halfword, or word containing the field.
612 Adjust BITPOS to be position within a word,
613 and OFFSET to be the offset of that word.
614 Then alter OP0 to refer to that word. */
615 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
616 offset -= (offset % (total_bits / BITS_PER_UNIT));
617 op0 = change_address (op0, mode,
618 plus_constant (XEXP (op0, 0), offset));
621 mode = GET_MODE (op0);
623 /* Now MODE is either some integral mode for a MEM as OP0,
624 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
625 The bit field is contained entirely within OP0.
626 BITPOS is the starting bit number within OP0.
627 (OP0's mode may actually be narrower than MODE.) */
629 if (BYTES_BIG_ENDIAN)
630 /* BITPOS is the distance between our msb
631 and that of the containing datum.
632 Convert it to the distance from the lsb. */
633 bitpos = total_bits - bitsize - bitpos;
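  /* For example, with total_bits == 32, bitsize == 4 and bitpos == 8
     counted from the msb, the field occupies bits 20 through 23 counted
     from the lsb, so BITPOS becomes 32 - 4 - 8 == 20.  */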
635 /* Now BITPOS is always the distance between our lsb
638 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
639 we must first convert its mode to MODE. */
641 if (GET_CODE (value) == CONST_INT)
643 register HOST_WIDE_INT v = INTVAL (value);
645 if (bitsize < HOST_BITS_PER_WIDE_INT)
646 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
650 else if ((bitsize < HOST_BITS_PER_WIDE_INT
651 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
652 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
655 value = lshift_value (mode, value, bitpos, bitsize);
659 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
660 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
662 if (GET_MODE (value) != mode)
664 if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
665 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
666 value = gen_lowpart (mode, value);
668 value = convert_to_mode (mode, value, 1);
672 value = expand_binop (mode, and_optab, value,
673 mask_rtx (mode, 0, bitsize, 0),
674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
676 value = expand_shift (LSHIFT_EXPR, mode, value,
677 build_int_2 (bitpos, 0), NULL_RTX, 1);
680 /* Now clear the chosen bits in OP0,
681 except that if VALUE is -1 we need not bother. */
683 subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
687 temp = expand_binop (mode, and_optab, op0,
688 mask_rtx (mode, bitpos, bitsize, 1),
689 subtarget, 1, OPTAB_LIB_WIDEN);
695 /* Now logical-or VALUE into OP0, unless it is zero. */
698 temp = expand_binop (mode, ior_optab, temp, value,
699 subtarget, 1, OPTAB_LIB_WIDEN);
701 emit_move_insn (op0, temp);
704 /* Store a bit field that is split across multiple accessible memory objects.
706 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
707 BITSIZE is the field width; BITPOS the position of its first bit
709 VALUE is the value to store.
710 ALIGN is the known alignment of OP0, measured in bytes.
711 This is also the size of the memory objects to be used.
713 This does not yet handle fields wider than BITS_PER_WORD. */
716 store_split_bit_field (op0, bitsize, bitpos, value, align)
725 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
727 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
728 unit = BITS_PER_WORD;
730 unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
732 /* If VALUE is a constant other than a CONST_INT, get it into a register in
733 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
734 that VALUE might be a floating-point constant. */
735 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
737 rtx word = gen_lowpart_common (word_mode, value);
739 if (word && (value != word))
742 value = gen_lowpart_common (word_mode,
743 force_reg (GET_MODE (value) != VOIDmode
745 : word_mode, value));
748 while (bitsdone < bitsize)
755 offset = (bitpos + bitsdone) / unit;
756 thispos = (bitpos + bitsdone) % unit;
758 /* THISSIZE must not overrun a word boundary. Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
761 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
762 thissize = MIN (thissize, unit - thispos);
764 if (BYTES_BIG_ENDIAN)
768 /* We must do an endian conversion exactly the same way as it is
769 done in extract_bit_field, so that the two calls to
770 extract_fixed_bit_field will have comparable arguments. */
771 if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
772 total_bits = BITS_PER_WORD;
774 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
776 /* Fetch successively less significant portions. */
777 if (GET_CODE (value) == CONST_INT)
778 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
779 >> (bitsize - bitsdone - thissize))
780 & (((HOST_WIDE_INT) 1 << thissize) - 1));
782 /* The args are chosen so that the last part includes the
783 lsb. Give extract_bit_field the value it needs (with
784 endianness compensation) to fetch the piece we want.
786 ??? We have no idea what the alignment of VALUE is, so
787 we have to use a guess. */
789 = extract_fixed_bit_field
790 (word_mode, value, 0, thissize,
791 total_bits - bitsize + bitsdone, NULL_RTX, 1,
792 GET_MODE (value) == VOIDmode
794 : (GET_MODE (value) == BLKmode
796 : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
800 /* Fetch successively more significant portions. */
801 if (GET_CODE (value) == CONST_INT)
802 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
804 & (((HOST_WIDE_INT) 1 << thissize) - 1));
807 = extract_fixed_bit_field
808 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
809 GET_MODE (value) == VOIDmode
811 : (GET_MODE (value) == BLKmode
813 : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
816 /* If OP0 is a register, then handle OFFSET here.
818 When handling multiword bitfields, extract_bit_field may pass
819 down a word_mode SUBREG of a larger REG for a bitfield that actually
820 crosses a word boundary. Thus, for a SUBREG, we must find
821 the current word starting from the base register. */
822 if (GET_CODE (op0) == SUBREG)
824 word = operand_subword_force (SUBREG_REG (op0),
825 SUBREG_WORD (op0) + offset,
826 GET_MODE (SUBREG_REG (op0)));
829 else if (GET_CODE (op0) == REG)
831 word = operand_subword_force (op0, offset, GET_MODE (op0));
837 /* OFFSET is in UNITs, and UNIT is in bits.
838 store_fixed_bit_field wants offset in bytes. */
839 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
840 thissize, thispos, part, align);
841 bitsdone += thissize;
845 /* Generate code to extract a byte-field from STR_RTX
846 containing BITSIZE bits, starting at BITNUM,
847 and put it in TARGET if possible (if TARGET is nonzero).
848 Regardless of TARGET, we return the rtx for where the value is placed.
851 STR_RTX is the structure containing the byte (a REG or MEM).
852 UNSIGNEDP is nonzero if this is an unsigned bit field.
853 MODE is the natural mode of the field value once extracted.
854 TMODE is the mode the caller would like the value to have;
855 but the value may be returned with type MODE instead.
857 ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
858 TOTAL_SIZE is the size in bytes of the containing structure,
861 If a TARGET is specified and we can store in it at no extra cost,
862 we do so, and return TARGET.
863 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
864 if they are equally easy. */
867 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
868 target, mode, tmode, align, total_size)
870 register int bitsize;
874 enum machine_mode mode, tmode;
878 int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
879 register int offset = bitnum / unit;
880 register int bitpos = bitnum % unit;
881 register rtx op0 = str_rtx;
882 rtx spec_target = target;
883 rtx spec_target_subreg = 0;
885 /* Discount the part of the structure before the desired byte.
886 We need to know how many bytes are safe to reference after it. */
888 total_size -= (bitpos / BIGGEST_ALIGNMENT
889 * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
891 if (tmode == VOIDmode)
893 while (GET_CODE (op0) == SUBREG)
895 int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
896 int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
898 offset += SUBREG_WORD (op0);
900 if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
902 bitpos += inner_size - outer_size;
905 offset += (bitpos / unit);
910 op0 = SUBREG_REG (op0);
913 /* ??? We currently assume TARGET is at least as big as BITSIZE.
914 If that's wrong, the solution is to test for it and set TARGET to 0
917 /* If OP0 is a register, BITPOS must count within a word.
918 But as we have it, it counts within whatever size OP0 now has.
919 On a bigendian machine, these are not the same, so convert. */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
922 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
923 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
925 /* Extracting a full-word or multi-word value
926 from a structure in a register or aligned memory.
927 This can be done with just SUBREG.
928 So too extracting a subword value in
929 the least significant part of the register. */
931 if (((GET_CODE (op0) == REG
932 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
933 GET_MODE_BITSIZE (GET_MODE (op0))))
934 || (GET_CODE (op0) == MEM
935 && (! SLOW_UNALIGNED_ACCESS
936 || (offset * BITS_PER_UNIT % bitsize == 0
937 && align * BITS_PER_UNIT % bitsize == 0))))
938 && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
939 && bitpos % BITS_PER_WORD == 0)
940 || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
942 ? bitpos + bitsize == BITS_PER_WORD
945 enum machine_mode mode1
946 = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
948 if (mode1 != GET_MODE (op0))
950 if (GET_CODE (op0) == REG)
951 op0 = gen_rtx (SUBREG, mode1, op0, offset);
953 op0 = change_address (op0, mode1,
954 plus_constant (XEXP (op0, 0), offset));
957 return convert_to_mode (tmode, op0, unsignedp);
961 /* Handle fields bigger than a word. */
963 if (bitsize > BITS_PER_WORD)
965 /* Here we transfer the words of the field
966 in the order least significant first.
967 This is because the most significant word is the one which may
968 be less than full. */
970 int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
973 if (target == 0 || GET_CODE (target) != REG)
974 target = gen_reg_rtx (mode);
976 /* Indicate for flow that the entire target reg is being set. */
977 emit_insn (gen_rtx (CLOBBER, VOIDmode, target));
979 for (i = 0; i < nwords; i++)
981 /* If I is 0, use the low-order word in both field and target;
982 if I is 1, use the next to lowest word; and so on. */
983 /* Word number in TARGET to use. */
984 int wordnum = (WORDS_BIG_ENDIAN
985 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
987 /* Offset from start of field in OP0. */
988 int bit_offset = (WORDS_BIG_ENDIAN
989 ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
990 : i * BITS_PER_WORD);
991 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
993 = extract_bit_field (op0, MIN (BITS_PER_WORD,
994 bitsize - i * BITS_PER_WORD),
996 1, target_part, mode, word_mode,
999 if (target_part == 0)
1002 if (result_part != target_part)
1003 emit_move_insn (target_part, result_part);
1008 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1009 need to be zero'd out. */
1010 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1014 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1015 for (i = nwords; i < total_words; i++)
1017 int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1018 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1019 emit_move_insn (target_part, const0_rtx);
1025 /* Signed bit field: sign-extend with two arithmetic shifts. */
1026 target = expand_shift (LSHIFT_EXPR, mode, target,
1027 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1029 return expand_shift (RSHIFT_EXPR, mode, target,
1030 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1034 /* From here on we know the desired field is smaller than a word
1035 so we can assume it is an integer. So we can safely extract it as one
1036 size of integer, if necessary, and then truncate or extend
1037 to the size that is wanted. */
1039 /* OFFSET is the number of words or bytes (UNIT says which)
1040 from STR_RTX to the first word or byte containing part of the field. */
1042 if (GET_CODE (op0) == REG)
1045 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1046 op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
1052 op0 = protect_from_queue (str_rtx, 1);
1055 /* Now OFFSET is nonzero only for memory operands. */
1061 && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0])
1063 && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1064 && (bitsize + bitpos
1065 > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]))))
1067 int xbitpos = bitpos, xoffset = offset;
1068 rtx bitsize_rtx, bitpos_rtx;
1069 rtx last = get_last_insn();
1071 rtx xtarget = target;
1072 rtx xspec_target = spec_target;
1073 rtx xspec_target_subreg = spec_target_subreg;
1075 enum machine_mode maxmode
1076 = insn_operand_mode[(int) CODE_FOR_extzv][0];
1078 if (GET_CODE (xop0) == MEM)
1080 int save_volatile_ok = volatile_ok;
1083 /* Is the memory operand acceptable? */
1085 || ! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
1086 (xop0, GET_MODE (xop0))))
1088 /* No, load into a reg and extract from there. */
1089 enum machine_mode bestmode;
1091 /* Get the mode to use for inserting into this field. If
1092 OP0 is BLKmode, get the smallest mode consistent with the
1093 alignment. If OP0 is a non-BLKmode object that is no
1094 wider than MAXMODE, use its mode. Otherwise, use the
1095 smallest mode containing the field. */
1097 if (GET_MODE (xop0) == BLKmode
1098 || (GET_MODE_SIZE (GET_MODE (op0))
1099 > GET_MODE_SIZE (maxmode)))
1100 bestmode = get_best_mode (bitsize, bitnum,
1101 align * BITS_PER_UNIT, maxmode,
1102 MEM_VOLATILE_P (xop0));
1104 bestmode = GET_MODE (xop0);
1106 if (bestmode == VOIDmode
1107 || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1110 /* Compute offset as multiple of this unit,
1111 counting in bytes. */
1112 unit = GET_MODE_BITSIZE (bestmode);
1113 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1114 xbitpos = bitnum % unit;
1115 xop0 = change_address (xop0, bestmode,
1116 plus_constant (XEXP (xop0, 0),
1118 /* Fetch it to a register in that size. */
1119 xop0 = force_reg (bestmode, xop0);
1121 /* XBITPOS counts within UNIT, which is what is expected. */
1124 /* Get ref to first byte containing part of the field. */
1125 xop0 = change_address (xop0, byte_mode,
1126 plus_constant (XEXP (xop0, 0), xoffset));
1128 volatile_ok = save_volatile_ok;
  /* If op0 is a register, we need it in MAXMODE (which is usually
     SImode) to make it acceptable to the format of extzv.  */
1133 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1135 if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1136 xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
1138 /* On big-endian machines, we count bits from the most significant.
1139 If the bit field insn does not, we must invert. */
1140 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1141 xbitpos = unit - bitsize - xbitpos;
1143 /* Now convert from counting within UNIT to counting in MAXMODE. */
1144 if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1145 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1147 unit = GET_MODE_BITSIZE (maxmode);
1150 || (flag_force_mem && GET_CODE (xtarget) == MEM))
1151 xtarget = xspec_target = gen_reg_rtx (tmode);
1153 if (GET_MODE (xtarget) != maxmode)
1155 if (GET_CODE (xtarget) == REG)
1157 int wider = (GET_MODE_SIZE (maxmode)
1158 > GET_MODE_SIZE (GET_MODE (xtarget)));
1159 xtarget = gen_lowpart (maxmode, xtarget);
1161 xspec_target_subreg = xtarget;
1164 xtarget = gen_reg_rtx (maxmode);
1167 /* If this machine's extzv insists on a register target,
1168 make sure we have one. */
1169 if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
1170 (xtarget, maxmode)))
1171 xtarget = gen_reg_rtx (maxmode);
1173 bitsize_rtx = GEN_INT (bitsize);
1174 bitpos_rtx = GEN_INT (xbitpos);
1176 pat = gen_extzv (protect_from_queue (xtarget, 1),
1177 xop0, bitsize_rtx, bitpos_rtx);
1182 spec_target = xspec_target;
1183 spec_target_subreg = xspec_target_subreg;
1187 delete_insns_since (last);
1188 target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1189 bitpos, target, 1, align);
1195 target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1202 && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0])
1204 && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1205 && (bitsize + bitpos
1206 > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]))))
1208 int xbitpos = bitpos, xoffset = offset;
1209 rtx bitsize_rtx, bitpos_rtx;
1210 rtx last = get_last_insn();
1211 rtx xop0 = op0, xtarget = target;
1212 rtx xspec_target = spec_target;
1213 rtx xspec_target_subreg = spec_target_subreg;
1215 enum machine_mode maxmode
1216 = insn_operand_mode[(int) CODE_FOR_extv][0];
1218 if (GET_CODE (xop0) == MEM)
1220 /* Is the memory operand acceptable? */
1221 if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
1222 (xop0, GET_MODE (xop0))))
1224 /* No, load into a reg and extract from there. */
1225 enum machine_mode bestmode;
1227 /* Get the mode to use for inserting into this field. If
1228 OP0 is BLKmode, get the smallest mode consistent with the
1229 alignment. If OP0 is a non-BLKmode object that is no
1230 wider than MAXMODE, use its mode. Otherwise, use the
1231 smallest mode containing the field. */
1233 if (GET_MODE (xop0) == BLKmode
1234 || (GET_MODE_SIZE (GET_MODE (op0))
1235 > GET_MODE_SIZE (maxmode)))
1236 bestmode = get_best_mode (bitsize, bitnum,
1237 align * BITS_PER_UNIT, maxmode,
1238 MEM_VOLATILE_P (xop0));
1240 bestmode = GET_MODE (xop0);
1242 if (bestmode == VOIDmode
1243 || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1246 /* Compute offset as multiple of this unit,
1247 counting in bytes. */
1248 unit = GET_MODE_BITSIZE (bestmode);
1249 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1250 xbitpos = bitnum % unit;
1251 xop0 = change_address (xop0, bestmode,
1252 plus_constant (XEXP (xop0, 0),
1254 /* Fetch it to a register in that size. */
1255 xop0 = force_reg (bestmode, xop0);
1257 /* XBITPOS counts within UNIT, which is what is expected. */
1260 /* Get ref to first byte containing part of the field. */
1261 xop0 = change_address (xop0, byte_mode,
1262 plus_constant (XEXP (xop0, 0), xoffset));
1265 /* If op0 is a register, we need it in MAXMODE (which is usually
1266 SImode) to make it acceptable to the format of extv. */
1267 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1269 if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1270 xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
1272 /* On big-endian machines, we count bits from the most significant.
1273 If the bit field insn does not, we must invert. */
1274 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1275 xbitpos = unit - bitsize - xbitpos;
1277 /* XBITPOS counts within a size of UNIT.
1278 Adjust to count within a size of MAXMODE. */
1279 if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1280 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1282 unit = GET_MODE_BITSIZE (maxmode);
1285 || (flag_force_mem && GET_CODE (xtarget) == MEM))
1286 xtarget = xspec_target = gen_reg_rtx (tmode);
1288 if (GET_MODE (xtarget) != maxmode)
1290 if (GET_CODE (xtarget) == REG)
1292 int wider = (GET_MODE_SIZE (maxmode)
1293 > GET_MODE_SIZE (GET_MODE (xtarget)));
1294 xtarget = gen_lowpart (maxmode, xtarget);
1296 xspec_target_subreg = xtarget;
1299 xtarget = gen_reg_rtx (maxmode);
1302 /* If this machine's extv insists on a register target,
1303 make sure we have one. */
1304 if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
1305 (xtarget, maxmode)))
1306 xtarget = gen_reg_rtx (maxmode);
1308 bitsize_rtx = GEN_INT (bitsize);
1309 bitpos_rtx = GEN_INT (xbitpos);
1311 pat = gen_extv (protect_from_queue (xtarget, 1),
1312 xop0, bitsize_rtx, bitpos_rtx);
1317 spec_target = xspec_target;
1318 spec_target_subreg = xspec_target_subreg;
1322 delete_insns_since (last);
1323 target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1324 bitpos, target, 0, align);
1330 target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1333 if (target == spec_target)
1335 if (target == spec_target_subreg)
1337 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1339 /* If the target mode is floating-point, first convert to the
1340 integer mode of that size and then access it as a floating-point
1341 value via a SUBREG. */
1342 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1344 target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1347 if (GET_CODE (target) != REG)
1348 target = copy_to_reg (target);
1349 return gen_rtx (SUBREG, tmode, target, 0);
1352 return convert_to_mode (tmode, target, unsignedp);
1357 /* Extract a bit field using shifts and boolean operations
1358 Returns an rtx to represent the value.
1359 OP0 addresses a register (word) or memory (byte).
1360 BITPOS says which bit within the word or byte the bit field starts in.
1361 OFFSET says how many bytes farther the bit field starts;
1362 it is 0 if OP0 is a register.
1363 BITSIZE says how many bits long the bit field is.
1364 (If OP0 is a register, it may be narrower than a full word,
1365 but BITPOS still counts within a full word,
1366 which is significant on bigendian machines.)
1368 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1369 If TARGET is nonzero, attempts to store the value there
1370 and return TARGET, but this is not guaranteed.
1371 If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1373 ALIGN is the alignment that STR_RTX is known to have, measured in bytes. */
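/* In outline (an informal sketch of the steps below, not the exact RTL
   emitted): an unsigned field of N bits at position P within a word W is
   (W >> P) & M, where M has N low-order one bits; a signed field is
   shifted left until its msb reaches the word's msb and then
   arithmetic-shifted right to sign-extend it.  */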
1376 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1377 target, unsignedp, align)
1378 enum machine_mode tmode;
1379 register rtx op0, target;
1380 register int offset, bitsize, bitpos;
1384 int total_bits = BITS_PER_WORD;
1385 enum machine_mode mode;
1387 if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1389 /* Special treatment for a bit field split across two registers. */
1390 if (bitsize + bitpos > BITS_PER_WORD)
1391 return extract_split_bit_field (op0, bitsize, bitpos,
1396 /* Get the proper mode to use for this field. We want a mode that
1397 includes the entire field. If such a mode would be larger than
1398 a word, we won't be doing the extraction the normal way. */
1400 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1401 align * BITS_PER_UNIT, word_mode,
1402 GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1404 if (mode == VOIDmode)
    /* The only way this should occur is if the field spans word
       boundaries.  */
1407 return extract_split_bit_field (op0, bitsize,
1408 bitpos + offset * BITS_PER_UNIT,
1411 total_bits = GET_MODE_BITSIZE (mode);
  /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
     be in the range 0 to total_bits-1, and put any excess bytes in
     OFFSET.  */
1416 if (bitpos >= total_bits)
1418 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1419 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1423 /* Get ref to an aligned byte, halfword, or word containing the field.
1424 Adjust BITPOS to be position within a word,
1425 and OFFSET to be the offset of that word.
1426 Then alter OP0 to refer to that word. */
1427 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1428 offset -= (offset % (total_bits / BITS_PER_UNIT));
1429 op0 = change_address (op0, mode,
1430 plus_constant (XEXP (op0, 0), offset));
1433 mode = GET_MODE (op0);
1435 if (BYTES_BIG_ENDIAN)
1437 /* BITPOS is the distance between our msb and that of OP0.
1438 Convert it to the distance from the lsb. */
1440 bitpos = total_bits - bitsize - bitpos;
1443 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1444 We have reduced the big-endian case to the little-endian case. */
1450 /* If the field does not already start at the lsb,
1451 shift it so it does. */
1452 tree amount = build_int_2 (bitpos, 0);
1453 /* Maybe propagate the target for the shift. */
1454 /* But not if we will return it--could confuse integrate.c. */
1455 rtx subtarget = (target != 0 && GET_CODE (target) == REG
1456 && !REG_FUNCTION_VALUE_P (target)
1458 if (tmode != mode) subtarget = 0;
1459 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1461 /* Convert the value to the desired mode. */
1463 op0 = convert_to_mode (tmode, op0, 1);
1465 /* Unless the msb of the field used to be the msb when we shifted,
1466 mask out the upper bits. */
1468 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1470 #ifdef SLOW_ZERO_EXTEND
1471 /* Always generate an `and' if
1472 we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1473 will combine fruitfully with the zero-extend. */
1478 return expand_binop (GET_MODE (op0), and_optab, op0,
1479 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1480 target, 1, OPTAB_LIB_WIDEN);
1484 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1485 then arithmetic-shift its lsb to the lsb of the word. */
1486 op0 = force_reg (mode, op0);
1490 /* Find the narrowest integer mode that contains the field. */
1492 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1493 mode = GET_MODE_WIDER_MODE (mode))
1494 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1496 op0 = convert_to_mode (mode, op0, 0);
1500 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1502 tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1503 /* Maybe propagate the target for the shift. */
1504 /* But not if we will return the result--could confuse integrate.c. */
1505 rtx subtarget = (target != 0 && GET_CODE (target) == REG
1506 && ! REG_FUNCTION_VALUE_P (target)
1508 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1511 return expand_shift (RSHIFT_EXPR, mode, op0,
1512 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1516 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1517 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1518 complement of that if COMPLEMENT. The mask is truncated if
1519 necessary to the width of mode MODE. The mask is zero-extended if
1520 BITSIZE+BITPOS is too small for MODE. */
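/* For example (illustrative, assuming 32-bit SImode), mask_rtx (SImode,
   4, 8, 0) is the constant 0x00000ff0 and mask_rtx (SImode, 4, 8, 1) is
   its complement, 0xfffff00f.  */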
1523 mask_rtx (mode, bitpos, bitsize, complement)
1524 enum machine_mode mode;
1525 int bitpos, bitsize, complement;
1527 HOST_WIDE_INT masklow, maskhigh;
1529 if (bitpos < HOST_BITS_PER_WIDE_INT)
1530 masklow = (HOST_WIDE_INT) -1 << bitpos;
1534 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1535 masklow &= ((unsigned HOST_WIDE_INT) -1
1536 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1538 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1541 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1543 if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1544 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1545 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1551 maskhigh = ~maskhigh;
1555 return immed_double_const (masklow, maskhigh, mode);
1558 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1559 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
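/* For example, lshift_value (SImode, GEN_INT (0x1ff), 4, 8) is the
   constant 0xff0: the value is truncated to 8 bits (0xff) and then
   shifted left by 4.  */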
1562 lshift_value (mode, value, bitpos, bitsize)
1563 enum machine_mode mode;
1565 int bitpos, bitsize;
1567 unsigned HOST_WIDE_INT v = INTVAL (value);
1568 HOST_WIDE_INT low, high;
1570 if (bitsize < HOST_BITS_PER_WIDE_INT)
1571 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1573 if (bitpos < HOST_BITS_PER_WIDE_INT)
1576 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1581 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1584 return immed_double_const (low, high, mode);
1587 /* Extract a bit field that is split across two words
1588 and return an RTX for the result.
1590 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1591 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1592 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1594 ALIGN is the known alignment of OP0, measured in bytes.
1595 This is also the size of the memory objects to be used. */
1598 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1600 int bitsize, bitpos, unsignedp, align;
1607 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1609 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1610 unit = BITS_PER_WORD;
1612 unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
1614 while (bitsdone < bitsize)
1621 offset = (bitpos + bitsdone) / unit;
1622 thispos = (bitpos + bitsdone) % unit;
1624 /* THISSIZE must not overrun a word boundary. Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
1627 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1628 thissize = MIN (thissize, unit - thispos);
1630 /* If OP0 is a register, then handle OFFSET here.
1632 When handling multiword bitfields, extract_bit_field may pass
1633 down a word_mode SUBREG of a larger REG for a bitfield that actually
1634 crosses a word boundary. Thus, for a SUBREG, we must find
1635 the current word starting from the base register. */
1636 if (GET_CODE (op0) == SUBREG)
1638 word = operand_subword_force (SUBREG_REG (op0),
1639 SUBREG_WORD (op0) + offset,
1640 GET_MODE (SUBREG_REG (op0)));
1643 else if (GET_CODE (op0) == REG)
1645 word = operand_subword_force (op0, offset, GET_MODE (op0));
1651 /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_BIG_ENDIAN.
1653 OFFSET is in UNITs, and UNIT is in bits.
1654 extract_fixed_bit_field wants offset in bytes. */
1655 part = extract_fixed_bit_field (word_mode, word,
1656 offset * unit / BITS_PER_UNIT,
1657 thissize, thispos, 0, 1, align);
1658 bitsdone += thissize;
1660 /* Shift this part into place for the result. */
1661 if (BYTES_BIG_ENDIAN)
1663 if (bitsize != bitsdone)
1664 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1665 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1669 if (bitsdone != thissize)
1670 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1671 build_int_2 (bitsdone - thissize, 0), 0, 1);
1677 /* Combine the parts with bitwise or. This works
1678 because we extracted each part as an unsigned bit field. */
1679 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1685 /* Unsigned bit field: we are done. */
1688 /* Signed bit field: sign-extend with two arithmetic shifts. */
1689 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1690 build_int_2 (BITS_PER_WORD - bitsize, 0),
1692 return expand_shift (RSHIFT_EXPR, word_mode, result,
1693 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1696 /* Add INC into TARGET. */
1699 expand_inc (target, inc)
1702 rtx value = expand_binop (GET_MODE (target), add_optab,
1704 target, 0, OPTAB_LIB_WIDEN);
1705 if (value != target)
1706 emit_move_insn (target, value);
1709 /* Subtract DEC from TARGET. */
1712 expand_dec (target, dec)
1715 rtx value = expand_binop (GET_MODE (target), sub_optab,
1717 target, 0, OPTAB_LIB_WIDEN);
1718 if (value != target)
1719 emit_move_insn (target, value);
1722 /* Output a shift instruction for expression code CODE,
1723 with SHIFTED being the rtx for the value to shift,
1724 and AMOUNT the tree for the amount to shift by.
1725 Store the result in the rtx TARGET, if that is convenient.
1726 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1727 Return the rtx for where the value is. */
1730 expand_shift (code, mode, shifted, amount, target, unsignedp)
1731 enum tree_code code;
1732 register enum machine_mode mode;
1735 register rtx target;
1738 register rtx op1, temp = 0;
1739 register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1740 register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1743 /* Previously detected shift-counts computed by NEGATE_EXPR
1744 and shifted in the other direction; but that does not work
1747 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1749 #ifdef SHIFT_COUNT_TRUNCATED
1750 if (SHIFT_COUNT_TRUNCATED
1751 && GET_CODE (op1) == CONST_INT
1752 && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
1753 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1754 % GET_MODE_BITSIZE (mode));
1757 if (op1 == const0_rtx)
1760 for (try = 0; temp == 0 && try < 3; try++)
1762 enum optab_methods methods;
      if (try == 0)
	methods = OPTAB_DIRECT;
      else if (try == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;
1773 /* Widening does not work for rotation. */
	  if (methods == OPTAB_WIDEN)
	    continue;
1776 else if (methods == OPTAB_LIB_WIDEN)
1778 /* If we have been unable to open-code this by a rotation,
1779 do it as the IOR of two shifts. I.e., to rotate A
1780 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1781 where C is the bitsize of A.
1783 It is theoretically possible that the target machine might
1784 not be able to perform either shift and hence we would
1785 be making two libcalls rather than just the one for the
1786 shift (similarly if IOR could not be done). We will allow
	     this extremely unlikely lossage to avoid complicating the
	     code.  */
1790 rtx subtarget = target == shifted ? 0 : target;
1792 tree type = TREE_TYPE (amount);
1793 tree new_amount = make_tree (type, op1);
1795 = fold (build (MINUS_EXPR, type,
1797 build_int_2 (GET_MODE_BITSIZE (mode),
1801 shifted = force_reg (mode, shifted);
1803 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1804 mode, shifted, new_amount, subtarget, 1);
1805 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1806 mode, shifted, other_amount, 0, 1);
1807 return expand_binop (mode, ior_optab, temp, temp1, target,
1808 unsignedp, methods);
1811 temp = expand_binop (mode,
1812 left ? rotl_optab : rotr_optab,
1813 shifted, op1, target, unsignedp, methods);
1815 /* If we don't have the rotate, but we are rotating by a constant
1816 that is in range, try a rotate in the opposite direction. */
1818 if (temp == 0 && GET_CODE (op1) == CONST_INT
1819 && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1820 temp = expand_binop (mode,
1821 left ? rotr_optab : rotl_optab,
1823 GEN_INT (GET_MODE_BITSIZE (mode)
1825 target, unsignedp, methods);
1828 temp = expand_binop (mode,
1829 left ? ashl_optab : lshr_optab,
1830 shifted, op1, target, unsignedp, methods);
1832 /* Do arithmetic shifts.
1833 Also, if we are going to widen the operand, we can just as well
1834 use an arithmetic right-shift instead of a logical one. */
1835 if (temp == 0 && ! rotate
1836 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1838 enum optab_methods methods1 = methods;
1840 /* If trying to widen a log shift to an arithmetic shift,
1841 don't accept an arithmetic shift of the same size. */
1843 methods1 = OPTAB_MUST_WIDEN;
1845 /* Arithmetic shift */
1847 temp = expand_binop (mode,
1848 left ? ashl_optab : ashr_optab,
1849 shifted, op1, target, unsignedp, methods1);
1852 /* We used to try extzv here for logical right shifts, but that was
1853 only useful for one machine, the VAX, and caused poor code
1854 generation there for lshrdi3, so the code was deleted and a
1855 define_expand for lshrsi3 was added to vax.md. */
1863 enum alg_code { alg_zero, alg_m, alg_shift,
1864 alg_add_t_m2, alg_sub_t_m2,
1865 alg_add_factor, alg_sub_factor,
1866 alg_add_t2_m, alg_sub_t2_m,
1867 alg_add, alg_subtract, alg_factor, alg_shiftop };
1869 /* This structure records a sequence of operations.
1870 `ops' is the number of operations recorded.
1871 `cost' is their total cost.
1872 The operations are stored in `op' and the corresponding
1873 logarithms of the integer coefficients in `log'.
1875 These are the operations:
1876 alg_zero total := 0;
1877 alg_m total := multiplicand;
1878 alg_shift total := total * coeff
1879 alg_add_t_m2 total := total + multiplicand * coeff;
1880 alg_sub_t_m2 total := total - multiplicand * coeff;
1881 alg_add_factor total := total * coeff + total;
1882 alg_sub_factor total := total * coeff - total;
1883 alg_add_t2_m total := total * coeff + multiplicand;
1884 alg_sub_t2_m total := total * coeff - multiplicand;
1886 The first operand must be either alg_zero or alg_m. */
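/* For example, multiplying by 10 can be recorded as the three operations

	alg_m          total := multiplicand
	alg_add_t2_m   total := total*4 + multiplicand   (log 2; gives 5x)
	alg_shift      total := total*2                  (log 1; gives 10x)

   i.e. one shift-add and one shift instead of a general multiply.  */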
  /* The sizes of the OP and LOG fields are not directly related to the
     word size, but the worst case arises when we have few consecutive
     ones or zeros, i.e., a multiplicand like 10101010101...
1895 In that case we will generate shift-by-2, add, shift-by-2, add,...,
1896 in total wordsize operations. */
1897 enum alg_code op[MAX_BITS_PER_WORD];
1898 char log[MAX_BITS_PER_WORD];
/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than cost_limit.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.  */

static void
1907 synth_mult (alg_out, t, cost_limit)
1908 struct algorithm *alg_out;
1909 unsigned HOST_WIDE_INT t;
1913 struct algorithm *alg_in, *best_alg;
1915 unsigned HOST_WIDE_INT q;
1917 /* Indicate that no algorithm is yet found. If no algorithm
1918 is found, this value will be returned and indicate failure. */
1919 alg_out->cost = cost_limit;
1921 if (cost_limit <= 0)
1924 /* t == 1 can be done in zero cost. */
1929 alg_out->op[0] = alg_m;
  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
     fail now.  */
1937 if (zero_cost >= cost_limit)
1942 alg_out->cost = zero_cost;
1943 alg_out->op[0] = alg_zero;
1948 /* We'll be needing a couple extra algorithm structures now. */
1950 alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
1951 best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
1953 /* If we have a group of zero bits at the low-order part of T, try
1954 multiplying by the remaining bits and then doing a shift. */
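      /* For example, t = 20 (binary 10100) is handled as a multiply by 5
	 followed by a shift left by 2.  */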
1958 m = floor_log2 (t & -t); /* m = number of low zero bits */
1960 cost = shift_cost[m];
1961 synth_mult (alg_in, q, cost_limit - cost);
1963 cost += alg_in->cost;
1964 if (cost < cost_limit)
1966 struct algorithm *x;
1967 x = alg_in, alg_in = best_alg, best_alg = x;
1968 best_alg->log[best_alg->ops] = m;
1969 best_alg->op[best_alg->ops] = alg_shift;
1974 /* If we have an odd number, add or subtract one. */
1977 unsigned HOST_WIDE_INT w;
1979 for (w = 1; (w & t) != 0; w <<= 1)
1982 /* Reject the case where t is 3.
1983 Thus we prefer addition in that case. */
1986 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
1989 synth_mult (alg_in, t + 1, cost_limit - cost);
1991 cost += alg_in->cost;
1992 if (cost < cost_limit)
1994 struct algorithm *x;
1995 x = alg_in, alg_in = best_alg, best_alg = x;
1996 best_alg->log[best_alg->ops] = 0;
1997 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2003 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2006 synth_mult (alg_in, t - 1, cost_limit - cost);
2008 cost += alg_in->cost;
2009 if (cost < cost_limit)
2011 struct algorithm *x;
2012 x = alg_in, alg_in = best_alg, best_alg = x;
2013 best_alg->log[best_alg->ops] = 0;
2014 best_alg->op[best_alg->ops] = alg_add_t_m2;
2020 /* Look for factors of t of the form
2021 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2022 If we find such a factor, we can multiply by t using an algorithm that
2023 multiplies by q, shift the result by m and add/subtract it to itself.
2025 We search for large factors first and loop down, even if large factors
2026 are less probable than small; if we find a large factor we will find a
2027 good sequence quickly, and therefore be able to prune (by decreasing
2028 COST_LIMIT) the search. */
2030 for (m = floor_log2 (t - 1); m >= 2; m--)
2032 unsigned HOST_WIDE_INT d;
2034 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2035 if (t % d == 0 && t > d)
2037 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2038 synth_mult (alg_in, t / d, cost_limit - cost);
2040 cost += alg_in->cost;
2041 if (cost < cost_limit)
2043 struct algorithm *x;
2044 x = alg_in, alg_in = best_alg, best_alg = x;
2045 best_alg->log[best_alg->ops] = m;
2046 best_alg->op[best_alg->ops] = alg_add_factor;
2049 /* Other factors will have been taken care of in the recursion. */
2053 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2054 if (t % d == 0 && t > d)
2056 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2057 synth_mult (alg_in, t / d, cost_limit - cost);
2059 cost += alg_in->cost;
2060 if (cost < cost_limit)
2062 struct algorithm *x;
2063 x = alg_in, alg_in = best_alg, best_alg = x;
2064 best_alg->log[best_alg->ops] = m;
2065 best_alg->op[best_alg->ops] = alg_sub_factor;
2072 /* Try shift-and-add (load effective address) instructions,
2073 i.e. do a*3, a*5, a*9. */
2081 cost = shiftadd_cost[m];
2082 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2084 cost += alg_in->cost;
2085 if (cost < cost_limit)
2087 struct algorithm *x;
2088 x = alg_in, alg_in = best_alg, best_alg = x;
2089 best_alg->log[best_alg->ops] = m;
2090 best_alg->op[best_alg->ops] = alg_add_t2_m;
2100 cost = shiftsub_cost[m];
2101 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2103 cost += alg_in->cost;
2104 if (cost < cost_limit)
2106 struct algorithm *x;
2107 x = alg_in, alg_in = best_alg, best_alg = x;
2108 best_alg->log[best_alg->ops] = m;
2109 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2115 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2116 we have not found any algorithm. */
2117 if (cost_limit == alg_out->cost)
2120 /* If we are getting a too long sequence for `struct algorithm'
2121 to record, make this search fail. */
2122 if (best_alg->ops == MAX_BITS_PER_WORD)
2125 /* Copy the algorithm from temporary space to the space at alg_out.
2126 We avoid using structure assignment because the majority of
2127 best_alg is normally undefined, and this is a critical function. */
2128 alg_out->ops = best_alg->ops + 1;
2129 alg_out->cost = cost_limit;
2130 bcopy ((char *) best_alg->op, (char *) alg_out->op,
2131 alg_out->ops * sizeof *alg_out->op);
2132 bcopy ((char *) best_alg->log, (char *) alg_out->log,
2133 alg_out->ops * sizeof *alg_out->log);
2136 /* Perform a multiplication and return an rtx for the result.
2137 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2138 TARGET is a suggestion for where to store the result (an rtx).
2140 We check specially for a constant integer as OP1.
2141 If you want this check for OP0 as well, then before calling
2142 you should swap the two operands if OP0 would be constant. */
2145 expand_mult (mode, op0, op1, target, unsignedp)
2146 enum machine_mode mode;
2147 register rtx op0, op1, target;
2150 rtx const_op1 = op1;
2152 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2153 less than or equal in size to `unsigned int' this doesn't matter.
2154 If the mode is larger than `unsigned int', then synth_mult works only
2155 if the constant value exactly fits in an `unsigned int' without any
2156 truncation. This means that multiplying by negative values does
2157 not work; results are off by 2^32 on a 32 bit machine. */
2159 /* If we are multiplying in DImode, it may still be a win
2160 to try to work with shifts and adds. */
2161 if (GET_CODE (op1) == CONST_DOUBLE
2162 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2163 && HOST_BITS_PER_INT >= BITS_PER_WORD
2164 && CONST_DOUBLE_HIGH (op1) == 0)
2165 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2166 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2167 && GET_CODE (op1) == CONST_INT
2168 && INTVAL (op1) < 0)
2171 /* We used to test optimize here, on the grounds that it's better to
2172 produce a smaller program when -O is not used.
2173 But this causes such a terrible slowdown sometimes
2174 that it seems better to use synth_mult always. */
2176 if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2178 struct algorithm alg;
2179 struct algorithm alg2;
2180 HOST_WIDE_INT val = INTVAL (op1);
2181 HOST_WIDE_INT val_so_far;
2184 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2186 /* Try to do the computation three ways: multiply by the negative of OP1
2187 and then negate, do the multiplication directly, or do multiplication
2190 mult_cost = rtx_cost (gen_rtx (MULT, mode, op0, op1), SET);
2191 mult_cost = MIN (12 * add_cost, mult_cost);
2193 synth_mult (&alg, val, mult_cost);
2195 /* This works only if the inverted value actually fits in an
2197 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2199 synth_mult (&alg2, - val,
2200 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2201 if (alg2.cost + negate_cost < alg.cost)
2202 alg = alg2, variant = negate_variant;
2205 /* This proves very useful for division-by-constant. */
2206 synth_mult (&alg2, val - 1,
2207 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2208 if (alg2.cost + add_cost < alg.cost)
2209 alg = alg2, variant = add_variant;
2211 if (alg.cost < mult_cost)
2213 /* We found something cheaper than a multiply insn. */
2217 op0 = protect_from_queue (op0, 0);
2219 /* Avoid referencing memory over and over.
2220 For speed, but also for correctness when mem is volatile. */
2221 if (GET_CODE (op0) == MEM)
2222 op0 = force_reg (mode, op0);
2224 /* ACCUM starts out either as OP0 or as a zero, depending on
2225 the first operation. */
2227 if (alg.op[0] == alg_zero)
2229 accum = copy_to_mode_reg (mode, const0_rtx);
2232 else if (alg.op[0] == alg_m)
2234 accum = copy_to_mode_reg (mode, op0);
2240 for (opno = 1; opno < alg.ops; opno++)
2242 int log = alg.log[opno];
2243 int preserve = preserve_subexpressions_p ();
2244 rtx shift_subtarget = preserve ? 0 : accum;
2246 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2248 rtx accum_target = preserve ? 0 : accum;
2250 switch (alg.op[opno])
2253 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2254 build_int_2 (log, 0), NULL_RTX, 0);
2259 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2260 build_int_2 (log, 0), NULL_RTX, 0);
2261 accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2262 add_target ? add_target : accum_target);
2263 val_so_far += (HOST_WIDE_INT) 1 << log;
2267 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2268 build_int_2 (log, 0), NULL_RTX, 0);
2269 accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
2270 add_target ? add_target : accum_target);
2271 val_so_far -= (HOST_WIDE_INT) 1 << log;
2275 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2276 build_int_2 (log, 0), shift_subtarget,
2278 accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
2279 add_target ? add_target : accum_target);
2280 val_so_far = (val_so_far << log) + 1;
2284 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2285 build_int_2 (log, 0), shift_subtarget,
2287 accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
2288 add_target ? add_target : accum_target);
2289 val_so_far = (val_so_far << log) - 1;
2292 case alg_add_factor:
2293 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2294 build_int_2 (log, 0), NULL_RTX, 0);
2295 accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2296 add_target ? add_target : accum_target);
2297 val_so_far += val_so_far << log;
2300 case alg_sub_factor:
2301 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2302 build_int_2 (log, 0), NULL_RTX, 0);
2303 accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
2304 (add_target ? add_target
2305 : preserve ? 0 : tem));
2306 val_so_far = (val_so_far << log) - val_so_far;
2313 /* Write a REG_EQUAL note on the last insn so that we can cse
2314 multiplication sequences. */
2316 insn = get_last_insn ();
2318 = gen_rtx (EXPR_LIST, REG_EQUAL,
2319 gen_rtx (MULT, mode, op0, GEN_INT (val_so_far)),
2323 if (variant == negate_variant)
2325 val_so_far = - val_so_far;
2326 accum = expand_unop (mode, neg_optab, accum, target, 0);
2328 else if (variant == add_variant)
2330 val_so_far = val_so_far + 1;
2331 accum = force_operand (gen_rtx (PLUS, mode, accum, op0), target);
2334 if (val != val_so_far)
2341 /* This used to use umul_optab if unsigned, but for non-widening multiply
2342 there is no difference between signed and unsigned. */
2343 op0 = expand_binop (mode, smul_optab,
2344 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2350 /* Return the smallest n such that 2**n >= X. */
2354 unsigned HOST_WIDE_INT x;
2356 return floor_log2 (x - 1) + 1;
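/* For instance (illustration only, assuming floor_log2 (0) is -1):
   ceil_log2 (1) == 0, ceil_log2 (5) == 3 and ceil_log2 (8) == 3.  */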
2359 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2360 replace division by D, and put the least significant N bits of the result
2361 in *MULTIPLIER_PTR and return the most significant bit.
2363 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2364 needed precision is in PRECISION (should be <= N).
2366 PRECISION should be as small as possible so this function can choose
2367 multiplier more freely.
2369 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
2370 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2372 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2373 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2376 unsigned HOST_WIDE_INT
2377 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2378 unsigned HOST_WIDE_INT d;
2381 unsigned HOST_WIDE_INT *multiplier_ptr;
2382 int *post_shift_ptr;
2385 unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2386 unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2387 int lgup, post_shift;
2389 unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2391 /* lgup = ceil(log2(divisor)); */
2392 lgup = ceil_log2 (d);
2398 pow2 = n + lgup - precision;
2400 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2402 /* We could handle this with some effort, but this case is much better
2403 handled directly with a scc insn, so rely on caller using that. */
2407 /* mlow = 2^(N + lgup)/d */
2408 if (pow >= HOST_BITS_PER_WIDE_INT)
2410 nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2416 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2418 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2419 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2421 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2422 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2423 nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2425 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2426 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2427 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2429 if (mhigh_hi && nh - d >= d)
2431 if (mhigh_hi > 1 || mlow_hi > 1)
2433 /* assert that mlow < mhigh. */
2434 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2437 /* If precision == N, then mlow, mhigh exceed 2^N
2438 (but they do not exceed 2^(N+1)). */
2440 /* Reduce to lowest terms */
2441 for (post_shift = lgup; post_shift > 0; post_shift--)
2443 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2444 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2454 *post_shift_ptr = post_shift;
2456 if (n < HOST_BITS_PER_WIDE_INT)
2458 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2459 *multiplier_ptr = mhigh_lo & mask;
2460 return mhigh_lo >= mask;
2464 *multiplier_ptr = mhigh_lo;
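/* Illustrative standalone sketch (not part of the original file) of the
   multiply-and-shift scheme described above: for every 16-bit x,
   x / 3 == (x * 0xAAAB) >> 17, where 0xAAAB == (2**17 + 1)/3.  The
   constants only illustrate the idea; they are not necessarily what this
   function would compute for a given mode.  */

static unsigned int
example_udiv_by_3 (x)
     unsigned int x;
{
  /* Assumes X fits in 16 bits and `unsigned long' has at least 32.  */
  return (unsigned int) (((unsigned long) x * 0xAAABUL) >> 17);
}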
2469 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2470 congruent to 1 (mod 2**N). */
2472 static unsigned HOST_WIDE_INT
2474 unsigned HOST_WIDE_INT x;
2477 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2479 /* The algorithm notes that the choice y = x satisfies
2480 x*y == 1 mod 2^3, since x is assumed odd.
2481 Each iteration doubles the number of bits of significance in y. */
2483 unsigned HOST_WIDE_INT mask;
2484 unsigned HOST_WIDE_INT y = x;
2487 mask = (n == HOST_BITS_PER_WIDE_INT
2488 ? ~(unsigned HOST_WIDE_INT) 0
2489 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2493 y = y * (2 - x*y) & mask; /* Modulo 2^N */
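/* For illustration (not part of the original file): with N == 32 the
   inverse of 3 is 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 0x200000001, which
   is 1 (mod 2**32).  Multiplying by that inverse performs an exact
   division by 3 on any multiple of 3; the EXACT_DIV_EXPR case of
   expand_divmod below relies on exactly this.  */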
2499 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2500 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
2501 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
2502 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
2505 The result is put in TARGET if that is convenient.
2507 MODE is the mode of operation. */
2510 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2511 enum machine_mode mode;
2512 register rtx adj_operand, op0, op1, target;
2516 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2518 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2519 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2521 tem = expand_and (tem, op1, NULL_RTX);
2522 adj_operand = force_operand (gen_rtx (adj_code, mode, adj_operand, tem),
2525 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2526 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2528 tem = expand_and (tem, op0, NULL_RTX);
2529 target = force_operand (gen_rtx (adj_code, mode, adj_operand, tem), target);
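/* A worked illustration (not part of the original file), first with 4-bit
   operands: for OP0 == OP1 == 15 (both -1 when read as signed) the signed
   high part is 0, and adding each operand on behalf of the other's sign
   bit gives 0 + 15 + 15 == 30 == 14 (mod 16), which is indeed the unsigned
   high part of 15 * 15 == 225.  The same adjustment for 16-bit operands,
   as a hypothetical standalone sketch (assuming `long' has at least 32
   bits):  */

static unsigned int
example_unsigned_highpart (x, y)
     unsigned int x, y;
{
  /* X and Y are 16-bit values.  Start from the signed high part.  */
  long xs = (x & 0x8000) ? (long) x - 0x10000L : (long) x;
  long ys = (y & 0x8000) ? (long) y - 0x10000L : (long) y;
  long sprod = xs * ys;
  unsigned int high = (unsigned int) (((unsigned long) sprod >> 16) & 0xffff);

  if (x & 0x8000)
    high = (high + y) & 0xffff;		/* OP0 negative: add OP1.  */
  if (y & 0x8000)
    high = (high + x) & 0xffff;		/* OP1 negative: add OP0.  */

  /* HIGH now equals the high 16 bits of the unsigned product X * Y.  */
  return high;
}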
2534 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2535 in TARGET if that is convenient, and return where the result is. If the
2536 operation can not be performed, 0 is returned.
2538 MODE is the mode of operation and result.
2540 UNSIGNEDP nonzero means unsigned multiply.
2542 MAX_COST is the total allowed cost for the expanded RTL. */
2545 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2546 enum machine_mode mode;
2547 register rtx op0, target;
2548 unsigned HOST_WIDE_INT cnst1;
2552 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2553 optab mul_highpart_optab;
2556 int size = GET_MODE_BITSIZE (mode);
2559 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
2560 if (size > HOST_BITS_PER_WIDE_INT)
2563 op1 = GEN_INT (cnst1);
2565 if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2569 = immed_double_const (cnst1,
2572 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2575 /* expand_mult handles constant multiplication of word_mode
2576 or narrower. It does a poor job for large modes. */
2577 if (size < BITS_PER_WORD
2578 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2580 /* We have to do this, since expand_binop doesn't do conversion for
2581 multiply. Maybe change expand_binop to handle widening multiply? */
2582 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2584 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2585 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2586 build_int_2 (size, 0), NULL_RTX, 1);
2587 return convert_modes (mode, wider_mode, tem, unsignedp);
2591 target = gen_reg_rtx (mode);
2593 /* Firstly, try using a multiplication insn that only generates the needed
2594 high part of the product, and in the sign flavor of unsignedp. */
2595 if (mul_highpart_cost[(int) mode] < max_cost)
2597 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2598 target = expand_binop (mode, mul_highpart_optab,
2599 op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2604 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2605 Need to adjust the result after the multiplication. */
2606 if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2608 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2609 target = expand_binop (mode, mul_highpart_optab,
2610 op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2612 /* We used the wrong signedness. Adjust the result. */
2613 return expand_mult_highpart_adjust (mode, target, op0,
2614 op1, target, unsignedp);
2617 /* Try widening multiplication. */
2618 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2619 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2620 && mul_widen_cost[(int) wider_mode] < max_cost)
2622 op1 = force_reg (mode, op1);
2626 /* Try widening the mode and perform a non-widening multiplication. */
2627 moptab = smul_optab;
2628 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2629 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2635 /* Try widening multiplication of opposite signedness, and adjust. */
2636 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2637 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2638 && (mul_widen_cost[(int) wider_mode]
2639 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2641 rtx regop1 = force_reg (mode, op1);
2642 tem = expand_binop (wider_mode, moptab, op0, regop1,
2643 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2646 /* Extract the high half of the just generated product. */
2647 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2648 build_int_2 (size, 0), NULL_RTX, 1);
2649 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2650 /* We used the wrong signedness. Adjust the result. */
2651 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2659 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2660 tem = expand_binop (wider_mode, moptab, op0, op1,
2661 NULL_RTX, unsignedp, OPTAB_WIDEN);
2665 /* Extract the high half of the just generated product. */
2666 if (mode == word_mode)
2668 return gen_highpart (mode, tem);
2672 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2673 build_int_2 (size, 0), NULL_RTX, 1);
2674 return convert_modes (mode, wider_mode, tem, unsignedp);
2678 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2679 if that is convenient, and returning where the result is.
2680 You may request either the quotient or the remainder as the result;
2681 specify REM_FLAG nonzero to get the remainder.
2683 CODE is the expression code for which kind of division this is;
2684 it controls how rounding is done. MODE is the machine mode to use.
2685 UNSIGNEDP nonzero means do unsigned division. */
2687 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2688 and then correct it by or'ing in missing high bits
2689 if result of ANDI is nonzero.
2690 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2691 This could optimize to a bfexts instruction.
2692 But C doesn't use these operations, so their optimizations are
2695 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
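/* For example, (8 & 7) == 0 and (0 & -1) == 0, so 8 and 0 satisfy the
   predicate above, while (12 & 11) == 8 shows that 12 does not.  */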
2698 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2700 enum tree_code code;
2701 enum machine_mode mode;
2702 register rtx op0, op1, target;
2705 enum machine_mode compute_mode;
2706 register rtx tquotient;
2707 rtx quotient = 0, remainder = 0;
2711 optab optab1, optab2;
2712 int op1_is_constant, op1_is_pow2;
2713 int max_cost, extra_cost;
2715 op1_is_constant = GET_CODE (op1) == CONST_INT;
2716 op1_is_pow2 = (op1_is_constant
2717 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2718 || EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))));
2721 This is the structure of expand_divmod:
2723 First comes code to fix up the operands so we can perform the operations
2724 correctly and efficiently.
2726 Second comes a switch statement with code specific for each rounding mode.
2727 For some special operands this code emits all RTL for the desired
2728 operation, for other cases, it generates only a quotient and stores it in
2729 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
2730 to indicate that it has not done anything.
2732 Last comes code that finishes the operation. If QUOTIENT is set and
2733 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
2734 QUOTIENT is not set, it is computed using trunc rounding.
2736 We try to generate special code for division and remainder when OP1 is a
2737 constant. If |OP1| = 2**n we can use shifts and some other fast
2738 operations. For other values of OP1, we compute a carefully selected
2739 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2742 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2743 half of the product. Different strategies for generating the product are
2744 implemented in expand_mult_highpart.
2746 If what we actually want is the remainder, we generate that by another
2747 by-constant multiplication and a subtraction. */
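/* As a concrete illustration (not taken from the original comments): for
   unsigned SImode division by 7 the selected multiplier is 33 bits wide,
   so the expansion below computes the high part T1 of OP0 * 0x24924925
   and then ((((OP0 - T1) >> 1) + T1) >> 2), which equals OP0 / 7 for every
   32-bit OP0; the remainder, when requested, follows as
   OP0 - 7 * quotient.  */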
2749 /* We shouldn't be called with OP1 == const1_rtx, but some of the
2750 code below will malfunction if we are, so check here and handle
2751 the special case if so. */
2752 if (op1 == const1_rtx)
2753 return rem_flag ? const0_rtx : op0;
2756 /* Don't use the function value register as a target
2757 since we have to read it as well as write it,
2758 and function-inlining gets confused by this. */
2759 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2760 /* Don't clobber an operand while doing a multi-step calculation. */
2761 || ((rem_flag || op1_is_constant)
2762 && (reg_mentioned_p (target, op0)
2763 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2764 || reg_mentioned_p (target, op1)
2765 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2768 /* Get the mode in which to perform this computation. Normally it will
2769 be MODE, but sometimes we can't do the desired operation in MODE.
2770 If so, pick a wider mode in which we can do the operation. Convert
2771 to that mode at the start to avoid repeated conversions.
2773 First see what operations we need. These depend on the expression
2774 we are evaluating. (We assume that divxx3 insns exist under the
2775 same conditions that modxx3 insns and that these insns don't normally
2776 fail. If these assumptions are not correct, we may generate less
2777 efficient code in some cases.)
2779 Then see if we find a mode in which we can open-code that operation
2780 (either a division, modulus, or shift). Finally, check for the smallest
2781 mode for which we can do the operation with a library call. */
2783 /* We might want to refine this now that we have division-by-constant
2784 optimization. Since expand_mult_highpart tries so many variants, it is
2785 not straightforward to generalize this. Maybe we should make an array
2786 of possible modes in init_expmed? Save this for GCC 2.7. */
2788 optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2789 : (unsignedp ? udiv_optab : sdiv_optab));
2790 optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2792 for (compute_mode = mode; compute_mode != VOIDmode;
2793 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2794 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2795 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2798 if (compute_mode == VOIDmode)
2799 for (compute_mode = mode; compute_mode != VOIDmode;
2800 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2801 if (optab1->handlers[(int) compute_mode].libfunc
2802 || optab2->handlers[(int) compute_mode].libfunc)
2805 /* If we still couldn't find a mode, use MODE, but we'll probably abort
2807 if (compute_mode == VOIDmode)
2808 compute_mode = mode;
2810 if (target && GET_MODE (target) == compute_mode)
2813 tquotient = gen_reg_rtx (compute_mode);
2815 size = GET_MODE_BITSIZE (compute_mode);
2817 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
2818 (mode), and thereby get better code when OP1 is a constant. Do that
2819 later. It will require going over all usages of SIZE below. */
2820 size = GET_MODE_BITSIZE (mode);
2823 max_cost = div_cost[(int) compute_mode]
2824 - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
2826 /* Now convert to the best mode to use. */
2827 if (compute_mode != mode)
2829 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
2830 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
2833 /* If one of the operands is a volatile MEM, copy it into a register. */
2835 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
2836 op0 = force_reg (compute_mode, op0);
2837 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
2838 op1 = force_reg (compute_mode, op1);
2840 /* If we need the remainder or if OP1 is constant, we need to
2841 put OP0 in a register in case it has any queued subexpressions. */
2842 if (rem_flag || op1_is_constant)
2843 op0 = force_reg (compute_mode, op0);
2845 last = get_last_insn ();
2847 /* Promote floor rounding to trunc rounding for unsigned operations. */
2850 if (code == FLOOR_DIV_EXPR)
2851 code = TRUNC_DIV_EXPR;
2852 if (code == FLOOR_MOD_EXPR)
2853 code = TRUNC_MOD_EXPR;
2856 if (op1 != const0_rtx)
2859 case TRUNC_MOD_EXPR:
2860 case TRUNC_DIV_EXPR:
2861 if (op1_is_constant)
2865 unsigned HOST_WIDE_INT mh, ml;
2866 int pre_shift, post_shift;
2868 unsigned HOST_WIDE_INT d = INTVAL (op1);
2870 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
2872 pre_shift = floor_log2 (d);
2876 expand_binop (compute_mode, and_optab, op0,
2877 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
2881 return gen_lowpart (mode, remainder);
2883 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2884 build_int_2 (pre_shift, 0),
2887 else if (size <= HOST_BITS_PER_WIDE_INT)
2889 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
2891 /* Most significant bit of divisor is set; emit an scc
2893 quotient = emit_store_flag (tquotient, GEU, op0, op1,
2894 compute_mode, 1, 1);
2900 /* Find a suitable multiplier and right shift count
2901 instead of multiplying with D. */
2903 mh = choose_multiplier (d, size, size,
2904 &ml, &post_shift, &dummy);
2906 /* If the suggested multiplier is more than SIZE bits,
2907 we can do better for even divisors, using an
2908 initial right shift. */
2909 if (mh != 0 && (d & 1) == 0)
2911 pre_shift = floor_log2 (d & -d);
2912 mh = choose_multiplier (d >> pre_shift, size,
2914 &ml, &post_shift, &dummy);
2925 extra_cost = (shift_cost[post_shift - 1]
2926 + shift_cost[1] + 2 * add_cost);
2927 t1 = expand_mult_highpart (compute_mode, op0, ml,
2929 max_cost - extra_cost);
2932 t2 = force_operand (gen_rtx (MINUS, compute_mode,
2935 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
2936 build_int_2 (1, 0), NULL_RTX,1);
2937 t4 = force_operand (gen_rtx (PLUS, compute_mode,
2941 expand_shift (RSHIFT_EXPR, compute_mode, t4,
2942 build_int_2 (post_shift - 1, 0),
2949 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2950 build_int_2 (pre_shift, 0),
2952 extra_cost = (shift_cost[pre_shift]
2953 + shift_cost[post_shift]);
2954 t2 = expand_mult_highpart (compute_mode, t1, ml,
2956 max_cost - extra_cost);
2960 expand_shift (RSHIFT_EXPR, compute_mode, t2,
2961 build_int_2 (post_shift, 0),
2966 else /* Too wide mode to use tricky code */
2969 insn = get_last_insn ();
2971 && (set = single_set (insn)) != 0
2972 && SET_DEST (set) == quotient)
2974 = gen_rtx (EXPR_LIST, REG_EQUAL,
2975 gen_rtx (UDIV, compute_mode, op0, op1),
2978 else /* TRUNC_DIV, signed */
2980 unsigned HOST_WIDE_INT ml;
2981 int lgup, post_shift;
2982 HOST_WIDE_INT d = INTVAL (op1);
2983 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
2985 /* n rem d = n rem -d */
2986 if (rem_flag && d < 0)
2989 op1 = GEN_INT (abs_d);
2995 quotient = expand_unop (compute_mode, neg_optab, op0,
2997 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
2999 /* This case is not handled correctly below. */
3000 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3001 compute_mode, 1, 1);
3005 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3006 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3008 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3010 lgup = floor_log2 (abs_d);
3011 if (abs_d != 2 && BRANCH_COST < 3)
3013 rtx label = gen_label_rtx ();
3016 t1 = copy_to_mode_reg (compute_mode, op0);
3017 emit_cmp_insn (t1, const0_rtx, GE,
3018 NULL_RTX, compute_mode, 0, 0);
3019 emit_jump_insn (gen_bge (label));
3020 expand_inc (t1, GEN_INT (abs_d - 1));
3022 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3023 build_int_2 (lgup, 0),
3029 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3030 build_int_2 (size - 1, 0),
3032 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3033 build_int_2 (size - lgup, 0),
3035 t3 = force_operand (gen_rtx (PLUS, compute_mode,
3038 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3039 build_int_2 (lgup, 0),
3043 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3047 insn = get_last_insn ();
3049 && (set = single_set (insn)) != 0
3050 && SET_DEST (set) == quotient)
3052 = gen_rtx (EXPR_LIST, REG_EQUAL,
3053 gen_rtx (DIV, compute_mode, op0,
3057 quotient = expand_unop (compute_mode, neg_optab,
3058 quotient, quotient, 0);
3061 else if (size <= HOST_BITS_PER_WIDE_INT)
3063 choose_multiplier (abs_d, size, size - 1,
3064 &ml, &post_shift, &lgup);
3065 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3069 extra_cost = (shift_cost[post_shift]
3070 + shift_cost[size - 1] + add_cost);
3071 t1 = expand_mult_highpart (compute_mode, op0, ml,
3073 max_cost - extra_cost);
3076 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3077 build_int_2 (post_shift, 0), NULL_RTX, 0);
3078 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3079 build_int_2 (size - 1, 0), NULL_RTX, 0);
3081 quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t2),
3084 quotient = force_operand (gen_rtx (MINUS, compute_mode, t2, t3),
3091 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3092 extra_cost = (shift_cost[post_shift]
3093 + shift_cost[size - 1] + 2 * add_cost);
3094 t1 = expand_mult_highpart (compute_mode, op0, ml,
3096 max_cost - extra_cost);
3099 t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
3101 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3102 build_int_2 (post_shift, 0), NULL_RTX, 0);
3103 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3104 build_int_2 (size - 1, 0), NULL_RTX, 0);
3106 quotient = force_operand (gen_rtx (MINUS, compute_mode, t4, t3),
3109 quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t4),
3113 else /* Too wide mode to use tricky code */
3116 insn = get_last_insn ();
3118 && (set = single_set (insn)) != 0
3119 && SET_DEST (set) == quotient)
3121 = gen_rtx (EXPR_LIST, REG_EQUAL,
3122 gen_rtx (DIV, compute_mode, op0, op1),
3128 delete_insns_since (last);
3131 case FLOOR_DIV_EXPR:
3132 case FLOOR_MOD_EXPR:
3133 /* We will come here only for signed operations. */
3134 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3136 unsigned HOST_WIDE_INT mh, ml;
3137 int pre_shift, lgup, post_shift;
3138 HOST_WIDE_INT d = INTVAL (op1);
3142 /* We could just as easily deal with negative constants here,
3143 but it does not seem worth the trouble for GCC 2.6. */
3144 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3146 pre_shift = floor_log2 (d);
3149 remainder = expand_binop (compute_mode, and_optab, op0,
3150 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3151 remainder, 0, OPTAB_LIB_WIDEN);
3153 return gen_lowpart (mode, remainder);
3155 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3156 build_int_2 (pre_shift, 0),
3163 mh = choose_multiplier (d, size, size - 1,
3164 &ml, &post_shift, &lgup);
3168 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3169 build_int_2 (size - 1, 0), NULL_RTX, 0);
3170 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3171 NULL_RTX, 0, OPTAB_WIDEN);
3172 extra_cost = (shift_cost[post_shift]
3173 + shift_cost[size - 1] + 2 * add_cost);
3174 t3 = expand_mult_highpart (compute_mode, t2, ml,
3176 max_cost - extra_cost);
3179 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3180 build_int_2 (post_shift, 0),
3182 quotient = expand_binop (compute_mode, xor_optab,
3183 t4, t1, tquotient, 0,
3190 rtx nsign, t1, t2, t3, t4;
3191 t1 = force_operand (gen_rtx (PLUS, compute_mode,
3192 op0, constm1_rtx), NULL_RTX);
3193 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3195 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3196 build_int_2 (size - 1, 0), NULL_RTX, 0);
3197 t3 = force_operand (gen_rtx (MINUS, compute_mode, t1, nsign),
3199 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3204 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3206 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3215 delete_insns_since (last);
3217 /* Try using an instruction that produces both the quotient and
3218 remainder, using truncation. We can easily compensate the quotient
3219 or remainder to get floor rounding, once we have the remainder.
3220 Notice that we compute also the final remainder value here,
3221 and return the result right away. */
3222 if (target == 0 || GET_MODE (target) != compute_mode)
3223 target = gen_reg_rtx (compute_mode);
3228 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3229 quotient = gen_reg_rtx (compute_mode);
3234 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3235 remainder = gen_reg_rtx (compute_mode);
3238 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3239 quotient, remainder, 0))
3241 /* This could be computed with a branch-less sequence.
3242 Save that for later. */
3244 rtx label = gen_label_rtx ();
3245 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3246 compute_mode, 0, 0);
3247 emit_jump_insn (gen_beq (label));
3248 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3249 NULL_RTX, 0, OPTAB_WIDEN);
3250 emit_cmp_insn (tem, const0_rtx, GE, NULL_RTX, compute_mode, 0, 0);
3251 emit_jump_insn (gen_bge (label));
3252 expand_dec (quotient, const1_rtx);
3253 expand_inc (remainder, op1);
3255 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3258 /* No luck with division elimination or divmod. Have to do it
3259 by conditionally adjusting op0 *and* the result. */
3261 rtx label1, label2, label3, label4, label5;
3265 quotient = gen_reg_rtx (compute_mode);
3266 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3267 label1 = gen_label_rtx ();
3268 label2 = gen_label_rtx ();
3269 label3 = gen_label_rtx ();
3270 label4 = gen_label_rtx ();
3271 label5 = gen_label_rtx ();
3272 emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX, compute_mode, 0, 0);
3273 emit_jump_insn (gen_blt (label2));
3274 emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3275 compute_mode, 0, 0);
3276 emit_jump_insn (gen_blt (label1));
3277 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3278 quotient, 0, OPTAB_LIB_WIDEN);
3279 if (tem != quotient)
3280 emit_move_insn (quotient, tem);
3281 emit_jump_insn (gen_jump (label5));
3283 emit_label (label1);
3284 expand_inc (adjusted_op0, const1_rtx);
3285 emit_jump_insn (gen_jump (label4));
3287 emit_label (label2);
3288 emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3289 compute_mode, 0, 0);
3290 emit_jump_insn (gen_bgt (label3));
3291 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3292 quotient, 0, OPTAB_LIB_WIDEN);
3293 if (tem != quotient)
3294 emit_move_insn (quotient, tem);
3295 emit_jump_insn (gen_jump (label5));
3297 emit_label (label3);
3298 expand_dec (adjusted_op0, const1_rtx);
3299 emit_label (label4);
3300 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3301 quotient, 0, OPTAB_LIB_WIDEN);
3302 if (tem != quotient)
3303 emit_move_insn (quotient, tem);
3304 expand_dec (quotient, const1_rtx);
3305 emit_label (label5);
3313 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3316 unsigned HOST_WIDE_INT d = INTVAL (op1);
3317 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3318 build_int_2 (floor_log2 (d), 0),
3320 t2 = expand_binop (compute_mode, and_optab, op0,
3322 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3323 t3 = gen_reg_rtx (compute_mode);
3324 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3325 compute_mode, 1, 1);
3329 lab = gen_label_rtx ();
3330 emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3331 compute_mode, 0, 0);
3332 emit_jump_insn (gen_beq (lab));
3333 expand_inc (t1, const1_rtx);
3338 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3344 /* Try using an instruction that produces both the quotient and
3345 remainder, using truncation. We can easily compensate the
3346 quotient or remainder to get ceiling rounding, once we have the
3347 remainder. Notice that we compute also the final remainder
3348 value here, and return the result right away. */
3349 if (target == 0 || GET_MODE (target) != compute_mode)
3350 target = gen_reg_rtx (compute_mode);
3354 remainder = (GET_CODE (target) == REG
3355 ? target : gen_reg_rtx (compute_mode));
3356 quotient = gen_reg_rtx (compute_mode);
3360 quotient = (GET_CODE (target) == REG
3361 ? target : gen_reg_rtx (compute_mode));
3362 remainder = gen_reg_rtx (compute_mode);
3365 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3368 /* This could be computed with a branch-less sequence.
3369 Save that for later. */
3370 rtx label = gen_label_rtx ();
3371 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3372 compute_mode, 0, 0);
3373 emit_jump_insn (gen_beq (label));
3374 expand_inc (quotient, const1_rtx);
3375 expand_dec (remainder, op1);
3377 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3380 /* No luck with division elimination or divmod. Have to do it
3381 by conditionally adjusting op0 *and* the result. */
3384 rtx adjusted_op0, tem;
3386 quotient = gen_reg_rtx (compute_mode);
3387 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3388 label1 = gen_label_rtx ();
3389 label2 = gen_label_rtx ();
3390 emit_cmp_insn (adjusted_op0, const0_rtx, NE, NULL_RTX,
3391 compute_mode, 0, 0);
3392 emit_jump_insn (gen_bne (label1));
3393 emit_move_insn (quotient, const0_rtx);
3394 emit_jump_insn (gen_jump (label2));
3396 emit_label (label1);
3397 expand_dec (adjusted_op0, const1_rtx);
3398 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3399 quotient, 1, OPTAB_LIB_WIDEN);
3400 if (tem != quotient)
3401 emit_move_insn (quotient, tem);
3402 expand_inc (quotient, const1_rtx);
3403 emit_label (label2);
3408 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3409 && INTVAL (op1) >= 0)
3411 /* This is extremely similar to the code for the unsigned case
3412 above. For 2.7 we should merge these variants, but for
3413 2.6.1 I don't want to touch the code for unsigned since that
3414 gets used in C. The signed case will only be used by other
3418 unsigned HOST_WIDE_INT d = INTVAL (op1);
3419 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3420 build_int_2 (floor_log2 (d), 0),
3422 t2 = expand_binop (compute_mode, and_optab, op0,
3424 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3425 t3 = gen_reg_rtx (compute_mode);
3426 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3427 compute_mode, 1, 1);
3431 lab = gen_label_rtx ();
3432 emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3433 compute_mode, 0, 0);
3434 emit_jump_insn (gen_beq (lab));
3435 expand_inc (t1, const1_rtx);
3440 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3446 /* Try using an instruction that produces both the quotient and
3447 remainder, using truncation. We can easily compensate the
3448 quotient or remainder to get ceiling rounding, once we have the
3449 remainder. Notice that we compute also the final remainder
3450 value here, and return the result right away. */
3451 if (target == 0 || GET_MODE (target) != compute_mode)
3452 target = gen_reg_rtx (compute_mode);
3455 remainder = (GET_CODE (target) == REG
3456 ? target : gen_reg_rtx (compute_mode));
3457 quotient = gen_reg_rtx (compute_mode);
3461 quotient = (GET_CODE (target) == REG
3462 ? target : gen_reg_rtx (compute_mode));
3463 remainder = gen_reg_rtx (compute_mode);
3466 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3469 /* This could be computed with a branch-less sequence.
3470 Save that for later. */
3472 rtx label = gen_label_rtx ();
3473 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3474 compute_mode, 0, 0);
3475 emit_jump_insn (gen_beq (label));
3476 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3477 NULL_RTX, 0, OPTAB_WIDEN);
3478 emit_cmp_insn (tem, const0_rtx, LT, NULL_RTX,
3479 compute_mode, 0, 0);
3480 emit_jump_insn (gen_blt (label));
3481 expand_inc (quotient, const1_rtx);
3482 expand_dec (remainder, op1);
3484 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3487 /* No luck with division elimination or divmod. Have to do it
3488 by conditionally adjusting op0 *and* the result. */
3490 rtx label1, label2, label3, label4, label5;
3494 quotient = gen_reg_rtx (compute_mode);
3495 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3496 label1 = gen_label_rtx ();
3497 label2 = gen_label_rtx ();
3498 label3 = gen_label_rtx ();
3499 label4 = gen_label_rtx ();
3500 label5 = gen_label_rtx ();
3501 emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX,
3502 compute_mode, 0, 0);
3503 emit_jump_insn (gen_blt (label2));
3504 emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3505 compute_mode, 0, 0);
3506 emit_jump_insn (gen_bgt (label1));
3507 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3508 quotient, 0, OPTAB_LIB_WIDEN);
3509 if (tem != quotient)
3510 emit_move_insn (quotient, tem);
3511 emit_jump_insn (gen_jump (label5));
3513 emit_label (label1);
3514 expand_dec (adjusted_op0, const1_rtx);
3515 emit_jump_insn (gen_jump (label4));
3517 emit_label (label2);
3518 emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3519 compute_mode, 0, 0);
3520 emit_jump_insn (gen_blt (label3));
3521 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3522 quotient, 0, OPTAB_LIB_WIDEN);
3523 if (tem != quotient)
3524 emit_move_insn (quotient, tem);
3525 emit_jump_insn (gen_jump (label5));
3527 emit_label (label3);
3528 expand_inc (adjusted_op0, const1_rtx);
3529 emit_label (label4);
3530 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3531 quotient, 0, OPTAB_LIB_WIDEN);
3532 if (tem != quotient)
3533 emit_move_insn (quotient, tem);
3534 expand_inc (quotient, const1_rtx);
3535 emit_label (label5);
3540 case EXACT_DIV_EXPR:
3541 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3543 HOST_WIDE_INT d = INTVAL (op1);
3544 unsigned HOST_WIDE_INT ml;
3548 post_shift = floor_log2 (d & -d);
3549 ml = invert_mod2n (d >> post_shift, size);
3550 t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3552 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3553 build_int_2 (post_shift, 0),
3554 NULL_RTX, unsignedp);
3556 insn = get_last_insn ();
3558 = gen_rtx (EXPR_LIST, REG_EQUAL,
3559 gen_rtx (unsignedp ? UDIV : DIV, compute_mode,
3565 case ROUND_DIV_EXPR:
3566 case ROUND_MOD_EXPR:
3571 label = gen_label_rtx ();
3572 quotient = gen_reg_rtx (compute_mode);
3573 remainder = gen_reg_rtx (compute_mode);
3574 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3577 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3578 quotient, 1, OPTAB_LIB_WIDEN);
3579 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3580 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3581 remainder, 1, OPTAB_LIB_WIDEN);
3583 tem = plus_constant (op1, -1);
3584 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3585 build_int_2 (1, 0), NULL_RTX, 1);
3586 emit_cmp_insn (remainder, tem, LEU, NULL_RTX, compute_mode, 0, 0);
3587 emit_jump_insn (gen_bleu (label));
3588 expand_inc (quotient, const1_rtx);
3589 expand_dec (remainder, op1);
3594 rtx abs_rem, abs_op1, tem, mask;
3596 label = gen_label_rtx ();
3597 quotient = gen_reg_rtx (compute_mode);
3598 remainder = gen_reg_rtx (compute_mode);
3599 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3602 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3603 quotient, 0, OPTAB_LIB_WIDEN);
3604 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3605 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3606 remainder, 0, OPTAB_LIB_WIDEN);
3608 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
3609 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
3610 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3611 build_int_2 (1, 0), NULL_RTX, 1);
3612 emit_cmp_insn (tem, abs_op1, LTU, NULL_RTX, compute_mode, 0, 0);
3613 emit_jump_insn (gen_bltu (label));
3614 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3615 NULL_RTX, 0, OPTAB_WIDEN);
3616 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3617 build_int_2 (size - 1, 0), NULL_RTX, 0);
3618 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3619 NULL_RTX, 0, OPTAB_WIDEN);
3620 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3621 NULL_RTX, 0, OPTAB_WIDEN);
3622 expand_inc (quotient, tem);
3623 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3624 NULL_RTX, 0, OPTAB_WIDEN);
3625 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3626 NULL_RTX, 0, OPTAB_WIDEN);
3627 expand_dec (remainder, tem);
3630 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3635 if (target && GET_MODE (target) != compute_mode)
3640 /* Try to produce the remainder directly without a library call. */
3641 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3643 unsignedp, OPTAB_WIDEN);
3646 /* No luck there. Can we do remainder and divide at once
3647 without a library call? */
3648 remainder = gen_reg_rtx (compute_mode);
3649 if (! expand_twoval_binop ((unsignedp
3653 NULL_RTX, remainder, unsignedp))
3658 return gen_lowpart (mode, remainder);
3661 /* Produce the quotient. */
3662 /* Try a quotient insn, but not a library call. */
3663 quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3664 op0, op1, rem_flag ? NULL_RTX : target,
3665 unsignedp, OPTAB_WIDEN);
3668 /* No luck there. Try a quotient-and-remainder insn,
3669 keeping the quotient alone. */
3670 quotient = gen_reg_rtx (compute_mode);
3671 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3673 quotient, NULL_RTX, unsignedp))
3677 /* Still no luck. If we are not computing the remainder,
3678 use a library call for the quotient. */
3679 quotient = sign_expand_binop (compute_mode,
3680 udiv_optab, sdiv_optab,
3682 unsignedp, OPTAB_LIB_WIDEN);
3689 if (target && GET_MODE (target) != compute_mode)
3693 /* No divide instruction either. Use library for remainder. */
3694 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3696 unsignedp, OPTAB_LIB_WIDEN);
3699 /* We divided. Now finish doing X - Y * (X / Y). */
3700 remainder = expand_mult (compute_mode, quotient, op1,
3701 NULL_RTX, unsignedp);
3702 remainder = expand_binop (compute_mode, sub_optab, op0,
3703 remainder, target, unsignedp,
3708 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3711 /* Return a tree node with data type TYPE, describing the value of X.
3712 Usually this is an RTL_EXPR, if there is no obvious better choice.
3713 X may be an expression; however, we only support those expressions
3714 generated by loop.c. */
3723 switch (GET_CODE (x))
3726 t = build_int_2 (INTVAL (x),
3727 TREE_UNSIGNED (type) || INTVAL (x) >= 0 ? 0 : -1);
3728 TREE_TYPE (t) = type;
3732 if (GET_MODE (x) == VOIDmode)
3734 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3735 TREE_TYPE (t) = type;
3741 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3742 t = build_real (type, d);
3748 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3749 make_tree (type, XEXP (x, 1))));
3752 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3753 make_tree (type, XEXP (x, 1))));
3756 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3759 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3760 make_tree (type, XEXP (x, 1))));
3763 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3764 make_tree (type, XEXP (x, 1))));
3767 return fold (convert (type,
3768 build (RSHIFT_EXPR, unsigned_type (type),
3769 make_tree (unsigned_type (type),
3771 make_tree (type, XEXP (x, 1)))));
3774 return fold (convert (type,
3775 build (RSHIFT_EXPR, signed_type (type),
3776 make_tree (signed_type (type), XEXP (x, 0)),
3777 make_tree (type, XEXP (x, 1)))));
3780 if (TREE_CODE (type) != REAL_TYPE)
3781 t = signed_type (type);
3785 return fold (convert (type,
3786 build (TRUNC_DIV_EXPR, t,
3787 make_tree (t, XEXP (x, 0)),
3788 make_tree (t, XEXP (x, 1)))));
3790 t = unsigned_type (type);
3791 return fold (convert (type,
3792 build (TRUNC_DIV_EXPR, t,
3793 make_tree (t, XEXP (x, 0)),
3794 make_tree (t, XEXP (x, 1)))));
3796 t = make_node (RTL_EXPR);
3797 TREE_TYPE (t) = type;
3798 RTL_EXPR_RTL (t) = x;
3799 /* There are no insns to be output
3800 when this rtl_expr is used. */
3801 RTL_EXPR_SEQUENCE (t) = 0;
3806 /* Return an rtx representing the value of X * MULT + ADD.
3807 TARGET is a suggestion for where to store the result (an rtx).
3808 MODE is the machine mode for the computation.
3809 X and MULT must have mode MODE. ADD may have a different mode;
3810 so can X (a VOIDmode constant defaults to MODE).
3811 UNSIGNEDP is non-zero to do unsigned multiplication.
3812 This may emit insns. */
3815 expand_mult_add (x, target, mult, add, mode, unsignedp)
3816 rtx x, target, mult, add;
3817 enum machine_mode mode;
3820 tree type = type_for_mode (mode, unsignedp);
3821 tree add_type = (GET_MODE (add) == VOIDmode
3822 ? type : type_for_mode (GET_MODE (add), unsignedp));
3823 tree result = fold (build (PLUS_EXPR, type,
3824 fold (build (MULT_EXPR, type,
3825 make_tree (type, x),
3826 make_tree (type, mult))),
3827 make_tree (add_type, add)));
3829 return expand_expr (result, target, VOIDmode, 0);
3832 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
3833 and returning TARGET.
3835 If TARGET is 0, a pseudo-register or constant is returned. */
3838 expand_and (op0, op1, target)
3839 rtx op0, op1, target;
3841 enum machine_mode mode = VOIDmode;
3844 if (GET_MODE (op0) != VOIDmode)
3845 mode = GET_MODE (op0);
3846 else if (GET_MODE (op1) != VOIDmode)
3847 mode = GET_MODE (op1);
3849 if (mode != VOIDmode)
3850 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
3851 else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
3852 tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
3858 else if (tem != target)
3859 emit_move_insn (target, tem);
3863 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
3864 and storing in TARGET. Normally return TARGET.
3865 Return 0 if that cannot be done.
3867 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
3868 it is VOIDmode, they cannot both be CONST_INT.
3870 UNSIGNEDP is for the case where we have to widen the operands
3871 to perform the operation. It says to use zero-extension.
3873 NORMALIZEP is 1 if we should convert the result to be either zero
3874 or one. NORMALIZEP is -1 if we should convert the result to be
3875 either zero or -1. If NORMALIZEP is zero, the result will be left
3876 "raw" out of the scc insn. */
3879 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
3883 enum machine_mode mode;
3888 enum insn_code icode;
3889 enum machine_mode compare_mode;
3890 enum machine_mode target_mode = GET_MODE (target);
3892 rtx last = get_last_insn ();
3893 rtx pattern, comparison;
3895 /* If one operand is constant, make it the second one. Only do this
3896 if the other operand is not constant as well. */
3898 if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
3899 || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
3904 code = swap_condition (code);
3907 if (mode == VOIDmode)
3908 mode = GET_MODE (op0);
3910 /* For some comparisons with 1 and -1, we can convert this to
3911 comparisons with zero. This will often produce more opportunities for
3912 store-flag insns. */
3917 if (op1 == const1_rtx)
3918 op1 = const0_rtx, code = LE;
3921 if (op1 == constm1_rtx)
3922 op1 = const0_rtx, code = LT;
3925 if (op1 == const1_rtx)
3926 op1 = const0_rtx, code = GT;
3929 if (op1 == constm1_rtx)
3930 op1 = const0_rtx, code = GE;
3933 if (op1 == const1_rtx)
3934 op1 = const0_rtx, code = NE;
3937 if (op1 == const1_rtx)
3938 op1 = const0_rtx, code = EQ;
3942 /* From now on, we won't change CODE, so set ICODE now. */
3943 icode = setcc_gen_code[(int) code];
3945 /* If this is A < 0 or A >= 0, we can do this by taking the ones
3946 complement of A (for GE) and shifting the sign bit to the low bit. */
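/* For instance, in QImode with A == -5 == 0xfb (illustration only): a
   logical right shift by 7 yields 1 for A < 0, while for A >= 0 we
   complement first and (~0xfb) >> 7 == 0x04 >> 7 == 0.  */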
3947 if (op1 == const0_rtx && (code == LT || code == GE)
3948 && GET_MODE_CLASS (mode) == MODE_INT
3949 && (normalizep || STORE_FLAG_VALUE == 1
3950 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
3951 && (STORE_FLAG_VALUE
3952 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
3956 /* If the result is to be wider than OP0, it is best to convert it
3957 first. If it is to be narrower, it is *incorrect* to convert it
3959 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
3961 op0 = protect_from_queue (op0, 0);
3962 op0 = convert_modes (target_mode, mode, op0, 0);
3966 if (target_mode != mode)
3970 op0 = expand_unop (mode, one_cmpl_optab, op0,
3971 ((STORE_FLAG_VALUE == 1 || normalizep)
3972 ? 0 : subtarget), 0);
3974 if (STORE_FLAG_VALUE == 1 || normalizep)
3975 /* If we are supposed to produce a 0/1 value, we want to do
3976 a logical shift from the sign bit to the low-order bit; for
3977 a -1/0 value, we do an arithmetic shift. */
3978 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
3979 size_int (GET_MODE_BITSIZE (mode) - 1),
3980 subtarget, normalizep != -1);
3982 if (mode != target_mode)
3983 op0 = convert_modes (target_mode, mode, op0, 0);
3988 if (icode != CODE_FOR_nothing)
3990 /* We think we may be able to do this with a scc insn. Emit the
3991 comparison and then the scc insn.
3993 compare_from_rtx may call emit_queue, which would be deleted below
3994 if the scc insn fails. So call it ourselves before setting LAST. */
3997 last = get_last_insn ();
4000 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4001 if (GET_CODE (comparison) == CONST_INT)
4002 return (comparison == const0_rtx ? const0_rtx
4003 : normalizep == 1 ? const1_rtx
4004 : normalizep == -1 ? constm1_rtx
4007 /* If the code of COMPARISON doesn't match CODE, something is
4008 wrong; we can no longer be sure that we have the operation.
4009 We could handle this case, but it should not happen. */
4011 if (GET_CODE (comparison) != code)
4014 /* Get a reference to the target in the proper mode for this insn. */
4015 compare_mode = insn_operand_mode[(int) icode][0];
4017 if (preserve_subexpressions_p ()
4018 || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
4019 subtarget = gen_reg_rtx (compare_mode);
4021 pattern = GEN_FCN (icode) (subtarget);
4024 emit_insn (pattern);
4026 /* If we are converting to a wider mode, first convert to
4027 TARGET_MODE, then normalize. This produces better combining
4028 opportunities on machines that have a SIGN_EXTRACT when we are
4029 testing a single bit. This mostly benefits the 68k.
4031 If STORE_FLAG_VALUE does not have the sign bit set when
4032 interpreted in COMPARE_MODE, we can do this conversion as
4033 unsigned, which is usually more efficient. */
4034 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4036 convert_move (target, subtarget,
4037 (GET_MODE_BITSIZE (compare_mode)
4038 <= HOST_BITS_PER_WIDE_INT)
4039 && 0 == (STORE_FLAG_VALUE
4040 & ((HOST_WIDE_INT) 1
4041 << (GET_MODE_BITSIZE (compare_mode) -1))));
4043 compare_mode = target_mode;
4048 /* If we want to keep subexpressions around, don't reuse our
4051 if (preserve_subexpressions_p ())
4054 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4055 we don't have to do anything. */
4056 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4057 ;
4058 else if (normalizep == - STORE_FLAG_VALUE)
4059 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4061 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4062 makes it hard to use a value of just the sign bit due to
4063 ANSI integer constant typing rules. */
4064 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4065 && (STORE_FLAG_VALUE
4066 & ((HOST_WIDE_INT) 1
4067 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4068 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4069 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4070 subtarget, normalizep == 1);
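/* Illustration only: on a hypothetical 32-bit target whose STORE_FLAG_VALUE
   is just the sign bit ((HOST_WIDE_INT) 1 << 31), normalizep == 1 gives a
   logical shift right by 31, so the scc result becomes 0 or 1; any other
   normalizep uses an arithmetic shift, giving 0 or -1.  */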
4071 else if (STORE_FLAG_VALUE & 1)
4073 op0 = expand_and (op0, const1_rtx, subtarget);
4074 if (normalizep == -1)
4075 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4080 /* If we were converting to a smaller mode, do the conversion now.  */
4082 if (target_mode != compare_mode)
4084 convert_move (target, op0, 0);
4092 delete_insns_since (last);
4094 /* If expensive optimizations are enabled, use a different pseudo register for
4095 each insn, instead of reusing the same pseudo.  This leads to better CSE,
4096 but slows down the compiler, since there are more pseudos.  */
4097 subtarget = (!flag_expensive_optimizations
4098 && (target_mode == mode)) ? target : NULL_RTX;
4100 /* If we reached here, we can't do this with a scc insn. However, there
4101 are some comparisons that can be done directly. For example, if
4102 this is an equality comparison of integers, we can try to exclusive-or
4103 (or subtract) the two operands and use a recursive call to try the
4104 comparison with zero.  Don't do any of these cases if branches are very cheap.  */
4108 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4109 && op1 != const0_rtx)
4111 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4115 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4118 tem = emit_store_flag (target, code, tem, const0_rtx,
4119 mode, unsignedp, normalizep);
4121 delete_insns_since (last);
4125 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4126 the constant zero. Reject all other comparisons at this point. Only
4127 do LE and GT if branches are expensive, since the LE and GT sequences
4128 are themselves expensive on 2-operand machines.  */
4130 if (BRANCH_COST == 0
4131 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4132 || (code != EQ && code != NE
4133 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4134 return 0;
4136 /* See what we need to return.  We can only return a 1, -1, or the sign bit.  */
4139 if (normalizep == 0)
4141 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4142 normalizep = STORE_FLAG_VALUE;
4144 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4145 && (STORE_FLAG_VALUE
4146 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4152 /* Try to put the result of the comparison in the sign bit. Assume we can't
4153 do the necessary operation below. */
4157 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4158 the sign bit set. */
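/* Worked example (illustration, 8-bit values):
       A =  0:  A - 1 = 0xff,  A | (A - 1) = 0xff   sign bit set,   A <= 0 true
       A =  5:  A - 1 = 0x04,  A | (A - 1) = 0x05   sign bit clear, A <= 0 false
       A = -3:  A - 1 = 0xfc,  A | (A - 1) = 0xfd   sign bit set,   A <= 0 true  */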
4162 /* This is destructive, so SUBTARGET can't be OP0. */
4163 if (rtx_equal_p (subtarget, op0))
4166 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4169 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4173 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4174 number of bits in the mode of OP0, minus one. */
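/* Worked example (illustration, 8-bit values, so BITS == 7):
       A =  5:  (A >> 7) - A =  0 - 5    = -5   sign bit set,   A > 0 true
       A =  0:  (A >> 7) - A =  0 - 0    =  0   sign bit clear, A > 0 false
       A = -3:  (A >> 7) - A = -1 - (-3) =  2   sign bit clear, A > 0 false  */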
4178 if (rtx_equal_p (subtarget, op0))
4181 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4182 size_int (GET_MODE_BITSIZE (mode) - 1),
4184 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4188 if (code == EQ || code == NE)
4190 /* For EQ or NE, one way to do the comparison is to apply an operation
4191 that converts the operand into a positive number if it is non-zero,
4192 or into zero if it was originally zero.  Then, for EQ, we subtract 1 and
4193 for NE we negate. This puts the result in the sign bit. Then we
4194 normalize with a shift, if needed.
4196 Two operations that can do the above actions are ABS and FFS, so try
4197 them. If that doesn't work, and MODE is smaller than a full word,
4198 we can use zero-extension to the wider mode (an unsigned conversion)
4199 as the operation. */
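/* Illustration only, using ABS on 8-bit values:
       A = 0:  abs (A) = 0;  EQ: 0 - 1 = -1, sign bit set;   NE: -0 = 0, sign bit clear
       A = 7:  abs (A) = 7;  EQ: 7 - 1 =  6, sign bit clear; NE: -7,     sign bit set
   FFS behaves the same way here, since it too is zero only for a zero
   operand and positive otherwise.  */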
4201 if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4202 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4203 else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4204 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4205 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4207 op0 = protect_from_queue (op0, 0);
4208 tem = convert_modes (word_mode, mode, op0, 1);
4215 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4218 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4221 /* If we couldn't do it that way, for NE we can "or" the two's complement
4222 of the value with itself. For EQ, we take the one's complement of
4223 that "or", which is an extra insn, so we only handle EQ if branches
4226 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4228 if (rtx_equal_p (subtarget, op0))
4231 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4232 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4235 if (tem && code == EQ)
4236 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4240 if (tem && normalizep)
4241 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4242 size_int (GET_MODE_BITSIZE (mode) - 1),
4243 subtarget, normalizep == 1);
4247 if (GET_MODE (tem) != target_mode)
4249 convert_move (target, tem, 0);
4252 else if (!subtarget)
4254 emit_move_insn (target, tem);
4259 delete_insns_since (last);
4264 /* Like emit_store_flag, but always succeeds. */
4266 rtx
4267 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4268 rtx target;
4269 enum rtx_code code;
4270 rtx op0, op1;
4271 enum machine_mode mode;
4272 int unsignedp, normalizep;
4277 /* First see if emit_store_flag can do the job. */
4278 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4282 if (normalizep == 0)
4283 normalizep = 1;
4285 /* If this failed, we have to do this with set/compare/jump/set code. */
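/* Rough sketch of the sequence emitted below (illustration only):
       target = 1;
       if (op0 <CODE> op1) goto label;
       target = 0;
     label:
   TARGET must therefore be a register that is not mentioned in either
   operand, which is what the test just below guarantees.  */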
4287 if (GET_CODE (target) != REG
4288 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4289 target = gen_reg_rtx (GET_MODE (target));
4291 emit_move_insn (target, const1_rtx);
4292 tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4293 if (GET_CODE (tem) == CONST_INT)
4296 label = gen_label_rtx ();
4297 if (bcc_gen_fctn[(int) code] == 0)
4298 abort ();
4300 emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
4301 emit_move_insn (target, const0_rtx);