/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 88, 89, 92-5, 1996 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "insn-flags.h"
#include "insn-codes.h"
#include "insn-config.h"
#include "expr.h"
#include "real.h"
#include "recog.h"
static void store_fixed_bit_field PROTO((rtx, int, int, int, rtx, int));
static void store_split_bit_field PROTO((rtx, int, int, rtx, int));
static rtx extract_fixed_bit_field PROTO((enum machine_mode, rtx, int,
					  int, int, rtx, int, int));
static rtx mask_rtx PROTO((enum machine_mode, int, int, int));
static rtx lshift_value PROTO((enum machine_mode, rtx, int, int));
static rtx extract_split_bit_field PROTO((rtx, int, int, int, int));
#define CEIL(x,y) (((x) + (y) - 1) / (y))
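/* Editorial example, not in the original source: CEIL (33, 32) == 2,
   i.e. a 33-bit field needs two 32-bit words.  */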
/* Non-zero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap, smod_pow2_cheap;
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
#endif
/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
void
init_expmed ()
{
  char *free_point;
  /* This is "some random pseudo register" for purposes of calling recog
     to see what insns exist.  */
  rtx reg = gen_rtx (REG, word_mode, 10000);
  rtx shift_insn, shiftadd_insn, shiftsub_insn;
  int dummy;
  int m;
  enum machine_mode mode, wider_mode;

  start_sequence ();

  /* Since we are on the permanent obstack, we must be sure we save this
     spot AFTER we call start_sequence, since it will reuse the rtl it
     makes.  */

  free_point = (char *) oballoc (0);
  zero_cost = rtx_cost (const0_rtx, 0);
  add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
  shift_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
				   gen_rtx (ASHIFT, word_mode, reg,
					    const0_rtx)));

  shiftadd_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
				      gen_rtx (PLUS, word_mode,
					       gen_rtx (MULT, word_mode,
							reg, const0_rtx),
					       reg)));

  shiftsub_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
				      gen_rtx (MINUS, word_mode,
					       gen_rtx (MULT, word_mode,
							reg, const0_rtx),
					       reg)));

  init_recog ();

  shift_cost[0] = 0;
  shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
  for (m = 1; m < BITS_PER_WORD; m++)
    {
      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
    }
  negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);

  sdiv_pow2_cheap
    = (rtx_cost (gen_rtx (DIV, word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);
  smod_pow2_cheap
    = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);
  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      reg = gen_rtx (REG, mode, 10000);
      div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
      mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  mul_widen_cost[(int) wider_mode]
	    = rtx_cost (gen_rtx (MULT, wider_mode,
				 gen_rtx (ZERO_EXTEND, wider_mode, reg),
				 gen_rtx (ZERO_EXTEND, wider_mode, reg)),
			SET);
	  mul_highpart_cost[(int) mode]
	    = rtx_cost (gen_rtx (TRUNCATE, mode,
				 gen_rtx (LSHIFTRT, wider_mode,
					  gen_rtx (MULT, wider_mode,
						   gen_rtx (ZERO_EXTEND, wider_mode, reg),
						   gen_rtx (ZERO_EXTEND, wider_mode, reg)),
					  GEN_INT (GET_MODE_BITSIZE (mode)))),
			SET);
	}
    }
  /* Free the objects we just allocated.  */
  end_sequence ();
  obfree (free_point);
}
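/* Editorial note, not in the original source: these tables let later code
   compare strategies by cost alone.  For instance, x * 5 can be computed
   as one shift-and-add (x * 4 + x), so it should be preferred to a general
   multiply exactly when shiftadd_cost[2] < mul_cost[(int) SImode];
   synth_mult below makes precisely this kind of comparison.  */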
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (mode, x)
     enum machine_mode mode;
     rtx x;
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.  (The latter assumes that an n-bit machine
   will be able to insert bit fields up to n bits wide.)
   It isn't certain that either of these is right.
   extract_bit_field has the same quandary.  */
rtx
store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
     rtx str_rtx;
     register int bitsize;
     int bitnum;
     enum machine_mode fieldmode;
     rtx value;
     int align;
     int total_size;
{
  int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  register int offset = bitnum / unit;
  register int bitpos = bitnum % unit;
  register rtx op0 = str_rtx;
  if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
    abort ();

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      offset += SUBREG_WORD (op0);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
	 right after the loop.  */
      op0 = SUBREG_REG (op0);
    }
  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  value = protect_from_queue (value, 0);

  if (flag_force_mem)
    value = force_not_mem (value);
  /* Note that the adjustment of BITPOS above has no effect on whether
     BITPOS is 0 in a REG bigger than a word.  */
  if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
      && (GET_CODE (op0) != MEM
	  || ! SLOW_UNALIGNED_ACCESS
	  || (offset * BITS_PER_UNIT % bitsize == 0
	      && align % GET_MODE_SIZE (fieldmode) == 0))
      && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
    {
      /* Storing in a full-word or multi-word field in a register
	 can be done with just SUBREG.  */
      if (GET_MODE (op0) != fieldmode)
	{
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
	  else
	    op0 = change_address (op0, fieldmode,
				  plus_constant (XEXP (op0, 0), offset));
	}

      emit_move_insn (op0, value);
      return value;
    }
  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (GET_CODE (op0) != MEM
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (GET_MODE (op0) == fieldmode
	  || (movstrict_optab->handlers[(int) fieldmode].insn_code
	      != CODE_FOR_nothing)))
    {
      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (GET_MODE (op0) == fieldmode)
	emit_move_insn (op0, value);
      else
	{
	  int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
	  if (! (*insn_operand_predicate[icode][1]) (value, fieldmode))
	    value = copy_to_mode_reg (fieldmode, value);
	  emit_insn (GEN_FCN (icode)
		     (gen_rtx (SUBREG, fieldmode, op0, offset), value));
	}
      return value;
    }
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;

      int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force will
	 result in an abort.  */
      fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  int wordnum = (backwards ? nwords - i - 1 : i);
	  int bit_offset = (backwards
			    ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
			    : i * BITS_PER_WORD);
	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum,
						  (GET_MODE (value) == VOIDmode
						   ? fieldmode
						   : GET_MODE (value))),
			   align, total_size);
	}
      return value;
    }
  /* From here on we can assume that the field to be stored in
     fits within a single word, since it is no wider than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) == REG)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
		       op0, offset);
      offset = 0;
    }
  else
    op0 = protect_from_queue (op0, 1);
  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
    {
      if (GET_CODE (value) != REG)
	value = copy_to_reg (value);
      value = gen_rtx (SUBREG, word_mode, value, 0);
    }
  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

#ifdef HAVE_insv
  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3])
	  >= bitsize)
      && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos
		> GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode
	= insn_operand_mode[(int) CODE_FOR_insv][3];

      int save_volatile_ok = volatile_ok;
      volatile_ok = 1;
      /* If this machine's insv can only insert into a register, or if we
	 are to force MEMs into a register, copy OP0 into a register and
	 save it back later.  */
      if (GET_CODE (op0) == MEM
	  && (flag_force_mem
	      || ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
		    (op0, VOIDmode))))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment.  If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode.  Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  /* Compute offset as multiple of this unit, counting in bytes.  */
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = change_address (op0, bestmode,
				plus_constant (XEXP (op0, 0), offset));

	  /* Fetch that unit, store the bitfield in it, then store the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
			   align, total_size);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (GET_CODE (xop0) == MEM)
	xop0 = change_address (xop0, byte_mode,
			       plus_constant (XEXP (xop0, 0), offset));

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx (SUBREG, maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
      if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;
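      /* Editorial example, not in the original source: with unit == 32
	 and bitsize == 8, a field at msb-relative position 4 sits at
	 lsb-relative position 32 - 8 - 4 == 20.  */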
      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);
      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  /* Avoid making subreg of a subreg, or of a mem.  */
		  if (GET_CODE (value1) != REG)
		    value1 = copy_to_reg (value1);
		  value1 = gen_rtx (SUBREG, maxmode, value1, 0);
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (!CONSTANT_P (value))
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    abort ();
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);
      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
	}
    }
  else
    insv_loses:
#endif
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);

  return value;
}
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)
   STRUCT_ALIGN is the alignment the structure is known to have (in bytes).

   Note that protect_from_queue has already been done on OP0 and VALUE.  */
static void
store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
     register rtx op0;
     register int offset, bitsize, bitpos;
     register rtx value;
     int struct_align;
{
  register enum machine_mode mode;
  int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 value, BITS_PER_WORD);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    struct_align * BITS_PER_UNIT, word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0,
				 bitsize, bitpos + offset * BITS_PER_UNIT,
				 value, struct_align);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }
  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;
  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */
  if (GET_CODE (value) == CONST_INT)
    {
      register HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_2 (bitpos, 0), NULL_RTX, 1);
    }
  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
			 subtarget, 1, OPTAB_LIB_WIDEN);
  if (temp != op0)
    emit_move_insn (op0, temp);
}
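/* Editorial sketch, not part of GNU CC: the mask/clear/or sequence above,
   written out in plain C for a single 32-bit word, little-endian bit
   numbering, 0 < bitsize < 32.  */
#if 0
static unsigned int
store_fixed_bit_field_example (unsigned int word, unsigned int value,
			       int bitpos, int bitsize)
{
  unsigned int mask = (((unsigned int) 1 << bitsize) - 1) << bitpos;

  /* Truncate VALUE to the field width, shift it into place,
     clear the old bits, then or in the new ones.  */
  return (word & ~mask) | ((value << bitpos) & mask);
}
#endif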
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   within the first of those objects.
   VALUE is the value to store.
   ALIGN is the known alignment of OP0, measured in bytes.
   This is also the size of the memory objects to be used.

   This does not yet handle fields wider than BITS_PER_WORD.  */
static void
store_split_bit_field (op0, bitsize, bitpos, value, align)
     rtx op0;
     int bitsize, bitpos;
     rtx value;
     int align;
{
  int unit;
  int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }
  while (bitsdone < bitsize)
    {
      int thissize;
      rtx part, word;
      int thispos;
      int offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.

	       ??? We have no idea what the alignment of VALUE is, so
	       we have to use a guess.  */
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize,
		 total_bits - bitsize + bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1
		    : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1
		    : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
	}
      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  word = operand_subword_force (SUBREG_REG (op0),
					SUBREG_WORD (op0) + offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;
      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
			     thissize, thispos, part, align);
      bitsdone += thissize;
    }
}
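/* Editorial sketch, not part of GNU CC: the split store above in plain C,
   for an 11-bit field at bit 27 of an array of 32-bit words (little-endian
   bit numbering).  The loop's two iterations place the low 5 bits in word 0
   and the remaining 6 bits in word 1.  */
#if 0
static void
store_split_example (unsigned int *words, unsigned int value)
{
  unsigned int lowmask = ((unsigned int) 1 << 5) - 1;
  unsigned int highmask = ((unsigned int) 1 << 6) - 1;

  /* First part: 5 bits at positions 27..31 of word 0.  */
  words[0] = (words[0] & ~(lowmask << 27)) | ((value & lowmask) << 27);
  /* Second part: 6 bits at positions 0..5 of word 1.  */
  words[1] = (words[1] & ~highmask) | ((value >> 5) & highmask);
}
#endif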
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */
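/* Editorial example, not in the original source: for a signed 16-bit
   bit-field referenced in an int-sized expression, the caller would pass
   MODE == HImode (the field's natural mode) and TMODE == SImode (the mode
   it would prefer back); either may be returned.  */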
rtx
extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
		   target, mode, tmode, align, total_size)
     rtx str_rtx;
     register int bitsize;
     int bitnum;
     int unsignedp;
     rtx target;
     enum machine_mode mode, tmode;
     int align;
     int total_size;
{
  int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  register int offset = bitnum / unit;
  register int bitpos = bitnum % unit;
  register rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  if (tmode == VOIDmode)
    tmode = mode;
  while (GET_CODE (op0) == SUBREG)
    {
      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));

      offset += SUBREG_WORD (op0);

      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
	{
	  bitpos += inner_size - outer_size;
	  if (bitpos > unit)
	    {
	      offset += (bitpos / unit);
	      bitpos %= unit;
	    }
	}

      op0 = SUBREG_REG (op0);
    }
  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */
  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  /* Extracting a full-word or multi-word value
     from a structure in a register or aligned memory.
     This can be done with just SUBREG.
     So too extracting a subword value in
     the least significant part of the register.  */

  if ((GET_CODE (op0) == REG
       || (GET_CODE (op0) == MEM
	   && (! SLOW_UNALIGNED_ACCESS
	       || (offset * BITS_PER_UNIT % bitsize == 0
		   && align * BITS_PER_UNIT % bitsize == 0))))
      && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	   && bitpos % BITS_PER_WORD == 0)
	  || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
	      && (BYTES_BIG_ENDIAN
		  ? bitpos + bitsize == BITS_PER_WORD
		  : bitpos == 0))))
    {
      enum machine_mode mode1
	= mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);

      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx (SUBREG, mode1, op0, offset);
	  else
	    op0 = change_address (op0, mode1,
				  plus_constant (XEXP (op0, 0), offset));
	}

      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      int i;

      if (target == 0 || GET_CODE (target) != REG)
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx (CLOBBER, VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  int wordnum = (WORDS_BIG_ENDIAN
			 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
			 : i);
	  /* Offset from start of field in OP0.  */
	  int bit_offset = (WORDS_BIG_ENDIAN
			    ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
			    : i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset,
				 1, target_part, mode, word_mode,
				 align, total_size);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		{
		  int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
		  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
		  emit_move_insn (target_part, const0_rtx);
		}
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			   NULL_RTX, 0);
    }
  /* From here on we know the desired field is no wider than a word,
     so we can treat it as an integer: extract it as one size of
     integer if necessary, and then truncate or extend
     to the size that is wanted.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) == REG)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
		       op0, offset);
      offset = 0;
    }
  else
    op0 = protect_from_queue (str_rtx, 1);
  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
#ifdef HAVE_extzv
      if (HAVE_extzv
	  && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0])
	      >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos
		    > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]))))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode
	    = insn_operand_mode[(int) CODE_FOR_extzv][0];
	  if (GET_CODE (xop0) == MEM)
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (flag_force_mem
		  || ! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
			(xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      align * BITS_PER_UNIT, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = change_address (xop0, bestmode,
					 plus_constant (XEXP (xop0, 0),
							xoffset));
		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = change_address (xop0, byte_mode,
				       plus_constant (XEXP (xop0, 0), xoffset));

	      volatile_ok = save_volatile_ok;
	    }
	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);
	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }
	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (protect_from_queue (xtarget, 1),
			   xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
						bitpos, target, 1, align);
	    }
	}
      else
	extzv_loses:
#endif
      target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
					target, 1, align);
    }
  else
    {
#ifdef HAVE_extv
      if (HAVE_extv
	  && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0])
	      >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos
		    > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]))))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode
	    = insn_operand_mode[(int) CODE_FOR_extv][0];
	  if (GET_CODE (xop0) == MEM)
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      align * BITS_PER_UNIT, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = change_address (xop0, bestmode,
					 plus_constant (XEXP (xop0, 0),
							xoffset));
		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = change_address (xop0, byte_mode,
				       plus_constant (XEXP (xop0, 0), xoffset));
	    }
	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);
	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);
	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (protect_from_queue (xtarget, 1),
			  xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
						bitpos, target, 0, align);
	    }
	}
      else
	extv_loses:
#endif
      target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
					target, 0, align);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  if (GET_CODE (target) != REG)
	    target = copy_to_reg (target);
	  return gen_rtx (SUBREG, tmode, target, 0);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.  */
static rtx
extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
			 target, unsignedp, align)
     enum machine_mode tmode;
     register rtx op0, target;
     register int offset, bitsize, bitpos;
     int unsignedp;
     int align;
{
  int total_bits = BITS_PER_WORD;
  enum machine_mode mode;
  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos,
					unsignedp, align);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    align * BITS_PER_UNIT, word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp, align);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }
  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS is the distance between our msb and that of OP0.
	 Convert it to the distance from the lsb.  */

      bitpos = total_bits - bitsize - bitpos;
    }

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_2 (bitpos, 0);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
			   && !REG_FUNCTION_VALUE_P (target)
			   ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);
      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
#if 0
#ifdef SLOW_ZERO_EXTEND
	  /* Always generate an `and' if
	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
	     will combine fruitfully with the zero-extend.  */
	  || tmode != mode
#endif
#endif
	  )
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }
  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }
  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
      /* Maybe propagate the target for the shift.  */
      /* But not if we will return the result--could confuse integrate.c.  */
      rtx subtarget = (target != 0 && GET_CODE (target) == REG
		       && ! REG_FUNCTION_VALUE_P (target)
		       ? target : 0);

      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
		       target, 0);
}
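/* Editorial sketch, not part of GNU CC: the two extraction patterns above
   in plain C for one 32-bit word, little-endian bit numbering,
   0 < bitsize < 32.  */
#if 0
static unsigned int
extract_unsigned_example (unsigned int word, int bitpos, int bitsize)
{
  /* Shift the field down to the lsb, then mask off the upper bits.  */
  return (word >> bitpos) & (((unsigned int) 1 << bitsize) - 1);
}

static int
extract_signed_example (unsigned int word, int bitpos, int bitsize)
{
  /* Shift the field's msb up to the word's msb, then arithmetic-shift
     back down so the sign bit replicates.  (Relies on >> of a negative
     int being arithmetic, which C leaves implementation-defined.)  */
  return ((int) (word << (32 - bitpos - bitsize))) >> (32 - bitsize);
}
#endif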
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */
static rtx
mask_rtx (mode, bitpos, bitsize, complement)
     enum machine_mode mode;
     int bitpos, bitsize, complement;
{
  HOST_WIDE_INT masklow, maskhigh;

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    masklow = (HOST_WIDE_INT) -1 << bitpos;
  else
    masklow = 0;

  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
    masklow &= ((unsigned HOST_WIDE_INT) -1
		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));

  if (bitpos <= HOST_BITS_PER_WIDE_INT)
    maskhigh = -1;
  else
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);

  if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
  else
    maskhigh = 0;

  if (complement)
    {
      maskhigh = ~maskhigh;
      masklow = ~masklow;
    }

  return immed_double_const (masklow, maskhigh, mode);
}
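/* Editorial example, not in the original source: with a 32-bit
   HOST_WIDE_INT, mask_rtx (SImode, 4, 8, 0) builds 0x00000ff0 (eight ones
   followed by four zeros); the complement form builds 0xfffff00f.  */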
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
static rtx
lshift_value (mode, value, bitpos, bitsize)
     enum machine_mode mode;
     rtx value;
     int bitpos, bitsize;
{
  unsigned HOST_WIDE_INT v = INTVAL (value);
  HOST_WIDE_INT low, high;

  if (bitsize < HOST_BITS_PER_WIDE_INT)
    v &= ~((HOST_WIDE_INT) -1 << bitsize);

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    {
      low = v << bitpos;
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
    }
  else
    {
      low = 0;
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
    }

  return immed_double_const (low, high, mode);
}
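/* Editorial example, not in the original source: for a CONST_INT of 5,
   lshift_value (SImode, value, 4, 8) truncates 5 to 8 bits and shifts it
   left by 4, yielding the constant 0x50.  */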
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.

   ALIGN is the known alignment of OP0, measured in bytes.
   This is also the size of the memory objects to be used.  */
static rtx
extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
     rtx op0;
     int bitsize, bitpos, unsignedp, align;
{
  int unit;
  int bitsdone = 0;
  rtx result;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
  while (bitsdone < bitsize)
    {
      int thissize;
      rtx part, word;
      int thispos;
      int offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  word = operand_subword_force (SUBREG_REG (op0),
					SUBREG_WORD (op0) + offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;
      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_BIG_ENDIAN.
	 OFFSET is in UNITs, and UNIT is in bits.
	 extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
				      offset * unit / BITS_PER_UNIT,
				      thissize, thispos, 0, 1, align);
      bitsdone += thissize;
      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 build_int_2 (bitsize - bitsdone, 0), 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 build_int_2 (bitsdone - thissize, 0), 0, 1);
	}
      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  if (unsignedp)
    /* Unsigned bit field: we are done.  */
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 build_int_2 (BITS_PER_WORD - bitsize, 0),
			 NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
}
/* Add INC into TARGET.  */

void
expand_inc (target, inc)
     rtx target, inc;
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
			    target, inc,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}
/* Subtract DEC from TARGET.  */

void
expand_dec (target, dec)
     rtx target, dec;
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
			    target, dec,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */
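/* Editorial sketch, not part of GNU CC: the open-coded rotate fallback
   used in the body below, in plain C for 32 bits.  A rotation by N is the
   IOR of two shifts; assumes 0 < n < 32.  */
#if 0
static unsigned int
rotl_example (unsigned int a, int n)
{
  return (a << n) | (a >> (32 - n));
}
#endif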
rtx
expand_shift (code, mode, shifted, amount, target, unsignedp)
     enum tree_code code;
     register enum machine_mode mode;
     rtx shifted;
     tree amount;
     register rtx target;
     int unsignedp;
{
  register rtx op1, temp = 0;
  register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  int try;
  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);

#ifdef SHIFT_COUNT_TRUNCATED
  if (SHIFT_COUNT_TRUNCATED
      && GET_CODE (op1) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
    op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		   % GET_MODE_BITSIZE (mode));
#endif

  if (op1 == const0_rtx)
    return shifted;
  for (try = 0; temp == 0 && try < 3; try++)
    {
      enum optab_methods methods;

      if (try == 0)
	methods = OPTAB_DIRECT;
      else if (try == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx temp1;
	      tree type = TREE_TYPE (amount);
	      tree new_amount = make_tree (type, op1);
	      tree other_amount
		= fold (build (MINUS_EXPR, type,
			       convert (type,
					build_int_2 (GET_MODE_BITSIZE (mode),
						     0)),
			       new_amount));

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				   mode, shifted, new_amount, subtarget, 1);
	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				    mode, shifted, other_amount, 0, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? rotl_optab : rotr_optab,
			       shifted, op1, target, unsignedp, methods);
	  /* If we don't have the rotate, but we are rotating by a constant
	     that is in range, try a rotate in the opposite direction.  */

	  if (temp == 0 && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
	    temp = expand_binop (mode,
				 left ? rotr_optab : rotl_optab,
				 shifted,
				 GEN_INT (GET_MODE_BITSIZE (mode)
					  - INTVAL (op1)),
				 target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? ashl_optab : lshr_optab,
			     shifted, op1, target, unsignedp, methods);
      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? ashl_optab : ashr_optab,
			       shifted, op1, target, unsignedp, methods1);
	}
      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  if (temp == 0)
    abort ();
  return temp;
}
enum alg_code { alg_zero, alg_m, alg_shift,
		alg_add_t_m2, alg_sub_t_m2,
		alg_add_factor, alg_sub_factor,
		alg_add_t2_m, alg_sub_t2_m,
		alg_add, alg_subtract, alg_factor, alg_shiftop };
/* This structure records a sequence of operations.
   `ops' is the number of operations recorded.
   `cost' is their total cost.
   The operations are stored in `op' and the corresponding
   logarithms of the integer coefficients in `log'.

   These are the operations:
   alg_zero		total := 0;
   alg_m		total := multiplicand;
   alg_shift		total := total * coeff;
   alg_add_t_m2		total := total + multiplicand * coeff;
   alg_sub_t_m2		total := total - multiplicand * coeff;
   alg_add_factor	total := total * coeff + total;
   alg_sub_factor	total := total * coeff - total;
   alg_add_t2_m		total := total * coeff + multiplicand;
   alg_sub_t2_m		total := total * coeff - multiplicand;

   The first operand must be either alg_zero or alg_m.  */

struct algorithm
{
  short cost;
  short ops;
  /* The size of the OP and LOG fields are not directly related to the
     word size, but the worst-case algorithms will be if we have few
     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
     in total wordsize operations.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};
/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.  */
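/* Editorial example, not in the original source: for t == 10, synth_mult
   finds the low zero bit (10 = 2 * 5) and recurses on 5, for which one
   candidate is a single shift-add (5 = 4 * 1 + 1, alg_add_t2_m with log 2).
   One possible resulting program is
	total := x;  total := total * 4 + x;  total := total * 2;
   i.e. two cheap operations instead of a general multiply.  */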
static void
synth_mult (alg_out, t, cost_limit)
     struct algorithm *alg_out;
     unsigned HOST_WIDE_INT t;
     int cost_limit;
{
  int m;
  struct algorithm *alg_in, *best_alg;
  int cost;
  unsigned HOST_WIDE_INT q;
  /* Indicate that no algorithm is yet found.  If no algorithm
     is found, this value will be returned and indicate failure.  */
  alg_out->cost = cost_limit;

  if (cost_limit <= 0)
    return;

  /* t == 1 can be done in zero cost.  */
  if (t == 1)
    {
      alg_out->ops = 1;
      alg_out->cost = 0;
      alg_out->op[0] = alg_m;
      return;
    }

  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
     fail now.  */
  if (t == 0)
    {
      if (zero_cost >= cost_limit)
	return;
      else
	{
	  alg_out->ops = 1;
	  alg_out->cost = zero_cost;
	  alg_out->op[0] = alg_zero;
	  return;
	}
    }

  /* We'll be needing a couple extra algorithm structures now.  */

  alg_in = (struct algorithm *) alloca (sizeof (struct algorithm));
  best_alg = (struct algorithm *) alloca (sizeof (struct algorithm));
  /* If we have a group of zero bits at the low-order part of T, try
     multiplying by the remaining bits and then doing a shift.  */

  if ((t & 1) == 0)
    {
      m = floor_log2 (t & -t);	/* m = number of low zero bits */
      q = t >> m;
      cost = shift_cost[m];
      synth_mult (alg_in, q, cost_limit - cost);

      cost += alg_in->cost;
      if (cost < cost_limit)
	{
	  struct algorithm *x;
	  x = alg_in, alg_in = best_alg, best_alg = x;
	  best_alg->log[best_alg->ops] = m;
	  best_alg->op[best_alg->ops] = alg_shift;
	  cost_limit = cost;
	}
    }
  /* If we have an odd number, add or subtract one.  */
  if ((t & 1) != 0)
    {
      unsigned HOST_WIDE_INT w;

      for (w = 1; (w & t) != 0; w <<= 1)
	;
      /* Reject the case where t is 3.
	 Thus we prefer addition in that case.  */
      if (w > 2)
	{
	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */

	  cost = add_cost;
	  synth_mult (alg_in, t + 1, cost_limit - cost);

	  cost += alg_in->cost;
	  if (cost < cost_limit)
	    {
	      struct algorithm *x;
	      x = alg_in, alg_in = best_alg, best_alg = x;
	      best_alg->log[best_alg->ops] = 0;
	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
	      cost_limit = cost;
	    }
	}
      else
	{
	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */

	  cost = add_cost;
	  synth_mult (alg_in, t - 1, cost_limit - cost);

	  cost += alg_in->cost;
	  if (cost < cost_limit)
	    {
	      struct algorithm *x;
	      x = alg_in, alg_in = best_alg, best_alg = x;
	      best_alg->log[best_alg->ops] = 0;
	      best_alg->op[best_alg->ops] = alg_add_t_m2;
	      cost_limit = cost;
	    }
	}
    }
2018 /* Look for factors of t of the form
2019 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2020 If we find such a factor, we can multiply by t using an algorithm that
2021 multiplies by q, shift the result by m and add/subtract it to itself.
2023 We search for large factors first and loop down, even if large factors
2024 are less probable than small; if we find a large factor we will find a
2025 good sequence quickly, and therefore be able to prune (by decreasing
2026 COST_LIMIT) the search. */
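/* A hand-worked instance of this factoring (illustration only):
   t = 45 factors as 45 = (2**3 + 1) * 5, so we can synthesize a multiply
   by 5 recursively and then perform one alg_add_factor step with m = 3,
   total := total*8 + total.  */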
2028 for (m = floor_log2 (t - 1); m >= 2; m--)
2030 unsigned HOST_WIDE_INT d;
2032 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2033 if (t % d == 0 && t > d)
2035 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2036 synth_mult (alg_in, t / d, cost_limit - cost);
2038 cost += alg_in->cost;
2039 if (cost < cost_limit)
2041 struct algorithm *x;
2042 x = alg_in, alg_in = best_alg, best_alg = x;
2043 best_alg->log[best_alg->ops] = m;
2044 best_alg->op[best_alg->ops] = alg_add_factor;
2047 /* Other factors will have been taken care of in the recursion. */
2051 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2052 if (t % d == 0 && t > d)
2054 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2055 synth_mult (alg_in, t / d, cost_limit - cost);
2057 cost += alg_in->cost;
2058 if (cost < cost_limit)
2060 struct algorithm *x;
2061 x = alg_in, alg_in = best_alg, best_alg = x;
2062 best_alg->log[best_alg->ops] = m;
2063 best_alg->op[best_alg->ops] = alg_sub_factor;
2070 /* Try shift-and-add (load effective address) instructions,
2071 i.e. do a*3, a*5, a*9. */
2079 cost = shiftadd_cost[m];
2080 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2082 cost += alg_in->cost;
2083 if (cost < cost_limit)
2085 struct algorithm *x;
2086 x = alg_in, alg_in = best_alg, best_alg = x;
2087 best_alg->log[best_alg->ops] = m;
2088 best_alg->op[best_alg->ops] = alg_add_t2_m;
2098 cost = shiftsub_cost[m];
2099 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2101 cost += alg_in->cost;
2102 if (cost < cost_limit)
2104 struct algorithm *x;
2105 x = alg_in, alg_in = best_alg, best_alg = x;
2106 best_alg->log[best_alg->ops] = m;
2107 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2113 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2114 we have not found any algorithm. */
2115 if (cost_limit == alg_out->cost)
  /* If we are getting too long a sequence for `struct algorithm'
     to record, make this search fail.  */
2120 if (best_alg->ops == MAX_BITS_PER_WORD)
2123 /* Copy the algorithm from temporary space to the space at alg_out.
2124 We avoid using structure assignment because the majority of
2125 best_alg is normally undefined, and this is a critical function. */
2126 alg_out->ops = best_alg->ops + 1;
2127 alg_out->cost = cost_limit;
2128 bcopy ((char *) best_alg->op, (char *) alg_out->op,
2129 alg_out->ops * sizeof *alg_out->op);
2130 bcopy ((char *) best_alg->log, (char *) alg_out->log,
2131 alg_out->ops * sizeof *alg_out->log);
2134 /* Perform a multiplication and return an rtx for the result.
2135 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2136 TARGET is a suggestion for where to store the result (an rtx).
2138 We check specially for a constant integer as OP1.
2139 If you want this check for OP0 as well, then before calling
2140 you should swap the two operands if OP0 would be constant. */
rtx
expand_mult (mode, op0, op1, target, unsignedp)
     enum machine_mode mode;
     register rtx op0, op1, target;
     int unsignedp;
{
2148 rtx const_op1 = op1;
2150 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2151 less than or equal in size to `unsigned int' this doesn't matter.
2152 If the mode is larger than `unsigned int', then synth_mult works only
2153 if the constant value exactly fits in an `unsigned int' without any
2154 truncation. This means that multiplying by negative values does
     not work; results are off by 2^32 on a 32-bit machine.  */
2157 /* If we are multiplying in DImode, it may still be a win
2158 to try to work with shifts and adds. */
2159 if (GET_CODE (op1) == CONST_DOUBLE
2160 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2161 && HOST_BITS_PER_INT >= BITS_PER_WORD
2162 && CONST_DOUBLE_HIGH (op1) == 0)
2163 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2164 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2165 && GET_CODE (op1) == CONST_INT
2166 && INTVAL (op1) < 0)
2169 /* We used to test optimize here, on the grounds that it's better to
2170 produce a smaller program when -O is not used.
2171 But this causes such a terrible slowdown sometimes
2172 that it seems better to use synth_mult always. */
2174 if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2176 struct algorithm alg;
2177 struct algorithm alg2;
2178 HOST_WIDE_INT val = INTVAL (op1);
2179 HOST_WIDE_INT val_so_far;
2182 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
      /* Try to do the computation three ways: multiply by the negative of
	 OP1 and then negate, do the multiplication directly, or do
	 multiplication by OP1 - 1 and then add OP0.  */
2188 mult_cost = rtx_cost (gen_rtx (MULT, mode, op0, op1), SET);
2189 mult_cost = MIN (12 * add_cost, mult_cost);
2191 synth_mult (&alg, val, mult_cost);
      /* This works only if the inverted value actually fits in an
	 `unsigned int'.  */
2195 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2197 synth_mult (&alg2, - val,
2198 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2199 if (alg2.cost + negate_cost < alg.cost)
2200 alg = alg2, variant = negate_variant;
2203 /* This proves very useful for division-by-constant. */
2204 synth_mult (&alg2, val - 1,
2205 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2206 if (alg2.cost + add_cost < alg.cost)
2207 alg = alg2, variant = add_variant;
2209 if (alg.cost < mult_cost)
2211 /* We found something cheaper than a multiply insn. */
2215 op0 = protect_from_queue (op0, 0);
2217 /* Avoid referencing memory over and over.
2218 For speed, but also for correctness when mem is volatile. */
2219 if (GET_CODE (op0) == MEM)
2220 op0 = force_reg (mode, op0);
2222 /* ACCUM starts out either as OP0 or as a zero, depending on
2223 the first operation. */
2225 if (alg.op[0] == alg_zero)
2227 accum = copy_to_mode_reg (mode, const0_rtx);
2230 else if (alg.op[0] == alg_m)
2232 accum = copy_to_mode_reg (mode, op0);
2238 for (opno = 1; opno < alg.ops; opno++)
2240 int log = alg.log[opno];
2241 int preserve = preserve_subexpressions_p ();
2242 rtx shift_subtarget = preserve ? 0 : accum;
	  rtx add_target
	    = (opno == alg.ops - 1 && target != 0 && variant != add_variant
	       ? target : 0);
2246 rtx accum_target = preserve ? 0 : accum;
2248 switch (alg.op[opno])
2251 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2252 build_int_2 (log, 0), NULL_RTX, 0);
2257 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2258 build_int_2 (log, 0), NULL_RTX, 0);
2259 accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2260 add_target ? add_target : accum_target);
2261 val_so_far += (HOST_WIDE_INT) 1 << log;
2265 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2266 build_int_2 (log, 0), NULL_RTX, 0);
2267 accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
2268 add_target ? add_target : accum_target);
2269 val_so_far -= (HOST_WIDE_INT) 1 << log;
2273 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2274 build_int_2 (log, 0), shift_subtarget,
2276 accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
2277 add_target ? add_target : accum_target);
2278 val_so_far = (val_so_far << log) + 1;
2282 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2283 build_int_2 (log, 0), shift_subtarget,
2285 accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
2286 add_target ? add_target : accum_target);
2287 val_so_far = (val_so_far << log) - 1;
2290 case alg_add_factor:
2291 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2292 build_int_2 (log, 0), NULL_RTX, 0);
2293 accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2294 add_target ? add_target : accum_target);
2295 val_so_far += val_so_far << log;
2298 case alg_sub_factor:
2299 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2300 build_int_2 (log, 0), NULL_RTX, 0);
2301 accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
2302 (add_target ? add_target
2303 : preserve ? 0 : tem));
2304 val_so_far = (val_so_far << log) - val_so_far;
2311 /* Write a REG_EQUAL note on the last insn so that we can cse
2312 multiplication sequences. */
2314 insn = get_last_insn ();
      REG_NOTES (insn)
	= gen_rtx (EXPR_LIST, REG_EQUAL,
2317 gen_rtx (MULT, mode, op0, GEN_INT (val_so_far)),
2321 if (variant == negate_variant)
2323 val_so_far = - val_so_far;
2324 accum = expand_unop (mode, neg_optab, accum, target, 0);
2326 else if (variant == add_variant)
2328 val_so_far = val_so_far + 1;
2329 accum = force_operand (gen_rtx (PLUS, mode, accum, op0), target);
2332 if (val != val_so_far)
2339 /* This used to use umul_optab if unsigned, but for non-widening multiply
2340 there is no difference between signed and unsigned. */
2341 op0 = expand_binop (mode, smul_optab,
2342 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2348 /* Return the smallest n such that 2**n >= X. */
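/* E.g. (illustration): ceil_log2 (5) == floor_log2 (4) + 1 == 3, since
   2**3 == 8 >= 5; and ceil_log2 (8) == floor_log2 (7) + 1 == 3 as well.  */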
int
ceil_log2 (x)
     unsigned HOST_WIDE_INT x;
{
  return floor_log2 (x - 1) + 1;
}
2357 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2358 replace division by D, and put the least significant N bits of the result
2359 in *MULTIPLIER_PTR and return the most significant bit.
2361 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2362 needed precision is in PRECISION (should be <= N).
   PRECISION should be as small as possible so this function can choose a
   multiplier more freely.

   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
2368 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2370 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2371 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
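/* Worked example (by hand; not part of the original comments): for D = 7
   with N == PRECISION == 32, lgup = 3, so

       mlow  = floor (2^35 / 7)	        = 0x124924924
       mhigh = floor ((2^35 + 2^3) / 7) = 0x124924925

   mhigh needs 33 bits, so the returned most significant bit is 1,
   *MULTIPLIER_PTR = 0x24924925 and *POST_SHIFT_PTR = 3; indeed
   x / 7 == (x * 0x124924925) >> 35 for all 32-bit unsigned x.  */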
2374 unsigned HOST_WIDE_INT
2375 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
     unsigned HOST_WIDE_INT d;
     int n;
     int precision;
     unsigned HOST_WIDE_INT *multiplier_ptr;
     int *post_shift_ptr;
     int *lgup_ptr;
{
2383 unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2384 unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2389 /* lgup = ceil(log2(divisor)); */
2390 lgup = ceil_log2 (d);
  pow = n + lgup;
  pow2 = n + lgup - precision;
2398 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2400 /* We could handle this with some effort, but this case is much better
2401 handled directly with a scc insn, so rely on caller using that. */
2405 /* mlow = 2^(N + lgup)/d */
2406 if (pow >= HOST_BITS_PER_WIDE_INT)
2408 nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2414 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2416 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2417 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2420 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2421 nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2423 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2424 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2425 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2427 if (mhigh_hi && nh - d >= d)
2429 if (mhigh_hi > 1 || mlow_hi > 1)
2431 /* assert that mlow < mhigh. */
2432 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2435 /* If precision == N, then mlow, mhigh exceed 2^N
2436 (but they do not exceed 2^(N+1)). */
2438 /* Reduce to lowest terms */
2439 for (post_shift = lgup; post_shift > 0; post_shift--)
2441 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2442 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2452 *post_shift_ptr = post_shift;
2454 if (n < HOST_BITS_PER_WIDE_INT)
2456 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2457 *multiplier_ptr = mhigh_lo & mask;
2458 return mhigh_lo >= mask;
2462 *multiplier_ptr = mhigh_lo;
/* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */
static unsigned HOST_WIDE_INT
invert_mod2n (x, n)
     unsigned HOST_WIDE_INT x;
     int n;
{
2475 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2477 /* The algorithm notes that the choice y = x satisfies
2478 x*y == 1 mod 2^3, since x is assumed odd.
2479 Each iteration doubles the number of bits of significance in y. */
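  /* Worked example (by hand, for illustration): with X = 7 and N = 8,
     y starts at 7, and 7*7 = 49 == 1 (mod 8).  One iteration gives
     y = 7*(2 - 49) & 0xff = 183, and 7*183 = 1281 == 1 (mod 256),
     so 183 is the inverse of 7 mod 2**8.  */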
2481 unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;
2485 mask = (n == HOST_BITS_PER_WIDE_INT
2486 ? ~(unsigned HOST_WIDE_INT) 0
2487 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
2497 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2498 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
2499 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.
2503 The result is put in TARGET if that is convenient.
2505 MODE is the mode of operation. */
rtx
expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
     enum machine_mode mode;
     register rtx adj_operand, op0, op1, target;
     int unsignedp;
{
  rtx tem;
2514 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2516 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2517 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2519 tem = expand_and (tem, op1, NULL_RTX);
2520 adj_operand = force_operand (gen_rtx (adj_code, mode, adj_operand, tem),
2523 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2524 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2526 tem = expand_and (tem, op0, NULL_RTX);
2527 target = force_operand (gen_rtx (adj_code, mode, adj_operand, tem), target);
2532 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2533 in TARGET if that is convenient, and return where the result is. If the
2534 operation can not be performed, 0 is returned.
2536 MODE is the mode of operation and result.
2538 UNSIGNEDP nonzero means unsigned multiply.
2540 MAX_COST is the total allowed cost for the expanded RTL. */
rtx
expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
     enum machine_mode mode;
     register rtx op0, target;
     unsigned HOST_WIDE_INT cnst1;
     int unsignedp;
     int max_cost;
{
2550 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  optab mul_highpart_optab;
  optab moptab;
  rtx tem;
  rtx op1, wide_op1;
  int size = GET_MODE_BITSIZE (mode);
  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
2558 if (size > HOST_BITS_PER_WIDE_INT)
2561 op1 = GEN_INT (cnst1);
2563 if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
    wide_op1 = op1;
  else
    wide_op1
      = immed_double_const (cnst1,
			    (unsignedp
			     ? (HOST_WIDE_INT) 0
			     : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
			    wider_mode);
2573 /* expand_mult handles constant multiplication of word_mode
2574 or narrower. It does a poor job for large modes. */
2575 if (size < BITS_PER_WORD
2576 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2578 /* We have to do this, since expand_binop doesn't do conversion for
2579 multiply. Maybe change expand_binop to handle widening multiply? */
2580 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2582 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2583 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2584 build_int_2 (size, 0), NULL_RTX, 1);
2585 return convert_modes (mode, wider_mode, tem, unsignedp);
  if (target == 0)
    target = gen_reg_rtx (mode);
2591 /* Firstly, try using a multiplication insn that only generates the needed
2592 high part of the product, and in the sign flavor of unsignedp. */
2593 if (mul_highpart_cost[(int) mode] < max_cost)
2595 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2596 target = expand_binop (mode, mul_highpart_optab,
2597 op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2602 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2603 Need to adjust the result after the multiplication. */
2604 if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2606 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2607 target = expand_binop (mode, mul_highpart_optab,
2608 op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2610 /* We used the wrong signedness. Adjust the result. */
2611 return expand_mult_highpart_adjust (mode, target, op0,
2612 op1, target, unsignedp);
2615 /* Try widening multiplication. */
2616 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2617 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2618 && mul_widen_cost[(int) wider_mode] < max_cost)
2620 op1 = force_reg (mode, op1);
  /* Try widening the mode and performing a non-widening multiplication.  */
2625 moptab = smul_optab;
2626 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2627 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2633 /* Try widening multiplication of opposite signedness, and adjust. */
2634 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2635 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2636 && (mul_widen_cost[(int) wider_mode]
2637 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2639 rtx regop1 = force_reg (mode, op1);
2640 tem = expand_binop (wider_mode, moptab, op0, regop1,
2641 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2644 /* Extract the high half of the just generated product. */
2645 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2646 build_int_2 (size, 0), NULL_RTX, 1);
2647 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2648 /* We used the wrong signedness. Adjust the result. */
2649 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2657 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2658 tem = expand_binop (wider_mode, moptab, op0, op1,
2659 NULL_RTX, unsignedp, OPTAB_WIDEN);
2663 /* Extract the high half of the just generated product. */
2664 if (mode == word_mode)
2666 return gen_highpart (mode, tem);
2670 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2671 build_int_2 (size, 0), NULL_RTX, 1);
2672 return convert_modes (mode, wider_mode, tem, unsignedp);
2676 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2677 if that is convenient, and returning where the result is.
2678 You may request either the quotient or the remainder as the result;
2679 specify REM_FLAG nonzero to get the remainder.
2681 CODE is the expression code for which kind of division this is;
2682 it controls how rounding is done. MODE is the machine mode to use.
2683 UNSIGNEDP nonzero means do unsigned division. */
2685 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2686 and then correct it by or'ing in missing high bits
2687 if result of ANDI is nonzero.
2688 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2689 This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
2693 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
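/* E.g. (illustration): 8 & 7 == 0, so 8 passes the test, while
   6 & 5 == 4, so 6 fails; note that 0 also passes, hence the name.  */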
rtx
expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
     int rem_flag;
     enum tree_code code;
     enum machine_mode mode;
     register rtx op0, op1, target;
     int unsignedp;
{
2703 enum machine_mode compute_mode;
2704 register rtx tquotient;
2705 rtx quotient = 0, remainder = 0;
  rtx last;
  int size;
  rtx insn, set;
  optab optab1, optab2;
2710 int op1_is_constant, op1_is_pow2;
2711 int max_cost, extra_cost;
2713 op1_is_constant = GET_CODE (op1) == CONST_INT;
2714 op1_is_pow2 = (op1_is_constant
2715 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2716 || EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))));
2719 This is the structure of expand_divmod:
2721 First comes code to fix up the operands so we can perform the operations
2722 correctly and efficiently.
2724 Second comes a switch statement with code specific for each rounding mode.
2725 For some special operands this code emits all RTL for the desired
2726 operation, for other cases, it generates only a quotient and stores it in
2727 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
2728 to indicate that it has not done anything.
2730 Last comes code that finishes the operation. If QUOTIENT is set and
2731 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
2732 QUOTIENT is not set, it is computed using trunc rounding.
2734 We try to generate special code for division and remainder when OP1 is a
2735 constant. If |OP1| = 2**n we can use shifts and some other fast
2736 operations. For other values of OP1, we compute a carefully selected
     fixed-point approximation m = 1/OP1, and generate code that multiplies
     OP0 by m.
2740 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2741 half of the product. Different strategies for generating the product are
2742 implemented in expand_mult_highpart.
2744 If what we actually want is the remainder, we generate that by another
2745 by-constant multiplication and a subtraction. */
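  /* Illustration (hand-worked, 32-bit unsigned; the exact sequence chosen
     depends on target costs): for OP0 / 7, choose_multiplier gives the
     multiplier 0x24924925 with its 33rd bit set and a post-shift of 3,
     so the expansion below is roughly

	 t1 = mulhigh (op0, 0x24924925);
	 quotient = (t1 + ((op0 - t1) >> 1)) >> 2;

     and OP0 % 7 is then computed as OP0 - quotient * 7.  */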
2747 /* We shouldn't be called with OP1 == const1_rtx, but some of the
2748 code below will malfunction if we are, so check here and handle
2749 the special case if so. */
2750 if (op1 == const1_rtx)
2751 return rem_flag ? const0_rtx : op0;
  if (target
      /* Don't use the function value register as a target
2755 since we have to read it as well as write it,
2756 and function-inlining gets confused by this. */
2757 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2758 /* Don't clobber an operand while doing a multi-step calculation. */
2759 || ((rem_flag || op1_is_constant)
2760 && (reg_mentioned_p (target, op0)
2761 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2762 || reg_mentioned_p (target, op1)
	  || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
    target = 0;
2766 /* Get the mode in which to perform this computation. Normally it will
2767 be MODE, but sometimes we can't do the desired operation in MODE.
2768 If so, pick a wider mode in which we can do the operation. Convert
2769 to that mode at the start to avoid repeated conversions.
2771 First see what operations we need. These depend on the expression
     we are evaluating.  (We assume that divxx3 insns exist under the
     same conditions as modxx3 insns and that these insns don't normally
2774 fail. If these assumptions are not correct, we may generate less
2775 efficient code in some cases.)
2777 Then see if we find a mode in which we can open-code that operation
2778 (either a division, modulus, or shift). Finally, check for the smallest
2779 mode for which we can do the operation with a library call. */
2781 /* We might want to refine this now that we have division-by-constant
2782 optimization. Since expand_mult_highpart tries so many variants, it is
2783 not straightforward to generalize this. Maybe we should make an array
2784 of possible modes in init_expmed? Save this for GCC 2.7. */
2786 optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2787 : (unsignedp ? udiv_optab : sdiv_optab));
2788 optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2790 for (compute_mode = mode; compute_mode != VOIDmode;
2791 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2792 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2793 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2796 if (compute_mode == VOIDmode)
2797 for (compute_mode = mode; compute_mode != VOIDmode;
2798 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2799 if (optab1->handlers[(int) compute_mode].libfunc
2800 || optab2->handlers[(int) compute_mode].libfunc)
  /* If we still couldn't find a mode, use MODE, but we'll probably abort
     in expand_binop.  */
2805 if (compute_mode == VOIDmode)
2806 compute_mode = mode;
2808 if (target && GET_MODE (target) == compute_mode)
    tquotient = target;
  else
    tquotient = gen_reg_rtx (compute_mode);
2813 size = GET_MODE_BITSIZE (compute_mode);
#if 0
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
     (mode), and thereby get better code when OP1 is a constant.  Do that
     later.  It will require going over all usages of SIZE below.  */
  size = GET_MODE_BITSIZE (mode);
#endif
2821 max_cost = div_cost[(int) compute_mode]
2822 - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
2824 /* Now convert to the best mode to use. */
2825 if (compute_mode != mode)
2827 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
2828 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
2831 /* If one of the operands is a volatile MEM, copy it into a register. */
2833 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
2834 op0 = force_reg (compute_mode, op0);
2835 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
2836 op1 = force_reg (compute_mode, op1);
2838 /* If we need the remainder or if OP1 is constant, we need to
2839 put OP0 in a register in case it has any queued subexpressions. */
2840 if (rem_flag || op1_is_constant)
2841 op0 = force_reg (compute_mode, op0);
2843 last = get_last_insn ();
  /* Promote floor rounding to trunc rounding for unsigned operations.  */
  if (unsignedp)
    {
      if (code == FLOOR_DIV_EXPR)
	code = TRUNC_DIV_EXPR;
      if (code == FLOOR_MOD_EXPR)
	code = TRUNC_MOD_EXPR;
    }
2854 if (op1 != const0_rtx)
2857 case TRUNC_MOD_EXPR:
2858 case TRUNC_DIV_EXPR:
2859 if (op1_is_constant)
2863 unsigned HOST_WIDE_INT mh, ml;
2864 int pre_shift, post_shift;
2866 unsigned HOST_WIDE_INT d = INTVAL (op1);
2868 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
2870 pre_shift = floor_log2 (d);
		    remainder
		      = expand_binop (compute_mode, and_optab, op0,
2875 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
2879 return gen_lowpart (mode, remainder);
2881 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2882 build_int_2 (pre_shift, 0),
2885 else if (size <= HOST_BITS_PER_WIDE_INT)
2887 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
		    /* Most significant bit of divisor is set; emit an scc
		       insn.  */
2891 quotient = emit_store_flag (tquotient, GEU, op0, op1,
2892 compute_mode, 1, 1);
2898 /* Find a suitable multiplier and right shift count
2899 instead of multiplying with D. */
2901 mh = choose_multiplier (d, size, size,
2902 &ml, &post_shift, &dummy);
2904 /* If the suggested multiplier is more than SIZE bits,
2905 we can do better for even divisors, using an
2906 initial right shift. */
2907 if (mh != 0 && (d & 1) == 0)
2909 pre_shift = floor_log2 (d & -d);
2910 mh = choose_multiplier (d >> pre_shift, size,
2912 &ml, &post_shift, &dummy);
2923 extra_cost = (shift_cost[post_shift - 1]
2924 + shift_cost[1] + 2 * add_cost);
2925 t1 = expand_mult_highpart (compute_mode, op0, ml,
2927 max_cost - extra_cost);
2930 t2 = force_operand (gen_rtx (MINUS, compute_mode,
2933 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
2934 build_int_2 (1, 0), NULL_RTX,1);
2935 t4 = force_operand (gen_rtx (PLUS, compute_mode,
2939 expand_shift (RSHIFT_EXPR, compute_mode, t4,
2940 build_int_2 (post_shift - 1, 0),
2947 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2948 build_int_2 (pre_shift, 0),
2950 extra_cost = (shift_cost[pre_shift]
2951 + shift_cost[post_shift]);
2952 t2 = expand_mult_highpart (compute_mode, t1, ml,
2954 max_cost - extra_cost);
2958 expand_shift (RSHIFT_EXPR, compute_mode, t2,
2959 build_int_2 (post_shift, 0),
	      else		/* Mode too wide to use the tricky code */
2967 insn = get_last_insn ();
2969 && (set = single_set (insn)) != 0
2970 && SET_DEST (set) == quotient)
		REG_NOTES (insn)
		  = gen_rtx (EXPR_LIST, REG_EQUAL,
2973 gen_rtx (UDIV, compute_mode, op0, op1),
2976 else /* TRUNC_DIV, signed */
2978 unsigned HOST_WIDE_INT ml;
2979 int lgup, post_shift;
2980 HOST_WIDE_INT d = INTVAL (op1);
2981 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
2983 /* n rem d = n rem -d */
2984 if (rem_flag && d < 0)
2987 op1 = GEN_INT (abs_d);
2993 quotient = expand_unop (compute_mode, neg_optab, op0,
2995 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
2997 /* This case is not handled correctly below. */
2998 quotient = emit_store_flag (tquotient, EQ, op0, op1,
2999 compute_mode, 1, 1);
3003 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3004 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3006 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3008 lgup = floor_log2 (abs_d);
3009 if (abs_d != 2 && BRANCH_COST < 3)
3011 rtx label = gen_label_rtx ();
3014 t1 = copy_to_mode_reg (compute_mode, op0);
3015 emit_cmp_insn (t1, const0_rtx, GE,
3016 NULL_RTX, compute_mode, 0, 0);
3017 emit_jump_insn (gen_bge (label));
3018 expand_inc (t1, GEN_INT (abs_d - 1));
3020 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3021 build_int_2 (lgup, 0),
3027 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3028 build_int_2 (size - 1, 0),
3030 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3031 build_int_2 (size - lgup, 0),
3033 t3 = force_operand (gen_rtx (PLUS, compute_mode,
3036 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3037 build_int_2 (lgup, 0),
	      /* We have computed OP0 / abs(OP1).  If OP1 is negative,
		 negate the quotient.  */
3045 insn = get_last_insn ();
3047 && (set = single_set (insn)) != 0
3048 && SET_DEST (set) == quotient)
		    REG_NOTES (insn)
		      = gen_rtx (EXPR_LIST, REG_EQUAL,
3051 gen_rtx (DIV, compute_mode, op0,
3055 quotient = expand_unop (compute_mode, neg_optab,
3056 quotient, quotient, 0);
3059 else if (size <= HOST_BITS_PER_WIDE_INT)
3061 choose_multiplier (abs_d, size, size - 1,
3062 &ml, &post_shift, &lgup);
3063 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3067 extra_cost = (shift_cost[post_shift]
3068 + shift_cost[size - 1] + add_cost);
3069 t1 = expand_mult_highpart (compute_mode, op0, ml,
3071 max_cost - extra_cost);
3074 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3075 build_int_2 (post_shift, 0), NULL_RTX, 0);
3076 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3077 build_int_2 (size - 1, 0), NULL_RTX, 0);
3079 quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t2),
3082 quotient = force_operand (gen_rtx (MINUS, compute_mode, t2, t3),
3089 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3090 extra_cost = (shift_cost[post_shift]
3091 + shift_cost[size - 1] + 2 * add_cost);
3092 t1 = expand_mult_highpart (compute_mode, op0, ml,
3094 max_cost - extra_cost);
3097 t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
3099 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3100 build_int_2 (post_shift, 0), NULL_RTX, 0);
3101 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3102 build_int_2 (size - 1, 0), NULL_RTX, 0);
3104 quotient = force_operand (gen_rtx (MINUS, compute_mode, t4, t3),
3107 quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t4),
	      else		/* Mode too wide to use the tricky code */
3114 insn = get_last_insn ();
3116 && (set = single_set (insn)) != 0
3117 && SET_DEST (set) == quotient)
		REG_NOTES (insn)
		  = gen_rtx (EXPR_LIST, REG_EQUAL,
3120 gen_rtx (DIV, compute_mode, op0, op1),
3126 delete_insns_since (last);
3129 case FLOOR_DIV_EXPR:
3130 case FLOOR_MOD_EXPR:
3131 /* We will come here only for signed operations. */
3132 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3134 unsigned HOST_WIDE_INT mh, ml;
3135 int pre_shift, lgup, post_shift;
3136 HOST_WIDE_INT d = INTVAL (op1);
3140 /* We could just as easily deal with negative constants here,
3141 but it does not seem worth the trouble for GCC 2.6. */
3142 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3144 pre_shift = floor_log2 (d);
3147 remainder = expand_binop (compute_mode, and_optab, op0,
3148 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3149 remainder, 0, OPTAB_LIB_WIDEN);
3151 return gen_lowpart (mode, remainder);
3153 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3154 build_int_2 (pre_shift, 0),
3161 mh = choose_multiplier (d, size, size - 1,
3162 &ml, &post_shift, &lgup);
3166 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3167 build_int_2 (size - 1, 0), NULL_RTX, 0);
3168 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3169 NULL_RTX, 0, OPTAB_WIDEN);
3170 extra_cost = (shift_cost[post_shift]
3171 + shift_cost[size - 1] + 2 * add_cost);
3172 t3 = expand_mult_highpart (compute_mode, t2, ml,
3174 max_cost - extra_cost);
3177 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3178 build_int_2 (post_shift, 0),
3180 quotient = expand_binop (compute_mode, xor_optab,
3181 t4, t1, tquotient, 0,
3188 rtx nsign, t1, t2, t3, t4;
3189 t1 = force_operand (gen_rtx (PLUS, compute_mode,
3190 op0, constm1_rtx), NULL_RTX);
3191 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3193 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3194 build_int_2 (size - 1, 0), NULL_RTX, 0);
3195 t3 = force_operand (gen_rtx (MINUS, compute_mode, t1, nsign),
3197 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3202 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3204 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3213 delete_insns_since (last);
3215 /* Try using an instruction that produces both the quotient and
3216 remainder, using truncation. We can easily compensate the quotient
3217 or remainder to get floor rounding, once we have the remainder.
3218 Notice that we compute also the final remainder value here,
3219 and return the result right away. */
3220 if (target == 0 || GET_MODE (target) != compute_mode)
3221 target = gen_reg_rtx (compute_mode);
3226 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3227 quotient = gen_reg_rtx (compute_mode);
3232 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3233 remainder = gen_reg_rtx (compute_mode);
3236 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3237 quotient, remainder, 0))
3239 /* This could be computed with a branch-less sequence.
3240 Save that for later. */
3242 rtx label = gen_label_rtx ();
3243 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3244 compute_mode, 0, 0);
3245 emit_jump_insn (gen_beq (label));
3246 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3247 NULL_RTX, 0, OPTAB_WIDEN);
3248 emit_cmp_insn (tem, const0_rtx, GE, NULL_RTX, compute_mode, 0, 0);
3249 emit_jump_insn (gen_bge (label));
3250 expand_dec (quotient, const1_rtx);
3251 expand_inc (remainder, op1);
3253 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3256 /* No luck with division elimination or divmod. Have to do it
3257 by conditionally adjusting op0 *and* the result. */
3259 rtx label1, label2, label3, label4, label5;
3263 quotient = gen_reg_rtx (compute_mode);
3264 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3265 label1 = gen_label_rtx ();
3266 label2 = gen_label_rtx ();
3267 label3 = gen_label_rtx ();
3268 label4 = gen_label_rtx ();
3269 label5 = gen_label_rtx ();
3270 emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX, compute_mode, 0, 0);
3271 emit_jump_insn (gen_blt (label2));
3272 emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3273 compute_mode, 0, 0);
3274 emit_jump_insn (gen_blt (label1));
3275 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3276 quotient, 0, OPTAB_LIB_WIDEN);
3277 if (tem != quotient)
3278 emit_move_insn (quotient, tem);
3279 emit_jump_insn (gen_jump (label5));
3281 emit_label (label1);
3282 expand_inc (adjusted_op0, const1_rtx);
3283 emit_jump_insn (gen_jump (label4));
3285 emit_label (label2);
3286 emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3287 compute_mode, 0, 0);
3288 emit_jump_insn (gen_bgt (label3));
3289 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3290 quotient, 0, OPTAB_LIB_WIDEN);
3291 if (tem != quotient)
3292 emit_move_insn (quotient, tem);
3293 emit_jump_insn (gen_jump (label5));
3295 emit_label (label3);
3296 expand_dec (adjusted_op0, const1_rtx);
3297 emit_label (label4);
3298 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3299 quotient, 0, OPTAB_LIB_WIDEN);
3300 if (tem != quotient)
3301 emit_move_insn (quotient, tem);
3302 expand_dec (quotient, const1_rtx);
3303 emit_label (label5);
3311 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3314 unsigned HOST_WIDE_INT d = INTVAL (op1);
3315 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3316 build_int_2 (floor_log2 (d), 0),
3318 t2 = expand_binop (compute_mode, and_optab, op0,
3320 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3321 t3 = gen_reg_rtx (compute_mode);
3322 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3323 compute_mode, 1, 1);
3327 lab = gen_label_rtx ();
3328 emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3329 compute_mode, 0, 0);
3330 emit_jump_insn (gen_beq (lab));
3331 expand_inc (t1, const1_rtx);
3336 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3342 /* Try using an instruction that produces both the quotient and
3343 remainder, using truncation. We can easily compensate the
3344 quotient or remainder to get ceiling rounding, once we have the
3345 remainder. Notice that we compute also the final remainder
3346 value here, and return the result right away. */
3347 if (target == 0 || GET_MODE (target) != compute_mode)
3348 target = gen_reg_rtx (compute_mode);
3352 remainder = (GET_CODE (target) == REG
3353 ? target : gen_reg_rtx (compute_mode));
3354 quotient = gen_reg_rtx (compute_mode);
3358 quotient = (GET_CODE (target) == REG
3359 ? target : gen_reg_rtx (compute_mode));
3360 remainder = gen_reg_rtx (compute_mode);
3363 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3366 /* This could be computed with a branch-less sequence.
3367 Save that for later. */
3368 rtx label = gen_label_rtx ();
3369 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3370 compute_mode, 0, 0);
3371 emit_jump_insn (gen_beq (label));
3372 expand_inc (quotient, const1_rtx);
3373 expand_dec (remainder, op1);
3375 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3378 /* No luck with division elimination or divmod. Have to do it
3379 by conditionally adjusting op0 *and* the result. */
3382 rtx adjusted_op0, tem;
3384 quotient = gen_reg_rtx (compute_mode);
3385 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3386 label1 = gen_label_rtx ();
3387 label2 = gen_label_rtx ();
3388 emit_cmp_insn (adjusted_op0, const0_rtx, NE, NULL_RTX,
3389 compute_mode, 0, 0);
3390 emit_jump_insn (gen_bne (label1));
3391 emit_move_insn (quotient, const0_rtx);
3392 emit_jump_insn (gen_jump (label2));
3394 emit_label (label1);
3395 expand_dec (adjusted_op0, const1_rtx);
3396 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3397 quotient, 1, OPTAB_LIB_WIDEN);
3398 if (tem != quotient)
3399 emit_move_insn (quotient, tem);
3400 expand_inc (quotient, const1_rtx);
3401 emit_label (label2);
3406 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3407 && INTVAL (op1) >= 0)
	      /* This is extremely similar to the code for the unsigned case
		 above.  For 2.7 we should merge these variants, but for
		 2.6.1 I don't want to touch the code for unsigned since that
		 gets used in C.  The signed case will only be used by other
		 languages (Ada).  */
3416 unsigned HOST_WIDE_INT d = INTVAL (op1);
3417 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3418 build_int_2 (floor_log2 (d), 0),
3420 t2 = expand_binop (compute_mode, and_optab, op0,
3422 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3423 t3 = gen_reg_rtx (compute_mode);
3424 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3425 compute_mode, 1, 1);
3429 lab = gen_label_rtx ();
3430 emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3431 compute_mode, 0, 0);
3432 emit_jump_insn (gen_beq (lab));
3433 expand_inc (t1, const1_rtx);
3438 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3444 /* Try using an instruction that produces both the quotient and
3445 remainder, using truncation. We can easily compensate the
3446 quotient or remainder to get ceiling rounding, once we have the
3447 remainder. Notice that we compute also the final remainder
3448 value here, and return the result right away. */
3449 if (target == 0 || GET_MODE (target) != compute_mode)
3450 target = gen_reg_rtx (compute_mode);
	      remainder = (GET_CODE (target) == REG
3454 ? target : gen_reg_rtx (compute_mode));
3455 quotient = gen_reg_rtx (compute_mode);
3459 quotient = (GET_CODE (target) == REG
3460 ? target : gen_reg_rtx (compute_mode));
3461 remainder = gen_reg_rtx (compute_mode);
3464 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3467 /* This could be computed with a branch-less sequence.
3468 Save that for later. */
3470 rtx label = gen_label_rtx ();
3471 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3472 compute_mode, 0, 0);
3473 emit_jump_insn (gen_beq (label));
3474 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3475 NULL_RTX, 0, OPTAB_WIDEN);
3476 emit_cmp_insn (tem, const0_rtx, LT, NULL_RTX,
3477 compute_mode, 0, 0);
3478 emit_jump_insn (gen_blt (label));
3479 expand_inc (quotient, const1_rtx);
3480 expand_dec (remainder, op1);
3482 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3485 /* No luck with division elimination or divmod. Have to do it
3486 by conditionally adjusting op0 *and* the result. */
3488 rtx label1, label2, label3, label4, label5;
3492 quotient = gen_reg_rtx (compute_mode);
3493 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3494 label1 = gen_label_rtx ();
3495 label2 = gen_label_rtx ();
3496 label3 = gen_label_rtx ();
3497 label4 = gen_label_rtx ();
3498 label5 = gen_label_rtx ();
3499 emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX,
3500 compute_mode, 0, 0);
3501 emit_jump_insn (gen_blt (label2));
3502 emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3503 compute_mode, 0, 0);
3504 emit_jump_insn (gen_bgt (label1));
3505 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3506 quotient, 0, OPTAB_LIB_WIDEN);
3507 if (tem != quotient)
3508 emit_move_insn (quotient, tem);
3509 emit_jump_insn (gen_jump (label5));
3511 emit_label (label1);
3512 expand_dec (adjusted_op0, const1_rtx);
3513 emit_jump_insn (gen_jump (label4));
3515 emit_label (label2);
3516 emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3517 compute_mode, 0, 0);
3518 emit_jump_insn (gen_blt (label3));
3519 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3520 quotient, 0, OPTAB_LIB_WIDEN);
3521 if (tem != quotient)
3522 emit_move_insn (quotient, tem);
3523 emit_jump_insn (gen_jump (label5));
3525 emit_label (label3);
3526 expand_inc (adjusted_op0, const1_rtx);
3527 emit_label (label4);
3528 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3529 quotient, 0, OPTAB_LIB_WIDEN);
3530 if (tem != quotient)
3531 emit_move_insn (quotient, tem);
3532 expand_inc (quotient, const1_rtx);
3533 emit_label (label5);
3538 case EXACT_DIV_EXPR:
3539 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3541 HOST_WIDE_INT d = INTVAL (op1);
3542 unsigned HOST_WIDE_INT ml;
3546 post_shift = floor_log2 (d & -d);
3547 ml = invert_mod2n (d >> post_shift, size);
3548 t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3550 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3551 build_int_2 (post_shift, 0),
3552 NULL_RTX, unsignedp);
3554 insn = get_last_insn ();
	    REG_NOTES (insn)
	      = gen_rtx (EXPR_LIST, REG_EQUAL,
3557 gen_rtx (unsignedp ? UDIV : DIV, compute_mode,
3563 case ROUND_DIV_EXPR:
3564 case ROUND_MOD_EXPR:
3569 label = gen_label_rtx ();
3570 quotient = gen_reg_rtx (compute_mode);
3571 remainder = gen_reg_rtx (compute_mode);
3572 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3575 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3576 quotient, 1, OPTAB_LIB_WIDEN);
3577 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3578 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3579 remainder, 1, OPTAB_LIB_WIDEN);
3581 tem = plus_constant (op1, -1);
3582 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3583 build_int_2 (1, 0), NULL_RTX, 1);
3584 emit_cmp_insn (remainder, tem, LEU, NULL_RTX, compute_mode, 0, 0);
3585 emit_jump_insn (gen_bleu (label));
3586 expand_inc (quotient, const1_rtx);
3587 expand_dec (remainder, op1);
3592 rtx abs_rem, abs_op1, tem, mask;
3594 label = gen_label_rtx ();
3595 quotient = gen_reg_rtx (compute_mode);
3596 remainder = gen_reg_rtx (compute_mode);
3597 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3600 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3601 quotient, 0, OPTAB_LIB_WIDEN);
3602 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3603 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3604 remainder, 0, OPTAB_LIB_WIDEN);
3606 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
3607 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
3608 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3609 build_int_2 (1, 0), NULL_RTX, 1);
3610 emit_cmp_insn (tem, abs_op1, LTU, NULL_RTX, compute_mode, 0, 0);
3611 emit_jump_insn (gen_bltu (label));
3612 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3613 NULL_RTX, 0, OPTAB_WIDEN);
3614 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3615 build_int_2 (size - 1, 0), NULL_RTX, 0);
3616 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3617 NULL_RTX, 0, OPTAB_WIDEN);
3618 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3619 NULL_RTX, 0, OPTAB_WIDEN);
3620 expand_inc (quotient, tem);
3621 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3622 NULL_RTX, 0, OPTAB_WIDEN);
3623 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3624 NULL_RTX, 0, OPTAB_WIDEN);
3625 expand_dec (remainder, tem);
3628 return gen_lowpart (mode, rem_flag ? remainder : quotient);
      if (target && GET_MODE (target) != compute_mode)
	target = 0;
3638 /* Try to produce the remainder directly without a library call. */
3639 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3641 unsignedp, OPTAB_WIDEN);
3644 /* No luck there. Can we do remainder and divide at once
3645 without a library call? */
3646 remainder = gen_reg_rtx (compute_mode);
3647 if (! expand_twoval_binop ((unsignedp
3651 NULL_RTX, remainder, unsignedp))
3656 return gen_lowpart (mode, remainder);
3659 /* Produce the quotient. */
3660 /* Try a quotient insn, but not a library call. */
3661 quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3662 op0, op1, rem_flag ? NULL_RTX : target,
3663 unsignedp, OPTAB_WIDEN);
3666 /* No luck there. Try a quotient-and-remainder insn,
3667 keeping the quotient alone. */
3668 quotient = gen_reg_rtx (compute_mode);
3669 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3671 quotient, NULL_RTX, unsignedp))
3675 /* Still no luck. If we are not computing the remainder,
3676 use a library call for the quotient. */
3677 quotient = sign_expand_binop (compute_mode,
3678 udiv_optab, sdiv_optab,
3680 unsignedp, OPTAB_LIB_WIDEN);
      if (target && GET_MODE (target) != compute_mode)
	target = 0;
3691 /* No divide instruction either. Use library for remainder. */
3692 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3694 unsignedp, OPTAB_LIB_WIDEN);
3697 /* We divided. Now finish doing X - Y * (X / Y). */
3698 remainder = expand_mult (compute_mode, quotient, op1,
3699 NULL_RTX, unsignedp);
3700 remainder = expand_binop (compute_mode, sub_optab, op0,
3701 remainder, target, unsignedp,
3706 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3709 /* Return a tree node with data type TYPE, describing the value of X.
3710 Usually this is an RTL_EXPR, if there is no obvious better choice.
   X may be an expression; however, we only support those expressions
3712 generated by loop.c. */
3721 switch (GET_CODE (x))
3724 t = build_int_2 (INTVAL (x),
3725 TREE_UNSIGNED (type) || INTVAL (x) >= 0 ? 0 : -1);
3726 TREE_TYPE (t) = type;
3730 if (GET_MODE (x) == VOIDmode)
3732 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3733 TREE_TYPE (t) = type;
3739 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3740 t = build_real (type, d);
3746 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3747 make_tree (type, XEXP (x, 1))));
3750 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3751 make_tree (type, XEXP (x, 1))));
3754 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3757 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3758 make_tree (type, XEXP (x, 1))));
3761 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3762 make_tree (type, XEXP (x, 1))));
3765 return fold (convert (type,
3766 build (RSHIFT_EXPR, unsigned_type (type),
3767 make_tree (unsigned_type (type),
3769 make_tree (type, XEXP (x, 1)))));
3772 return fold (convert (type,
3773 build (RSHIFT_EXPR, signed_type (type),
3774 make_tree (signed_type (type), XEXP (x, 0)),
3775 make_tree (type, XEXP (x, 1)))));
3778 if (TREE_CODE (type) != REAL_TYPE)
3779 t = signed_type (type);
3783 return fold (convert (type,
3784 build (TRUNC_DIV_EXPR, t,
3785 make_tree (t, XEXP (x, 0)),
3786 make_tree (t, XEXP (x, 1)))));
3788 t = unsigned_type (type);
3789 return fold (convert (type,
3790 build (TRUNC_DIV_EXPR, t,
3791 make_tree (t, XEXP (x, 0)),
3792 make_tree (t, XEXP (x, 1)))));
3794 t = make_node (RTL_EXPR);
3795 TREE_TYPE (t) = type;
3796 RTL_EXPR_RTL (t) = x;
3797 /* There are no insns to be output
3798 when this rtl_expr is used. */
3799 RTL_EXPR_SEQUENCE (t) = 0;
3804 /* Return an rtx representing the value of X * MULT + ADD.
3805 TARGET is a suggestion for where to store the result (an rtx).
3806 MODE is the machine mode for the computation.
   X and MULT must have mode MODE.  ADD may have a different mode;
   so can X (a VOIDmode constant defaults to MODE).
3809 UNSIGNEDP is non-zero to do unsigned multiplication.
3810 This may emit insns. */
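/* For instance (an illustrative call, not from the original source):
   expand_mult_add (x, target, GEN_INT (4), add, SImode, 0) emits RTL
   computing x * 4 + add as a signed SImode value.  */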
rtx
expand_mult_add (x, target, mult, add, mode, unsignedp)
     rtx x, target, mult, add;
     enum machine_mode mode;
     int unsignedp;
{
3818 tree type = type_for_mode (mode, unsignedp);
3819 tree add_type = (GET_MODE (add) == VOIDmode
3820 ? type : type_for_mode (GET_MODE (add), unsignedp));
3821 tree result = fold (build (PLUS_EXPR, type,
3822 fold (build (MULT_EXPR, type,
3823 make_tree (type, x),
3824 make_tree (type, mult))),
3825 make_tree (add_type, add)));
3827 return expand_expr (result, target, VOIDmode, 0);
3830 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
3831 and returning TARGET.
3833 If TARGET is 0, a pseudo-register or constant is returned. */
rtx
expand_and (op0, op1, target)
     rtx op0, op1, target;
{
  enum machine_mode mode = VOIDmode;
  rtx tem;
3842 if (GET_MODE (op0) != VOIDmode)
3843 mode = GET_MODE (op0);
3844 else if (GET_MODE (op1) != VOIDmode)
3845 mode = GET_MODE (op1);
3847 if (mode != VOIDmode)
3848 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
3849 else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
3850 tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
3856 else if (tem != target)
3857 emit_move_insn (target, tem);
3861 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
3862 and storing in TARGET. Normally return TARGET.
3863 Return 0 if that cannot be done.
3865 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
3866 it is VOIDmode, they cannot both be CONST_INT.
3868 UNSIGNEDP is for the case where we have to widen the operands
3869 to perform the operation. It says to use zero-extension.
3871 NORMALIZEP is 1 if we should convert the result to be either zero
3872 or one. Normalize is -1 if we should convert the result to be
3873 either zero or -1. If NORMALIZEP is zero, the result will be left
3874 "raw" out of the scc insn. */
rtx
emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
     rtx target;
     enum rtx_code code;
     rtx op0, op1;
     enum machine_mode mode;
     int unsignedp;
     int normalizep;
{
  rtx subtarget;
  enum insn_code icode;
3887 enum machine_mode compare_mode;
3888 enum machine_mode target_mode = GET_MODE (target);
  rtx tem;
  rtx last = get_last_insn ();
3891 rtx pattern, comparison;
3893 /* If one operand is constant, make it the second one. Only do this
3894 if the other operand is not constant as well. */
3896 if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
3897 || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
3902 code = swap_condition (code);
3905 if (mode == VOIDmode)
3906 mode = GET_MODE (op0);
3908 /* For some comparisons with 1 and -1, we can convert this to
3909 comparisons with zero. This will often produce more opportunities for
3910 store-flag insns. */
3915 if (op1 == const1_rtx)
3916 op1 = const0_rtx, code = LE;
3919 if (op1 == constm1_rtx)
3920 op1 = const0_rtx, code = LT;
3923 if (op1 == const1_rtx)
3924 op1 = const0_rtx, code = GT;
3927 if (op1 == constm1_rtx)
3928 op1 = const0_rtx, code = GE;
3931 if (op1 == const1_rtx)
3932 op1 = const0_rtx, code = NE;
3935 if (op1 == const1_rtx)
3936 op1 = const0_rtx, code = EQ;
3940 /* From now on, we won't change CODE, so set ICODE now. */
3941 icode = setcc_gen_code[(int) code];
3943 /* If this is A < 0 or A >= 0, we can do this by taking the ones
3944 complement of A (for GE) and shifting the sign bit to the low bit. */
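/* E.g. (illustration, 32-bit mode, STORE_FLAG_VALUE == 1): A < 0 becomes
   the logical shift (unsigned) A >> 31, and A >= 0 becomes
   (unsigned) ~A >> 31, each yielding 0 or 1 without a branch.  */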
3945 if (op1 == const0_rtx && (code == LT || code == GE)
3946 && GET_MODE_CLASS (mode) == MODE_INT
3947 && (normalizep || STORE_FLAG_VALUE == 1
3948 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
3949 && (STORE_FLAG_VALUE
3950 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
3954 /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
3957 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
3959 op0 = protect_from_queue (op0, 0);
3960 op0 = convert_modes (target_mode, mode, op0, 0);
3964 if (target_mode != mode)
3968 op0 = expand_unop (mode, one_cmpl_optab, op0,
3969 ((STORE_FLAG_VALUE == 1 || normalizep)
3970 ? 0 : subtarget), 0);
3972 if (STORE_FLAG_VALUE == 1 || normalizep)
3973 /* If we are supposed to produce a 0/1 value, we want to do
3974 a logical shift from the sign bit to the low-order bit; for
3975 a -1/0 value, we do an arithmetic shift. */
3976 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
3977 size_int (GET_MODE_BITSIZE (mode) - 1),
3978 subtarget, normalizep != -1);
3980 if (mode != target_mode)
3981 op0 = convert_modes (target_mode, mode, op0, 0);
3986 if (icode != CODE_FOR_nothing)
3988 /* We think we may be able to do this with a scc insn. Emit the
3989 comparison and then the scc insn.
3991 compare_from_rtx may call emit_queue, which would be deleted below
3992 if the scc insn fails. So call it ourselves before setting LAST. */
3995 last = get_last_insn ();
3998 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
3999 if (GET_CODE (comparison) == CONST_INT)
4000 return (comparison == const0_rtx ? const0_rtx
4001 : normalizep == 1 ? const1_rtx
4002 : normalizep == -1 ? constm1_rtx
4005 /* If the code of COMPARISON doesn't match CODE, something is
4006 wrong; we can no longer be sure that we have the operation.
4007 We could handle this case, but it should not happen. */
4009 if (GET_CODE (comparison) != code)
4012 /* Get a reference to the target in the proper mode for this insn. */
4013 compare_mode = insn_operand_mode[(int) icode][0];
4015 if (preserve_subexpressions_p ()
4016 || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
4017 subtarget = gen_reg_rtx (compare_mode);
4019 pattern = GEN_FCN (icode) (subtarget);
4022 emit_insn (pattern);
4024 /* If we are converting to a wider mode, first convert to
4025 TARGET_MODE, then normalize. This produces better combining
4026 opportunities on machines that have a SIGN_EXTRACT when we are
4027 testing a single bit. This mostly benefits the 68k.
4029 If STORE_FLAG_VALUE does not have the sign bit set when
4030 interpreted in COMPARE_MODE, we can do this conversion as
4031 unsigned, which is usually more efficient. */
4032 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4034 convert_move (target, subtarget,
4035 (GET_MODE_BITSIZE (compare_mode)
4036 <= HOST_BITS_PER_WIDE_INT)
4037 && 0 == (STORE_FLAG_VALUE
4038 & ((HOST_WIDE_INT) 1
4039 << (GET_MODE_BITSIZE (compare_mode) -1))));
4041 compare_mode = target_mode;
      /* If we want to keep subexpressions around, don't reuse our
	 last target.  */
4049 if (preserve_subexpressions_p ())
4052 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4053 we don't have to do anything. */
4054 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4056 else if (normalizep == - STORE_FLAG_VALUE)
4057 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);

          /* We don't want to use STORE_FLAG_VALUE < 0 below since this
             makes it hard to use a value of just the sign bit due to
             ANSI integer constant typing rules.  */
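          /* For example, if STORE_FLAG_VALUE is the sign bit of a 32-bit
             mode, writing it as the constant 0x80000000 gives it an
             unsigned type under ANSI rules, so "STORE_FLAG_VALUE < 0"
             would be false; masking the sign bit explicitly avoids that
             trap.  */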
          else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
                   && (STORE_FLAG_VALUE
                       & ((HOST_WIDE_INT) 1
                          << (GET_MODE_BITSIZE (compare_mode) - 1))))
            op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
                                size_int (GET_MODE_BITSIZE (compare_mode) - 1),
                                subtarget, normalizep == 1);
          else if (STORE_FLAG_VALUE & 1)
            {
              op0 = expand_and (op0, const1_rtx, subtarget);
              if (normalizep == -1)
                op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
            }
          else
            abort ();
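
          /* Illustration: if the scc insn leaves STORE_FLAG_VALUE == -1
             and a 0/1 result is wanted, negation maps -1 to 1.  If it
             leaves just the sign bit (10000000 in 8 bits), a shift by 7
             maps it to 1 (logical) or -1 (arithmetic).  If it leaves any
             odd value, masking with 1 keeps the low bit, which is already
             the desired 0 or 1.  */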

          /* If we were converting to a smaller mode, do the
             conversion now.  */
          if (target_mode != compare_mode)
            {
              convert_move (target, op0, 0);
              return target;
            }
          else
            return op0;
        }

      delete_insns_since (last);
    }

  /* If expensive optimizations, use different pseudo registers for each
     insn, instead of reusing the same pseudo.  This leads to better CSE,
     but slows down the compiler, since there are more pseudos.  */
  subtarget = (!flag_expensive_optimizations
               && (target_mode == mode)) ? target : NULL_RTX;

  /* If we reached here, we can't do this with a scc insn.  However, there
     are some comparisons that can be done directly.  For example, if
     this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if (BRANCH_COST > 0
      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
      && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);
      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }
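
  /* The identity used above: A == B exactly when (A ^ B) == 0, and
     likewise when A - B == 0.  E.g. 5 ^ 3 is 00000110, which is non-zero,
     so 5 == 3 comes out false, while 5 ^ 5 is 0.  */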

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (BRANCH_COST == 0
      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST <= 1 || (code != LE && code != GT))))
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
               && (STORE_FLAG_VALUE
                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
        ;
      else
        return 0;
    }

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */
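  /* E.g., in 8 bits: A = 0 gives 0 | -1 = 11111111 (sign set, true);
     A = -4 gives 11111100 | 11111011 = 11111111 (true); A = 4 gives
     00000100 | 00000011 = 00000111 (sign clear, false).  */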

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */
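  /* E.g., in 8 bits with BITS = 7: A = 5 gives (5 >> 7) - 5 = -5, whose
     sign bit is set (true); A = -5 gives (-5 >> 7) - (-5) = -1 + 5 = 4,
     sign clear (false); A = 0 gives 0 - 0 = 0, sign clear (false).  */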

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          size_int (GET_MODE_BITSIZE (mode) - 1),
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is non-zero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */
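      /* E.g., via ABS in 8 bits: A = -3 maps to 3; for EQ, 3 - 1 = 2 has
         a clear sign bit (A == 0 is false), while A = 0 maps to 0 and
         0 - 1 = -1 has it set (true).  For NE, negating 3 gives -3, sign
         set (true), and negating 0 leaves 0 (false).  */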

      if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          op0 = protect_from_queue (op0, 0);
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's
         complement of the value with itself.  For EQ, we take the one's
         complement of that "or", which is an extra insn, so we only
         handle EQ if branches are expensive.  */

      if (tem == 0 && (code == NE || BRANCH_COST > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        size_int (GET_MODE_BITSIZE (mode) - 1),
                        subtarget, normalizep == 1);

  if (tem)
    {
      if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

  emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
  emit_move_insn (target, const0_rtx);
  emit_label (label);

  return target;
}