gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Test whether a value is zero of a power of two.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  98    shift count and some by mode.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
 110 void
 111 init_expmed (void)
 112 {
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
 147   PUT_CODE (&all.reg, REG);
 148   /* Avoid using hard regs in ways which may be unsupported.  */
 149   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 150
 151   PUT_CODE (&all.plus, PLUS);
 152   XEXP (&all.plus, 0) = &all.reg;
 153   XEXP (&all.plus, 1) = &all.reg;
 154
 155   PUT_CODE (&all.neg, NEG);
 156   XEXP (&all.neg, 0) = &all.reg;
 157
 158   PUT_CODE (&all.udiv, UDIV);
 159   XEXP (&all.udiv, 0) = &all.reg;
 160   XEXP (&all.udiv, 1) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.div, DIV);
 167   XEXP (&all.div, 0) = &all.reg;
 168   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 169
 170   PUT_CODE (&all.mod, MOD);
 171   XEXP (&all.mod, 0) = &all.reg;
 172   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 173
 174   PUT_CODE (&all.zext, ZERO_EXTEND);
 175   XEXP (&all.zext, 0) = &all.reg;
 176
 177   PUT_CODE (&all.wide_mult, MULT);
 178   XEXP (&all.wide_mult, 0) = &all.zext;
 179   XEXP (&all.wide_mult, 1) = &all.zext;
 180
 181   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 182   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 183
 184   PUT_CODE (&all.wide_trunc, TRUNCATE);
 185   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 186
 187   PUT_CODE (&all.shift, ASHIFT);
 188   XEXP (&all.shift, 0) = &all.reg;
 189
 190   PUT_CODE (&all.shift_mult, MULT);
 191   XEXP (&all.shift_mult, 0) = &all.reg;
 192
 193   PUT_CODE (&all.shift_add, PLUS);
 194   XEXP (&all.shift_add, 0) = &all.shift_mult;
 195   XEXP (&all.shift_add, 1) = &all.reg;
 196
 197   PUT_CODE (&all.shift_sub, MINUS);
 198   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 199   XEXP (&all.shift_sub, 1) = &all.reg;
 200
 201   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202        mode != VOIDmode;
 203        mode = GET_MODE_WIDER_MODE (mode))
 204     {
 205       PUT_MODE (&all.reg, mode);
 206       PUT_MODE (&all.plus, mode);
 207       PUT_MODE (&all.neg, mode);
 208       PUT_MODE (&all.udiv, mode);
 209       PUT_MODE (&all.mult, mode);
 210       PUT_MODE (&all.div, mode);
 211       PUT_MODE (&all.mod, mode);
 212       PUT_MODE (&all.wide_trunc, mode);
 213       PUT_MODE (&all.shift, mode);
 214       PUT_MODE (&all.shift_mult, mode);
 215       PUT_MODE (&all.shift_add, mode);
 216       PUT_MODE (&all.shift_sub, mode);
 217
 218       add_cost[mode] = rtx_cost (&all.plus, SET);
 219       neg_cost[mode] = rtx_cost (&all.neg, SET);
 220       div_cost[mode] = rtx_cost (&all.udiv, SET);
 221       mul_cost[mode] = rtx_cost (&all.mult, SET);
 222
 223       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 224       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 225
 226       wider_mode = GET_MODE_WIDER_MODE (mode);
 227       if (wider_mode != VOIDmode)
 228         {
 229           PUT_MODE (&all.zext, wider_mode);
 230           PUT_MODE (&all.wide_mult, wider_mode);
 231           PUT_MODE (&all.wide_lshr, wider_mode);
 232           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 233
 234           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 235           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 236         }
 237
 238       shift_cost[mode][0] = 0;
 239       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 240
 241       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 242       for (m = 1; m < n; m++)
 243         {
 244           XEXP (&all.shift, 1) = cint[m];
 245           XEXP (&all.shift_mult, 1) = pow2[m];
 246
 247           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 248           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 249           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 250         }
 251     }
 252 }
 253
 254 /* Return an rtx representing minus the value of X.
 255    MODE is the intended mode of the result,
 256    useful if X is a CONST_INT.  */
 257
 258 rtx
 259 negate_rtx (enum machine_mode mode, rtx x)
 260 {
 261   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 262
 263   if (result == 0)
 264     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 265
 266   return result;
 267 }
 268
 269 /* Report on the availability of insv/extv/extzv and the desired mode
 270    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 271    is false; else the mode of the specified operand.  If OPNO is -1,
 272    all the caller cares about is whether the insn is available.  */
 273 enum machine_mode
 274 mode_for_extraction (enum extraction_pattern pattern, int opno)
 275 {
 276   const struct insn_data *data;
 277
 278   switch (pattern)
 279     {
 280     case EP_insv:
 281       if (HAVE_insv)
 282         {
 283           data = &insn_data[CODE_FOR_insv];
 284           break;
 285         }
 286       return MAX_MACHINE_MODE;
 287
 288     case EP_extv:
 289       if (HAVE_extv)
 290         {
 291           data = &insn_data[CODE_FOR_extv];
 292           break;
 293         }
 294       return MAX_MACHINE_MODE;
 295
 296     case EP_extzv:
 297       if (HAVE_extzv)
 298         {
 299           data = &insn_data[CODE_FOR_extzv];
 300           break;
 301         }
 302       return MAX_MACHINE_MODE;
 303
 304     default:
 305       gcc_unreachable ();
 306     }
 307
 308   if (opno == -1)
 309     return VOIDmode;
 310
 311   /* Everyone who uses this function used to follow it with
 312      if (result == VOIDmode) result = word_mode; */
 313   if (data->operand[opno].mode == VOIDmode)
 314     return word_mode;
 315   return data->operand[opno].mode;
 316 }
 317
 318 \f
 319 /* Generate code to store value from rtx VALUE
 320    into a bit-field within structure STR_RTX
 321    containing BITSIZE bits starting at bit BITNUM.
 322    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 323    ALIGN is the alignment that STR_RTX is known to have.
 324    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 325
 326 /* ??? Note that there are two different ideas here for how
 327    to determine the size to count bits within, for a register.
 328    One is BITS_PER_WORD, and the other is the size of operand 3
 329    of the insv pattern.
 330
 331    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 332    else, we use the mode of operand 3.  */
 333
 334 rtx
 335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 336                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 337                  rtx value)
 338 {
 339   unsigned int unit
 340     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 341   unsigned HOST_WIDE_INT offset, bitpos;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 356       op0 = SUBREG_REG (op0);
 357     }
 358
 359   /* No action is needed if the target is a register and if the field
 360      lies completely outside that register.  This can occur if the source
 361      code contains an out-of-bounds access to a small array.  */
 362   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 363     return value;
 364
 365   /* Use vec_set patterns for inserting parts of vectors whenever
 366      available.  */
 367   if (VECTOR_MODE_P (GET_MODE (op0))
 368       && !MEM_P (op0)
 369       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 370           != CODE_FOR_nothing)
 371       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 372       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 373       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 374     {
 375       enum machine_mode outermode = GET_MODE (op0);
 376       enum machine_mode innermode = GET_MODE_INNER (outermode);
 377       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 378       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 379       rtx rtxpos = GEN_INT (pos);
 380       rtx src = value;
 381       rtx dest = op0;
 382       rtx pat, seq;
 383       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 384       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 385       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 386
 387       start_sequence ();
 388
 389       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 390         src = copy_to_mode_reg (mode1, src);
 391
 392       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 393         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 394
 395       /* We could handle this, but we should always be called with a pseudo
 396          for our targets and all insns should take them as outputs.  */
 397       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 398                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 399                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 400       pat = GEN_FCN (icode) (dest, src, rtxpos);
 401       seq = get_insns ();
 402       end_sequence ();
 403       if (pat)
 404         {
 405           emit_insn (seq);
 406           emit_insn (pat);
 407           return dest;
 408         }
 409     }
 410
 411   if (flag_force_mem)
 412     {
 413       int old_generating_concat_p = generating_concat_p;
 414       generating_concat_p = 0;
 415       value = force_not_mem (value);
 416       generating_concat_p = old_generating_concat_p;
 417     }
 418
 419   /* If the target is a register, overwriting the entire object, or storing
 420      a full-word or multi-word field can be done with just a SUBREG.
 421
 422      If the target is memory, storing any naturally aligned field can be
 423      done with a simple store.  For targets that support fast unaligned
 424      memory, any naturally sized, unit aligned field can be done directly.  */
 425
 426   offset = bitnum / unit;
 427   bitpos = bitnum % unit;
 428   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 429                 + (offset * UNITS_PER_WORD);
 430
 431   if (bitpos == 0
 432       && bitsize == GET_MODE_BITSIZE (fieldmode)
 433       && (!MEM_P (op0)
 434           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 435              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 436              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 437           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 438              || (offset * BITS_PER_UNIT % bitsize == 0
 439                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 440     {
 441       if (GET_MODE (op0) != fieldmode)
 442         {
 443           if (MEM_P (op0))
 444             op0 = adjust_address (op0, fieldmode, offset);
 445           else
 446             op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 447                                        byte_offset);
 448         }
 449       emit_move_insn (op0, value);
 450       return value;
 451     }
 452
 453   /* Make sure we are playing with integral modes.  Pun with subregs
 454      if we aren't.  This must come after the entire register case above,
 455      since that case is valid for any mode.  The following cases are only
 456      valid for integral modes.  */
 457   {
 458     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 459     if (imode != GET_MODE (op0))
 460       {
 461         if (MEM_P (op0))
 462           op0 = adjust_address (op0, imode, 0);
 463         else
 464           {
 465             gcc_assert (imode != BLKmode);
 466             op0 = gen_lowpart (imode, op0);
 467           }
 468       }
 469   }
 470
 471   /* We may be accessing data outside the field, which means
 472      we can alias adjacent data.  */
 473   if (MEM_P (op0))
 474     {
 475       op0 = shallow_copy_rtx (op0);
 476       set_mem_alias_set (op0, 0);
 477       set_mem_expr (op0, 0);
 478     }
 479
 480   /* If OP0 is a register, BITPOS must count within a word.
 481      But as we have it, it counts within whatever size OP0 now has.
 482      On a bigendian machine, these are not the same, so convert.  */
 483   if (BYTES_BIG_ENDIAN
 484       && !MEM_P (op0)
 485       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 486     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 487
 488   /* Storing an lsb-aligned field in a register
 489      can be done with a movestrict instruction.  */
 490
 491   if (!MEM_P (op0)
 492       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 493       && bitsize == GET_MODE_BITSIZE (fieldmode)
 494       && (movstrict_optab->handlers[fieldmode].insn_code
 495           != CODE_FOR_nothing))
 496     {
 497       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 498
 499       /* Get appropriate low part of the value being stored.  */
 500       if (GET_CODE (value) == CONST_INT || REG_P (value))
 501         value = gen_lowpart (fieldmode, value);
 502       else if (!(GET_CODE (value) == SYMBOL_REF
 503                  || GET_CODE (value) == LABEL_REF
 504                  || GET_CODE (value) == CONST))
 505         value = convert_to_mode (fieldmode, value, 0);
 506
 507       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 508         value = copy_to_mode_reg (fieldmode, value);
 509
 510       if (GET_CODE (op0) == SUBREG)
 511         {
 512           /* Else we've got some float mode source being extracted into
 513              a different float mode destination -- this combination of
 514              subregs results in Severe Tire Damage.  */
 515           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 516                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 517                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 518           op0 = SUBREG_REG (op0);
 519         }
 520
 521       emit_insn (GEN_FCN (icode)
 522                  (gen_rtx_SUBREG (fieldmode, op0,
 523                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 524                                   + (offset * UNITS_PER_WORD)),
 525                                   value));
 526
 527       return value;
 528     }
 529
 530   /* Handle fields bigger than a word.  */
 531
 532   if (bitsize > BITS_PER_WORD)
 533     {
 534       /* Here we transfer the words of the field
 535          in the order least significant first.
 536          This is because the most significant word is the one which may
 537          be less than full.
 538          However, only do that if the value is not BLKmode.  */
 539
 540       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 541       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 542       unsigned int i;
 543
 544       /* This is the mode we must force value to, so that there will be enough
 545          subwords to extract.  Note that fieldmode will often (always?) be
 546          VOIDmode, because that is what store_field uses to indicate that this
 547          is a bit field, but passing VOIDmode to operand_subword_force will
 548          result in an abort.  */
 549       fieldmode = GET_MODE (value);
 550       if (fieldmode == VOIDmode)
 551         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 552
 553       for (i = 0; i < nwords; i++)
 554         {
 555           /* If I is 0, use the low-order word in both field and target;
 556              if I is 1, use the next to lowest word; and so on.  */
 557           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 558           unsigned int bit_offset = (backwards
 559                                      ? MAX ((int) bitsize - ((int) i + 1)
 560                                             * BITS_PER_WORD,
 561                                             0)
 562                                      : (int) i * BITS_PER_WORD);
 563
 564           store_bit_field (op0, MIN (BITS_PER_WORD,
 565                                      bitsize - i * BITS_PER_WORD),
 566                            bitnum + bit_offset, word_mode,
 567                            operand_subword_force (value, wordnum, fieldmode));
 568         }
 569       return value;
 570     }
 571
 572   /* From here on we can assume that the field to be stored in is
 573      a full-word (whatever type that is), since it is shorter than a word.  */
 574
 575   /* OFFSET is the number of words or bytes (UNIT says which)
 576      from STR_RTX to the first word or byte containing part of the field.  */
 577
 578   if (!MEM_P (op0))
 579     {
 580       if (offset != 0
 581           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 582         {
 583           if (!REG_P (op0))
 584             {
 585               /* Since this is a destination (lvalue), we can't copy it to a
 586                  pseudo.  We can trivially remove a SUBREG that does not
 587                  change the size of the operand.  Such a SUBREG may have been
 588                  added above.  Otherwise, abort.  */
 589               gcc_assert (GET_CODE (op0) == SUBREG
 590                           && (GET_MODE_SIZE (GET_MODE (op0))
 591                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 592               op0 = SUBREG_REG (op0);
 593             }
 594           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 595                                 op0, (offset * UNITS_PER_WORD));
 596         }
 597       offset = 0;
 598     }
 599
 600   /* If VALUE has a floating-point or complex mode, access it as an
 601      integer of the corresponding size.  This can occur on a machine
 602      with 64 bit registers that uses SFmode for float.  It can also
 603      occur for unaligned float or complex fields.  */
 604   orig_value = value;
 605   if (GET_MODE (value) != VOIDmode
 606       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 607       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 608     {
 609       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 610       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 611     }
 612
 613   /* Now OFFSET is nonzero only if OP0 is memory
 614      and is therefore always measured in bytes.  */
 615
 616   if (HAVE_insv
 617       && GET_MODE (value) != BLKmode
 618       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 619       /* Ensure insv's size is wide enough for this field.  */
 620       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 621       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 622             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 623     {
 624       int xbitpos = bitpos;
 625       rtx value1;
 626       rtx xop0 = op0;
 627       rtx last = get_last_insn ();
 628       rtx pat;
 629       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 630       int save_volatile_ok = volatile_ok;
 631
 632       volatile_ok = 1;
 633
 634       /* If this machine's insv can only insert into a register, copy OP0
 635          into a register and save it back later.  */
 636       /* This used to check flag_force_mem, but that was a serious
 637          de-optimization now that flag_force_mem is enabled by -O2.  */
 638       if (MEM_P (op0)
 639           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 640                 (op0, VOIDmode)))
 641         {
 642           rtx tempreg;
 643           enum machine_mode bestmode;
 644
 645           /* Get the mode to use for inserting into this field.  If OP0 is
 646              BLKmode, get the smallest mode consistent with the alignment. If
 647              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 648              mode. Otherwise, use the smallest mode containing the field.  */
 649
 650           if (GET_MODE (op0) == BLKmode
 651               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 652             bestmode
 653               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 654                                MEM_VOLATILE_P (op0));
 655           else
 656             bestmode = GET_MODE (op0);
 657
 658           if (bestmode == VOIDmode
 659               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 660                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 661             goto insv_loses;
 662
 663           /* Adjust address to point to the containing unit of that mode.
 664              Compute offset as multiple of this unit, counting in bytes.  */
 665           unit = GET_MODE_BITSIZE (bestmode);
 666           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 667           bitpos = bitnum % unit;
 668           op0 = adjust_address (op0, bestmode,  offset);
 669
 670           /* Fetch that unit, store the bitfield in it, then store
 671              the unit.  */
 672           tempreg = copy_to_reg (op0);
 673           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 674           emit_move_insn (op0, tempreg);
 675           return value;
 676         }
 677       volatile_ok = save_volatile_ok;
 678
 679       /* Add OFFSET into OP0's address.  */
 680       if (MEM_P (xop0))
 681         xop0 = adjust_address (xop0, byte_mode, offset);
 682
 683       /* If xop0 is a register, we need it in MAXMODE
 684          to make it acceptable to the format of insv.  */
 685       if (GET_CODE (xop0) == SUBREG)
 686         /* We can't just change the mode, because this might clobber op0,
 687            and we will need the original value of op0 if insv fails.  */
 688         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 689       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 690         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 691
 692       /* On big-endian machines, we count bits from the most significant.
 693          If the bit field insn does not, we must invert.  */
 694
 695       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 696         xbitpos = unit - bitsize - xbitpos;
 697
 698       /* We have been counting XBITPOS within UNIT.
 699          Count instead within the size of the register.  */
 700       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 701         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 702
 703       unit = GET_MODE_BITSIZE (maxmode);
 704
 705       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 706       value1 = value;
 707       if (GET_MODE (value) != maxmode)
 708         {
 709           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 710             {
 711               /* Optimization: Don't bother really extending VALUE
 712                  if it has all the bits we will actually use.  However,
 713                  if we must narrow it, be sure we do it correctly.  */
 714
 715               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 716                 {
 717                   rtx tmp;
 718
 719                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 720                   if (! tmp)
 721                     tmp = simplify_gen_subreg (maxmode,
 722                                                force_reg (GET_MODE (value),
 723                                                           value1),
 724                                                GET_MODE (value), 0);
 725                   value1 = tmp;
 726                 }
 727               else
 728                 value1 = gen_lowpart (maxmode, value1);
 729             }
 730           else if (GET_CODE (value) == CONST_INT)
 731             value1 = gen_int_mode (INTVAL (value), maxmode);
 732           else
 733             /* Parse phase is supposed to make VALUE's data type
 734                match that of the component reference, which is a type
 735                at least as wide as the field; so VALUE should have
 736                a mode that corresponds to that type.  */
 737             gcc_assert (CONSTANT_P (value));
 738         }
 739
 740       /* If this machine's insv insists on a register,
 741          get VALUE1 into a register.  */
 742       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 743              (value1, maxmode)))
 744         value1 = force_reg (maxmode, value1);
 745
 746       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 747       if (pat)
 748         emit_insn (pat);
 749       else
 750         {
 751           delete_insns_since (last);
 752           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 753         }
 754     }
 755   else
 756     insv_loses:
 757     /* Insv is not available; store using shifts and boolean ops.  */
 758     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 759   return value;
 760 }
 761 \f
 762 /* Use shifts and boolean operations to store VALUE
 763    into a bit field of width BITSIZE
 764    in a memory location specified by OP0 except offset by OFFSET bytes.
 765      (OFFSET must be 0 if OP0 is a register.)
 766    The field starts at position BITPOS within the byte.
 767     (If OP0 is a register, it may be a full word or a narrower mode,
 768      but BITPOS still counts within a full word,
 769      which is significant on bigendian machines.)  */
 770
 771 static void
 772 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 773                        unsigned HOST_WIDE_INT bitsize,
 774                        unsigned HOST_WIDE_INT bitpos, rtx value)
 775 {
 776   enum machine_mode mode;
 777   unsigned int total_bits = BITS_PER_WORD;
 778   rtx subtarget, temp;
 779   int all_zero = 0;
 780   int all_one = 0;
 781
 782   /* There is a case not handled here:
 783      a structure with a known alignment of just a halfword
 784      and a field split across two aligned halfwords within the structure.
 785      Or likewise a structure with a known alignment of just a byte
 786      and a field split across two bytes.
 787      Such cases are not supposed to be able to occur.  */
 788
 789   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 790     {
 791       gcc_assert (!offset);
 792       /* Special treatment for a bit field split across two registers.  */
 793       if (bitsize + bitpos > BITS_PER_WORD)
 794         {
 795           store_split_bit_field (op0, bitsize, bitpos, value);
 796           return;
 797         }
 798     }
 799   else
 800     {
 801       /* Get the proper mode to use for this field.  We want a mode that
 802          includes the entire field.  If such a mode would be larger than
 803          a word, we won't be doing the extraction the normal way.
 804          We don't want a mode bigger than the destination.  */
 805
 806       mode = GET_MODE (op0);
 807       if (GET_MODE_BITSIZE (mode) == 0
 808           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 809         mode = word_mode;
 810       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 811                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 812
 813       if (mode == VOIDmode)
 814         {
 815           /* The only way this should occur is if the field spans word
 816              boundaries.  */
 817           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 818                                  value);
 819           return;
 820         }
 821
 822       total_bits = GET_MODE_BITSIZE (mode);
 823
 824       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 825          be in the range 0 to total_bits-1, and put any excess bytes in
 826          OFFSET.  */
 827       if (bitpos >= total_bits)
 828         {
 829           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 830           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 831                      * BITS_PER_UNIT);
 832         }
 833
 834       /* Get ref to an aligned byte, halfword, or word containing the field.
 835          Adjust BITPOS to be position within a word,
 836          and OFFSET to be the offset of that word.
 837          Then alter OP0 to refer to that word.  */
 838       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 839       offset -= (offset % (total_bits / BITS_PER_UNIT));
 840       op0 = adjust_address (op0, mode, offset);
 841     }
 842
 843   mode = GET_MODE (op0);
 844
 845   /* Now MODE is either some integral mode for a MEM as OP0,
 846      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 847      The bit field is contained entirely within OP0.
 848      BITPOS is the starting bit number within OP0.
 849      (OP0's mode may actually be narrower than MODE.)  */
 850
 851   if (BYTES_BIG_ENDIAN)
 852       /* BITPOS is the distance between our msb
 853          and that of the containing datum.
 854          Convert it to the distance from the lsb.  */
 855       bitpos = total_bits - bitsize - bitpos;
 856
 857   /* Now BITPOS is always the distance between our lsb
 858      and that of OP0.  */
 859
 860   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 861      we must first convert its mode to MODE.  */
 862
 863   if (GET_CODE (value) == CONST_INT)
 864     {
 865       HOST_WIDE_INT v = INTVAL (value);
 866
 867       if (bitsize < HOST_BITS_PER_WIDE_INT)
 868         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 869
 870       if (v == 0)
 871         all_zero = 1;
 872       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 873                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 874                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 875         all_one = 1;
 876
 877       value = lshift_value (mode, value, bitpos, bitsize);
 878     }
 879   else
 880     {
 881       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 882                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 883
 884       if (GET_MODE (value) != mode)
 885         {
 886           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 887               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 888             value = gen_lowpart (mode, value);
 889           else
 890             value = convert_to_mode (mode, value, 1);
 891         }
 892
 893       if (must_and)
 894         value = expand_binop (mode, and_optab, value,
 895                               mask_rtx (mode, 0, bitsize, 0),
 896                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 897       if (bitpos > 0)
 898         value = expand_shift (LSHIFT_EXPR, mode, value,
 899                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 900     }
 901
 902   /* Now clear the chosen bits in OP0,
 903      except that if VALUE is -1 we need not bother.  */
 904
 905   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 906
 907   if (! all_one)
 908     {
 909       temp = expand_binop (mode, and_optab, op0,
 910                            mask_rtx (mode, bitpos, bitsize, 1),
 911                            subtarget, 1, OPTAB_LIB_WIDEN);
 912       subtarget = temp;
 913     }
 914   else
 915     temp = op0;
 916
 917   /* Now logical-or VALUE into OP0, unless it is zero.  */
 918
 919   if (! all_zero)
 920     temp = expand_binop (mode, ior_optab, temp, value,
 921                          subtarget, 1, OPTAB_LIB_WIDEN);
 922   if (op0 != temp)
 923     emit_move_insn (op0, temp);
 924 }
 925 \f
 926 /* Store a bit field that is split across multiple accessible memory objects.
 927
 928    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 929    BITSIZE is the field width; BITPOS the position of its first bit
 930    (within the word).
 931    VALUE is the value to store.
 932
 933    This does not yet handle fields wider than BITS_PER_WORD.  */
 934
 935 static void
 936 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 937                        unsigned HOST_WIDE_INT bitpos, rtx value)
 938 {
 939   unsigned int unit;
 940   unsigned int bitsdone = 0;
 941
 942   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 943      much at a time.  */
 944   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 945     unit = BITS_PER_WORD;
 946   else
 947     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 948
 949   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 950      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 951      that VALUE might be a floating-point constant.  */
 952   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 953     {
 954       rtx word = gen_lowpart_common (word_mode, value);
 955
 956       if (word && (value != word))
 957         value = word;
 958       else
 959         value = gen_lowpart_common (word_mode,
 960                                     force_reg (GET_MODE (value) != VOIDmode
 961                                                ? GET_MODE (value)
 962                                                : word_mode, value));
 963     }
 964
 965   while (bitsdone < bitsize)
 966     {
 967       unsigned HOST_WIDE_INT thissize;
 968       rtx part, word;
 969       unsigned HOST_WIDE_INT thispos;
 970       unsigned HOST_WIDE_INT offset;
 971
 972       offset = (bitpos + bitsdone) / unit;
 973       thispos = (bitpos + bitsdone) % unit;
 974
 975       /* THISSIZE must not overrun a word boundary.  Otherwise,
 976          store_fixed_bit_field will call us again, and we will mutually
 977          recurse forever.  */
 978       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 979       thissize = MIN (thissize, unit - thispos);
 980
 981       if (BYTES_BIG_ENDIAN)
 982         {
 983           int total_bits;
 984
 985           /* We must do an endian conversion exactly the same way as it is
 986              done in extract_bit_field, so that the two calls to
 987              extract_fixed_bit_field will have comparable arguments.  */
 988           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 989             total_bits = BITS_PER_WORD;
 990           else
 991             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 992
 993           /* Fetch successively less significant portions.  */
 994           if (GET_CODE (value) == CONST_INT)
 995             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 996                              >> (bitsize - bitsdone - thissize))
 997                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 998           else
 999             /* The args are chosen so that the last part includes the
1000                lsb.  Give extract_bit_field the value it needs (with
1001                endianness compensation) to fetch the piece we want.  */
1002             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1003                                             total_bits - bitsize + bitsdone,
1004                                             NULL_RTX, 1);
1005         }
1006       else
1007         {
1008           /* Fetch successively more significant portions.  */
1009           if (GET_CODE (value) == CONST_INT)
1010             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1011                              >> bitsdone)
1012                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1013           else
1014             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1015                                             bitsdone, NULL_RTX, 1);
1016         }
1017
1018       /* If OP0 is a register, then handle OFFSET here.
1019
1020          When handling multiword bitfields, extract_bit_field may pass
1021          down a word_mode SUBREG of a larger REG for a bitfield that actually
1022          crosses a word boundary.  Thus, for a SUBREG, we must find
1023          the current word starting from the base register.  */
1024       if (GET_CODE (op0) == SUBREG)
1025         {
1026           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1027           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1028                                         GET_MODE (SUBREG_REG (op0)));
1029           offset = 0;
1030         }
1031       else if (REG_P (op0))
1032         {
1033           word = operand_subword_force (op0, offset, GET_MODE (op0));
1034           offset = 0;
1035         }
1036       else
1037         word = op0;
1038
1039       /* OFFSET is in UNITs, and UNIT is in bits.
1040          store_fixed_bit_field wants offset in bytes.  */
1041       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1042                              thispos, part);
1043       bitsdone += thissize;
1044     }
1045 }
1046 \f
1047 /* Generate code to extract a byte-field from STR_RTX
1048    containing BITSIZE bits, starting at BITNUM,
1049    and put it in TARGET if possible (if TARGET is nonzero).
1050    Regardless of TARGET, we return the rtx for where the value is placed.
1051
1052    STR_RTX is the structure containing the byte (a REG or MEM).
1053    UNSIGNEDP is nonzero if this is an unsigned bit field.
1054    MODE is the natural mode of the field value once extracted.
1055    TMODE is the mode the caller would like the value to have;
1056    but the value may be returned with type MODE instead.
1057
1058    TOTAL_SIZE is the size in bytes of the containing structure,
1059    or -1 if varying.
1060
1061    If a TARGET is specified and we can store in it at no extra cost,
1062    we do so, and return TARGET.
1063    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1064    if they are equally easy.  */
1065
1066 rtx
1067 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1068                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1069                    enum machine_mode mode, enum machine_mode tmode)
1070 {
1071   unsigned int unit
1072     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1073   unsigned HOST_WIDE_INT offset, bitpos;
1074   rtx op0 = str_rtx;
1075   rtx spec_target = target;
1076   rtx spec_target_subreg = 0;
1077   enum machine_mode int_mode;
1078   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1079   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1080   enum machine_mode mode1;
1081   int byte_offset;
1082
1083   if (tmode == VOIDmode)
1084     tmode = mode;
1085
1086   while (GET_CODE (op0) == SUBREG)
1087     {
1088       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1089       op0 = SUBREG_REG (op0);
1090     }
1091
1092   /* If we have an out-of-bounds access to a register, just return an
1093      uninitialized register of the required mode.  This can occur if the
1094      source code contains an out-of-bounds access to a small array.  */
1095   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1096     return gen_reg_rtx (tmode);
1097
1098   if (REG_P (op0)
1099       && mode == GET_MODE (op0)
1100       && bitnum == 0
1101       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1102     {
1103       /* We're trying to extract a full register from itself.  */
1104       return op0;
1105     }
1106
1107   /* Use vec_extract patterns for extracting parts of vectors whenever
1108      available.  */
1109   if (VECTOR_MODE_P (GET_MODE (op0))
1110       && !MEM_P (op0)
1111       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1112           != CODE_FOR_nothing)
1113       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1114           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1115     {
1116       enum machine_mode outermode = GET_MODE (op0);
1117       enum machine_mode innermode = GET_MODE_INNER (outermode);
1118       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1119       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1120       rtx rtxpos = GEN_INT (pos);
1121       rtx src = op0;
1122       rtx dest = NULL, pat, seq;
1123       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1124       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1125       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1126
1127       if (innermode == tmode || innermode == mode)
1128         dest = target;
1129
1130       if (!dest)
1131         dest = gen_reg_rtx (innermode);
1132
1133       start_sequence ();
1134
1135       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1136         dest = copy_to_mode_reg (mode0, dest);
1137
1138       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1139         src = copy_to_mode_reg (mode1, src);
1140
1141       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1142         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1143
1144       /* We could handle this, but we should always be called with a pseudo
1145          for our targets and all insns should take them as outputs.  */
1146       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1147                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1148                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1149
1150       pat = GEN_FCN (icode) (dest, src, rtxpos);
1151       seq = get_insns ();
1152       end_sequence ();
1153       if (pat)
1154         {
1155           emit_insn (seq);
1156           emit_insn (pat);
1157           return dest;
1158         }
1159     }
1160
1161   /* Make sure we are playing with integral modes.  Pun with subregs
1162      if we aren't.  */
1163   {
1164     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1165     if (imode != GET_MODE (op0))
1166       {
1167         if (MEM_P (op0))
1168           op0 = adjust_address (op0, imode, 0);
1169         else
1170           {
1171             gcc_assert (imode != BLKmode);
1172             op0 = gen_lowpart (imode, op0);
1173
1174             /* If we got a SUBREG, force it into a register since we
1175                aren't going to be able to do another SUBREG on it.  */
1176             if (GET_CODE (op0) == SUBREG)
1177               op0 = force_reg (imode, op0);
1178           }
1179       }
1180   }
1181
1182   /* We may be accessing data outside the field, which means
1183      we can alias adjacent data.  */
1184   if (MEM_P (op0))
1185     {
1186       op0 = shallow_copy_rtx (op0);
1187       set_mem_alias_set (op0, 0);
1188       set_mem_expr (op0, 0);
1189     }
1190
1191   /* Extraction of a full-word or multi-word value from a structure
1192      in a register or aligned memory can be done with just a SUBREG.
1193      A subword value in the least significant part of a register
1194      can also be extracted with a SUBREG.  For this, we need the
1195      byte offset of the value in op0.  */
1196
1197   bitpos = bitnum % unit;
1198   offset = bitnum / unit;
1199   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1200
1201   /* If OP0 is a register, BITPOS must count within a word.
1202      But as we have it, it counts within whatever size OP0 now has.
1203      On a bigendian machine, these are not the same, so convert.  */
1204   if (BYTES_BIG_ENDIAN
1205       && !MEM_P (op0)
1206       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1207     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1208
1209   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1210      If that's wrong, the solution is to test for it and set TARGET to 0
1211      if needed.  */
1212
1213   /* Only scalar integer modes can be converted via subregs.  There is an
1214      additional problem for FP modes here in that they can have a precision
1215      which is different from the size.  mode_for_size uses precision, but
1216      we want a mode based on the size, so we must avoid calling it for FP
1217      modes.  */
1218   mode1  = (SCALAR_INT_MODE_P (tmode)
1219             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1220             : mode);
1221
1222   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1223         && bitpos % BITS_PER_WORD == 0)
1224        || (mode1 != BLKmode
1225            /* ??? The big endian test here is wrong.  This is correct
1226               if the value is in a register, and if mode_for_size is not
1227               the same mode as op0.  This causes us to get unnecessarily
1228               inefficient code from the Thumb port when -mbig-endian.  */
1229            && (BYTES_BIG_ENDIAN
1230                ? bitpos + bitsize == BITS_PER_WORD
1231                : bitpos == 0)))
1232       && ((!MEM_P (op0)
1233            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1234                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1235            && GET_MODE_SIZE (mode1) != 0
1236            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1237           || (MEM_P (op0)
1238               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1239                   || (offset * BITS_PER_UNIT % bitsize == 0
1240                       && MEM_ALIGN (op0) % bitsize == 0)))))
1241     {
1242       if (mode1 != GET_MODE (op0))
1243         {
1244           if (MEM_P (op0))
1245             op0 = adjust_address (op0, mode1, offset);
1246           else
1247             {
1248               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1249                                              byte_offset);
1250               if (sub == NULL)
1251                 goto no_subreg_mode_swap;
1252               op0 = sub;
1253             }
1254         }
1255       if (mode1 != mode)
1256         return convert_to_mode (tmode, op0, unsignedp);
1257       return op0;
1258     }
1259  no_subreg_mode_swap:
1260
1261   /* Handle fields bigger than a word.  */
1262
1263   if (bitsize > BITS_PER_WORD)
1264     {
1265       /* Here we transfer the words of the field
1266          in the order least significant first.
1267          This is because the most significant word is the one which may
1268          be less than full.  */
1269
1270       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1271       unsigned int i;
1272
1273       if (target == 0 || !REG_P (target))
1274         target = gen_reg_rtx (mode);
1275
1276       /* Indicate for flow that the entire target reg is being set.  */
1277       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1278
1279       for (i = 0; i < nwords; i++)
1280         {
1281           /* If I is 0, use the low-order word in both field and target;
1282              if I is 1, use the next to lowest word; and so on.  */
1283           /* Word number in TARGET to use.  */
1284           unsigned int wordnum
1285             = (WORDS_BIG_ENDIAN
1286                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1287                : i);
1288           /* Offset from start of field in OP0.  */
1289           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1290                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1291                                                 * (int) BITS_PER_WORD))
1292                                      : (int) i * BITS_PER_WORD);
1293           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1294           rtx result_part
1295             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1296                                            bitsize - i * BITS_PER_WORD),
1297                                  bitnum + bit_offset, 1, target_part, mode,
1298                                  word_mode);
1299
1300           gcc_assert (target_part);
1301
1302           if (result_part != target_part)
1303             emit_move_insn (target_part, result_part);
1304         }
1305
1306       if (unsignedp)
1307         {
1308           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1309              need to be zero'd out.  */
1310           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1311             {
1312               unsigned int i, total_words;
1313
1314               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1315               for (i = nwords; i < total_words; i++)
1316                 emit_move_insn
1317                   (operand_subword (target,
1318                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1319                                     1, VOIDmode),
1320                    const0_rtx);
1321             }
1322           return target;
1323         }
1324
1325       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1326       target = expand_shift (LSHIFT_EXPR, mode, target,
1327                              build_int_cst (NULL_TREE,
1328                                             GET_MODE_BITSIZE (mode) - bitsize),
1329                              NULL_RTX, 0);
1330       return expand_shift (RSHIFT_EXPR, mode, target,
1331                            build_int_cst (NULL_TREE,
1332                                           GET_MODE_BITSIZE (mode) - bitsize),
1333                            NULL_RTX, 0);
1334     }
1335
1336   /* From here on we know the desired field is smaller than a word.  */
1337
1338   /* Check if there is a correspondingly-sized integer field, so we can
1339      safely extract it as one size of integer, if necessary; then
1340      truncate or extend to the size that is wanted; then use SUBREGs or
1341      convert_to_mode to get one of the modes we really wanted.  */
1342
1343   int_mode = int_mode_for_mode (tmode);
1344   if (int_mode == BLKmode)
1345     int_mode = int_mode_for_mode (mode);
1346   /* Should probably push op0 out to memory and then do a load.  */
1347   gcc_assert (int_mode != BLKmode);
1348
1349   /* OFFSET is the number of words or bytes (UNIT says which)
1350      from STR_RTX to the first word or byte containing part of the field.  */
1351   if (!MEM_P (op0))
1352     {
1353       if (offset != 0
1354           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1355         {
1356           if (!REG_P (op0))
1357             op0 = copy_to_reg (op0);
1358           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1359                                 op0, (offset * UNITS_PER_WORD));
1360         }
1361       offset = 0;
1362     }
1363
1364   /* Now OFFSET is nonzero only for memory operands.  */
1365
1366   if (unsignedp)
1367     {
1368       if (HAVE_extzv
1369           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1370           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1371                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1372         {
1373           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1374           rtx bitsize_rtx, bitpos_rtx;
1375           rtx last = get_last_insn ();
1376           rtx xop0 = op0;
1377           rtx xtarget = target;
1378           rtx xspec_target = spec_target;
1379           rtx xspec_target_subreg = spec_target_subreg;
1380           rtx pat;
1381           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1382
1383           if (MEM_P (xop0))
1384             {
1385               int save_volatile_ok = volatile_ok;
1386               volatile_ok = 1;
1387
1388               /* Is the memory operand acceptable?  */
1389               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1390                      (xop0, GET_MODE (xop0))))
1391                 {
1392                   /* No, load into a reg and extract from there.  */
1393                   enum machine_mode bestmode;
1394
1395                   /* Get the mode to use for inserting into this field.  If
1396                      OP0 is BLKmode, get the smallest mode consistent with the
1397                      alignment. If OP0 is a non-BLKmode object that is no
1398                      wider than MAXMODE, use its mode. Otherwise, use the
1399                      smallest mode containing the field.  */
1400
1401                   if (GET_MODE (xop0) == BLKmode
1402                       || (GET_MODE_SIZE (GET_MODE (op0))
1403                           > GET_MODE_SIZE (maxmode)))
1404                     bestmode = get_best_mode (bitsize, bitnum,
1405                                               MEM_ALIGN (xop0), maxmode,
1406                                               MEM_VOLATILE_P (xop0));
1407                   else
1408                     bestmode = GET_MODE (xop0);
1409
1410                   if (bestmode == VOIDmode
1411                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1412                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1413                     goto extzv_loses;
1414
1415                   /* Compute offset as multiple of this unit,
1416                      counting in bytes.  */
1417                   unit = GET_MODE_BITSIZE (bestmode);
1418                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1419                   xbitpos = bitnum % unit;
1420                   xop0 = adjust_address (xop0, bestmode, xoffset);
1421
1422                   /* Fetch it to a register in that size.  */
1423                   xop0 = force_reg (bestmode, xop0);
1424
1425                   /* XBITPOS counts within UNIT, which is what is expected.  */
1426                 }
1427               else
1428                 /* Get ref to first byte containing part of the field.  */
1429                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1430
1431               volatile_ok = save_volatile_ok;
1432             }
1433
1434           /* If op0 is a register, we need it in MAXMODE (which is usually
1435              SImode). to make it acceptable to the format of extzv.  */
1436           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1437             goto extzv_loses;
1438           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1439             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1440
1441           /* On big-endian machines, we count bits from the most significant.
1442              If the bit field insn does not, we must invert.  */
1443           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1444             xbitpos = unit - bitsize - xbitpos;
1445
1446           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1447           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1448             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1449
1450           unit = GET_MODE_BITSIZE (maxmode);
1451
1452           if (xtarget == 0
1453               || (flag_force_mem && MEM_P (xtarget)))
1454             xtarget = xspec_target = gen_reg_rtx (tmode);
1455
1456           if (GET_MODE (xtarget) != maxmode)
1457             {
1458               if (REG_P (xtarget))
1459                 {
1460                   int wider = (GET_MODE_SIZE (maxmode)
1461                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1462                   xtarget = gen_lowpart (maxmode, xtarget);
1463                   if (wider)
1464                     xspec_target_subreg = xtarget;
1465                 }
1466               else
1467                 xtarget = gen_reg_rtx (maxmode);
1468             }
1469
1470           /* If this machine's extzv insists on a register target,
1471              make sure we have one.  */
1472           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1473                  (xtarget, maxmode)))
1474             xtarget = gen_reg_rtx (maxmode);
1475
1476           bitsize_rtx = GEN_INT (bitsize);
1477           bitpos_rtx = GEN_INT (xbitpos);
1478
1479           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1480           if (pat)
1481             {
1482               emit_insn (pat);
1483               target = xtarget;
1484               spec_target = xspec_target;
1485               spec_target_subreg = xspec_target_subreg;
1486             }
1487           else
1488             {
1489               delete_insns_since (last);
1490               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1491                                                 bitpos, target, 1);
1492             }
1493         }
1494       else
1495       extzv_loses:
1496         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1497                                           bitpos, target, 1);
1498     }
1499   else
1500     {
1501       if (HAVE_extv
1502           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1503           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1504                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1505         {
1506           int xbitpos = bitpos, xoffset = offset;
1507           rtx bitsize_rtx, bitpos_rtx;
1508           rtx last = get_last_insn ();
1509           rtx xop0 = op0, xtarget = target;
1510           rtx xspec_target = spec_target;
1511           rtx xspec_target_subreg = spec_target_subreg;
1512           rtx pat;
1513           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1514
1515           if (MEM_P (xop0))
1516             {
1517               /* Is the memory operand acceptable?  */
1518               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1519                      (xop0, GET_MODE (xop0))))
1520                 {
1521                   /* No, load into a reg and extract from there.  */
1522                   enum machine_mode bestmode;
1523
1524                   /* Get the mode to use for inserting into this field.  If
1525                      OP0 is BLKmode, get the smallest mode consistent with the
1526                      alignment. If OP0 is a non-BLKmode object that is no
1527                      wider than MAXMODE, use its mode. Otherwise, use the
1528                      smallest mode containing the field.  */
1529
1530                   if (GET_MODE (xop0) == BLKmode
1531                       || (GET_MODE_SIZE (GET_MODE (op0))
1532                           > GET_MODE_SIZE (maxmode)))
1533                     bestmode = get_best_mode (bitsize, bitnum,
1534                                               MEM_ALIGN (xop0), maxmode,
1535                                               MEM_VOLATILE_P (xop0));
1536                   else
1537                     bestmode = GET_MODE (xop0);
1538
1539                   if (bestmode == VOIDmode
1540                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1541                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1542                     goto extv_loses;
1543
1544                   /* Compute offset as multiple of this unit,
1545                      counting in bytes.  */
1546                   unit = GET_MODE_BITSIZE (bestmode);
1547                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1548                   xbitpos = bitnum % unit;
1549                   xop0 = adjust_address (xop0, bestmode, xoffset);
1550
1551                   /* Fetch it to a register in that size.  */
1552                   xop0 = force_reg (bestmode, xop0);
1553
1554                   /* XBITPOS counts within UNIT, which is what is expected.  */
1555                 }
1556               else
1557                 /* Get ref to first byte containing part of the field.  */
1558                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1559             }
1560
1561           /* If op0 is a register, we need it in MAXMODE (which is usually
1562              SImode) to make it acceptable to the format of extv.  */
1563           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1564             goto extv_loses;
1565           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1566             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1567
1568           /* On big-endian machines, we count bits from the most significant.
1569              If the bit field insn does not, we must invert.  */
1570           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1571             xbitpos = unit - bitsize - xbitpos;
1572
1573           /* XBITPOS counts within a size of UNIT.
1574              Adjust to count within a size of MAXMODE.  */
1575           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1576             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1577
1578           unit = GET_MODE_BITSIZE (maxmode);
1579
1580           if (xtarget == 0
1581               || (flag_force_mem && MEM_P (xtarget)))
1582             xtarget = xspec_target = gen_reg_rtx (tmode);
1583
1584           if (GET_MODE (xtarget) != maxmode)
1585             {
1586               if (REG_P (xtarget))
1587                 {
1588                   int wider = (GET_MODE_SIZE (maxmode)
1589                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1590                   xtarget = gen_lowpart (maxmode, xtarget);
1591                   if (wider)
1592                     xspec_target_subreg = xtarget;
1593                 }
1594               else
1595                 xtarget = gen_reg_rtx (maxmode);
1596             }
1597
1598           /* If this machine's extv insists on a register target,
1599              make sure we have one.  */
1600           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1601                  (xtarget, maxmode)))
1602             xtarget = gen_reg_rtx (maxmode);
1603
1604           bitsize_rtx = GEN_INT (bitsize);
1605           bitpos_rtx = GEN_INT (xbitpos);
1606
1607           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1608           if (pat)
1609             {
1610               emit_insn (pat);
1611               target = xtarget;
1612               spec_target = xspec_target;
1613               spec_target_subreg = xspec_target_subreg;
1614             }
1615           else
1616             {
1617               delete_insns_since (last);
1618               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1619                                                 bitpos, target, 0);
1620             }
1621         }
1622       else
1623       extv_loses:
1624         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1625                                           bitpos, target, 0);
1626     }
1627   if (target == spec_target)
1628     return target;
1629   if (target == spec_target_subreg)
1630     return spec_target;
1631   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1632     {
1633       /* If the target mode is not a scalar integral, first convert to the
1634          integer mode of that size and then access it as a floating-point
1635          value via a SUBREG.  */
1636       if (!SCALAR_INT_MODE_P (tmode))
1637         {
1638           enum machine_mode smode
1639             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1640           target = convert_to_mode (smode, target, unsignedp);
1641           target = force_reg (smode, target);
1642           return gen_lowpart (tmode, target);
1643         }
1644
1645       return convert_to_mode (tmode, target, unsignedp);
1646     }
1647   return target;
1648 }
1649 \f
1650 /* Extract a bit field using shifts and boolean operations
1651    Returns an rtx to represent the value.
1652    OP0 addresses a register (word) or memory (byte).
1653    BITPOS says which bit within the word or byte the bit field starts in.
1654    OFFSET says how many bytes farther the bit field starts;
1655     it is 0 if OP0 is a register.
1656    BITSIZE says how many bits long the bit field is.
1657     (If OP0 is a register, it may be narrower than a full word,
1658      but BITPOS still counts within a full word,
1659      which is significant on bigendian machines.)
1660
1661    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1662    If TARGET is nonzero, attempts to store the value there
1663    and return TARGET, but this is not guaranteed.
1664    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1665
1666 static rtx
1667 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1668                          unsigned HOST_WIDE_INT offset,
1669                          unsigned HOST_WIDE_INT bitsize,
1670                          unsigned HOST_WIDE_INT bitpos, rtx target,
1671                          int unsignedp)
1672 {
1673   unsigned int total_bits = BITS_PER_WORD;
1674   enum machine_mode mode;
1675
1676   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1677     {
1678       /* Special treatment for a bit field split across two registers.  */
1679       if (bitsize + bitpos > BITS_PER_WORD)
1680         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1681     }
1682   else
1683     {
1684       /* Get the proper mode to use for this field.  We want a mode that
1685          includes the entire field.  If such a mode would be larger than
1686          a word, we won't be doing the extraction the normal way.  */
1687
1688       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1689                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1690
1691       if (mode == VOIDmode)
1692         /* The only way this should occur is if the field spans word
1693            boundaries.  */
1694         return extract_split_bit_field (op0, bitsize,
1695                                         bitpos + offset * BITS_PER_UNIT,
1696                                         unsignedp);
1697
1698       total_bits = GET_MODE_BITSIZE (mode);
1699
1700       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1701          be in the range 0 to total_bits-1, and put any excess bytes in
1702          OFFSET.  */
1703       if (bitpos >= total_bits)
1704         {
1705           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1706           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1707                      * BITS_PER_UNIT);
1708         }
1709
1710       /* Get ref to an aligned byte, halfword, or word containing the field.
1711          Adjust BITPOS to be position within a word,
1712          and OFFSET to be the offset of that word.
1713          Then alter OP0 to refer to that word.  */
1714       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1715       offset -= (offset % (total_bits / BITS_PER_UNIT));
1716       op0 = adjust_address (op0, mode, offset);
1717     }
1718
1719   mode = GET_MODE (op0);
1720
1721   if (BYTES_BIG_ENDIAN)
1722     /* BITPOS is the distance between our msb and that of OP0.
1723        Convert it to the distance from the lsb.  */
1724     bitpos = total_bits - bitsize - bitpos;
1725
1726   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1727      We have reduced the big-endian case to the little-endian case.  */
1728
1729   if (unsignedp)
1730     {
1731       if (bitpos)
1732         {
1733           /* If the field does not already start at the lsb,
1734              shift it so it does.  */
1735           tree amount = build_int_cst (NULL_TREE, bitpos);
1736           /* Maybe propagate the target for the shift.  */
1737           /* But not if we will return it--could confuse integrate.c.  */
1738           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1739           if (tmode != mode) subtarget = 0;
1740           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1741         }
1742       /* Convert the value to the desired mode.  */
1743       if (mode != tmode)
1744         op0 = convert_to_mode (tmode, op0, 1);
1745
1746       /* Unless the msb of the field used to be the msb when we shifted,
1747          mask out the upper bits.  */
1748
1749       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1750         return expand_binop (GET_MODE (op0), and_optab, op0,
1751                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1752                              target, 1, OPTAB_LIB_WIDEN);
1753       return op0;
1754     }
1755
1756   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1757      then arithmetic-shift its lsb to the lsb of the word.  */
1758   op0 = force_reg (mode, op0);
1759   if (mode != tmode)
1760     target = 0;
1761
1762   /* Find the narrowest integer mode that contains the field.  */
1763
1764   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1765        mode = GET_MODE_WIDER_MODE (mode))
1766     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1767       {
1768         op0 = convert_to_mode (mode, op0, 0);
1769         break;
1770       }
1771
1772   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1773     {
1774       tree amount
1775         = build_int_cst (NULL_TREE,
1776                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1777       /* Maybe propagate the target for the shift.  */
1778       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1779       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1780     }
1781
1782   return expand_shift (RSHIFT_EXPR, mode, op0,
1783                        build_int_cst (NULL_TREE,
1784                                       GET_MODE_BITSIZE (mode) - bitsize),
1785                        target, 0);
1786 }
1787 \f
1788 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1789    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1790    complement of that if COMPLEMENT.  The mask is truncated if
1791    necessary to the width of mode MODE.  The mask is zero-extended if
1792    BITSIZE+BITPOS is too small for MODE.  */
1793
1794 static rtx
1795 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1796 {
1797   HOST_WIDE_INT masklow, maskhigh;
1798
1799   if (bitsize == 0)
1800     masklow = 0;
1801   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1802     masklow = (HOST_WIDE_INT) -1 << bitpos;
1803   else
1804     masklow = 0;
1805
1806   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1807     masklow &= ((unsigned HOST_WIDE_INT) -1
1808                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1809
1810   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1811     maskhigh = -1;
1812   else
1813     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1814
1815   if (bitsize == 0)
1816     maskhigh = 0;
1817   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1818     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1819                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1820   else
1821     maskhigh = 0;
1822
1823   if (complement)
1824     {
1825       maskhigh = ~maskhigh;
1826       masklow = ~masklow;
1827     }
1828
1829   return immed_double_const (masklow, maskhigh, mode);
1830 }
1831
1832 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1833    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1834
1835 static rtx
1836 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1837 {
1838   unsigned HOST_WIDE_INT v = INTVAL (value);
1839   HOST_WIDE_INT low, high;
1840
1841   if (bitsize < HOST_BITS_PER_WIDE_INT)
1842     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1843
1844   if (bitpos < HOST_BITS_PER_WIDE_INT)
1845     {
1846       low = v << bitpos;
1847       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1848     }
1849   else
1850     {
1851       low = 0;
1852       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1853     }
1854
1855   return immed_double_const (low, high, mode);
1856 }
1857 \f
1858 /* Extract a bit field from a memory by forcing the alignment of the
1859    memory.  This efficient only if the field spans at least 4 boundaries.
1860
1861    OP0 is the MEM.
1862    BITSIZE is the field width; BITPOS is the position of the first bit.
1863    UNSIGNEDP is true if the result should be zero-extended.  */
1864
1865 static rtx
1866 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1867                                    unsigned HOST_WIDE_INT bitpos,
1868                                    int unsignedp)
1869 {
1870   enum machine_mode mode, dmode;
1871   unsigned int m_bitsize, m_size;
1872   unsigned int sign_shift_up, sign_shift_dn;
1873   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1874
1875   /* Choose a mode that will fit BITSIZE.  */
1876   mode = smallest_mode_for_size (bitsize, MODE_INT);
1877   m_size = GET_MODE_SIZE (mode);
1878   m_bitsize = GET_MODE_BITSIZE (mode);
1879
1880   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1881   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1882   if (dmode == BLKmode)
1883     return NULL;
1884
1885   do_pending_stack_adjust ();
1886   start = get_last_insn ();
1887
1888   /* At the end, we'll need an additional shift to deal with sign/zero
1889      extension.  By default this will be a left+right shift of the
1890      appropriate size.  But we may be able to eliminate one of them.  */
1891   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1892
1893   if (STRICT_ALIGNMENT)
1894     {
1895       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1896       bitpos %= BITS_PER_UNIT;
1897
1898       /* We load two values to be concatenate.  There's an edge condition
1899          that bears notice -- an aligned value at the end of a page can
1900          only load one value lest we segfault.  So the two values we load
1901          are at "base & -size" and "(base + size - 1) & -size".  If base
1902          is unaligned, the addresses will be aligned and sequential; if
1903          base is aligned, the addresses will both be equal to base.  */
1904
1905       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1906                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1907                                 NULL, true, OPTAB_LIB_WIDEN);
1908       mark_reg_pointer (a1, m_bitsize);
1909       v1 = gen_rtx_MEM (mode, a1);
1910       set_mem_align (v1, m_bitsize);
1911       v1 = force_reg (mode, validize_mem (v1));
1912
1913       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1914       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1915                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1916                                 NULL, true, OPTAB_LIB_WIDEN);
1917       v2 = gen_rtx_MEM (mode, a2);
1918       set_mem_align (v2, m_bitsize);
1919       v2 = force_reg (mode, validize_mem (v2));
1920
1921       /* Combine these two values into a double-word value.  */
1922       if (m_bitsize == BITS_PER_WORD)
1923         {
1924           comb = gen_reg_rtx (dmode);
1925           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1926           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1927           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1928         }
1929       else
1930         {
1931           if (BYTES_BIG_ENDIAN)
1932             comb = v1, v1 = v2, v2 = comb;
1933           v1 = convert_modes (dmode, mode, v1, true);
1934           if (v1 == NULL)
1935             goto fail;
1936           v2 = convert_modes (dmode, mode, v2, true);
1937           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1938                                     NULL, true, OPTAB_LIB_WIDEN);
1939           if (v2 == NULL)
1940             goto fail;
1941           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1942                                       true, OPTAB_LIB_WIDEN);
1943           if (comb == NULL)
1944             goto fail;
1945         }
1946
1947       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1948                                    NULL, true, OPTAB_LIB_WIDEN);
1949       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1950
1951       if (bitpos != 0)
1952         {
1953           if (sign_shift_up <= bitpos)
1954             bitpos -= sign_shift_up, sign_shift_up = 0;
1955           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1956                                        NULL, true, OPTAB_LIB_WIDEN);
1957         }
1958     }
1959   else
1960     {
1961       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1962       bitpos %= BITS_PER_UNIT;
1963
1964       /* When strict alignment is not required, we can just load directly
1965          from memory without masking.  If the remaining BITPOS offset is
1966          small enough, we may be able to do all operations in MODE as
1967          opposed to DMODE.  */
1968       if (bitpos + bitsize <= m_bitsize)
1969         dmode = mode;
1970       comb = adjust_address (op0, dmode, offset);
1971
1972       if (sign_shift_up <= bitpos)
1973         bitpos -= sign_shift_up, sign_shift_up = 0;
1974       shift = GEN_INT (bitpos);
1975     }
1976
1977   /* Shift down the double-word such that the requested value is at bit 0.  */
1978   if (shift != const0_rtx)
1979     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1980                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1981   if (comb == NULL)
1982     goto fail;
1983
1984   /* If the field exactly matches MODE, then all we need to do is return the
1985      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1986   result = force_reg (mode, gen_lowpart (mode, comb));
1987
1988   if (sign_shift_up)
1989     result = expand_simple_binop (mode, ASHIFT, result,
1990                                   GEN_INT (sign_shift_up),
1991                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1992   if (sign_shift_dn)
1993     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1994                                   result, GEN_INT (sign_shift_dn),
1995                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1996
1997   return result;
1998
1999  fail:
2000   delete_insns_since (start);
2001   return NULL;
2002 }
2003
2004 /* Extract a bit field that is split across two words
2005    and return an RTX for the result.
2006
2007    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2008    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2009    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2010
2011 static rtx
2012 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2013                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2014 {
2015   unsigned int unit;
2016   unsigned int bitsdone = 0;
2017   rtx result = NULL_RTX;
2018   int first = 1;
2019
2020   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2021      much at a time.  */
2022   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2023     unit = BITS_PER_WORD;
2024   else
2025     {
2026       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2027       if (0 && bitsize / unit > 2)
2028         {
2029           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2030                                                        unsignedp);
2031           if (tmp)
2032             return tmp;
2033         }
2034     }
2035
2036   while (bitsdone < bitsize)
2037     {
2038       unsigned HOST_WIDE_INT thissize;
2039       rtx part, word;
2040       unsigned HOST_WIDE_INT thispos;
2041       unsigned HOST_WIDE_INT offset;
2042
2043       offset = (bitpos + bitsdone) / unit;
2044       thispos = (bitpos + bitsdone) % unit;
2045
2046       /* THISSIZE must not overrun a word boundary.  Otherwise,
2047          extract_fixed_bit_field will call us again, and we will mutually
2048          recurse forever.  */
2049       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2050       thissize = MIN (thissize, unit - thispos);
2051
2052       /* If OP0 is a register, then handle OFFSET here.
2053
2054          When handling multiword bitfields, extract_bit_field may pass
2055          down a word_mode SUBREG of a larger REG for a bitfield that actually
2056          crosses a word boundary.  Thus, for a SUBREG, we must find
2057          the current word starting from the base register.  */
2058       if (GET_CODE (op0) == SUBREG)
2059         {
2060           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2061           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2062                                         GET_MODE (SUBREG_REG (op0)));
2063           offset = 0;
2064         }
2065       else if (REG_P (op0))
2066         {
2067           word = operand_subword_force (op0, offset, GET_MODE (op0));
2068           offset = 0;
2069         }
2070       else
2071         word = op0;
2072
2073       /* Extract the parts in bit-counting order,
2074          whose meaning is determined by BYTES_PER_UNIT.
2075          OFFSET is in UNITs, and UNIT is in bits.
2076          extract_fixed_bit_field wants offset in bytes.  */
2077       part = extract_fixed_bit_field (word_mode, word,
2078                                       offset * unit / BITS_PER_UNIT,
2079                                       thissize, thispos, 0, 1);
2080       bitsdone += thissize;
2081
2082       /* Shift this part into place for the result.  */
2083       if (BYTES_BIG_ENDIAN)
2084         {
2085           if (bitsize != bitsdone)
2086             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2087                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2088                                  0, 1);
2089         }
2090       else
2091         {
2092           if (bitsdone != thissize)
2093             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2094                                  build_int_cst (NULL_TREE,
2095                                                 bitsdone - thissize), 0, 1);
2096         }
2097
2098       if (first)
2099         result = part;
2100       else
2101         /* Combine the parts with bitwise or.  This works
2102            because we extracted each part as an unsigned bit field.  */
2103         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2104                                OPTAB_LIB_WIDEN);
2105
2106       first = 0;
2107     }
2108
2109   /* Unsigned bit field: we are done.  */
2110   if (unsignedp)
2111     return result;
2112   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2113   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2114                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2115                          NULL_RTX, 0);
2116   return expand_shift (RSHIFT_EXPR, word_mode, result,
2117                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2118                        NULL_RTX, 0);
2119 }
2120 \f
2121 /* Add INC into TARGET.  */
2122
2123 void
2124 expand_inc (rtx target, rtx inc)
2125 {
2126   rtx value = expand_binop (GET_MODE (target), add_optab,
2127                             target, inc,
2128                             target, 0, OPTAB_LIB_WIDEN);
2129   if (value != target)
2130     emit_move_insn (target, value);
2131 }
2132
2133 /* Subtract DEC from TARGET.  */
2134
2135 void
2136 expand_dec (rtx target, rtx dec)
2137 {
2138   rtx value = expand_binop (GET_MODE (target), sub_optab,
2139                             target, dec,
2140                             target, 0, OPTAB_LIB_WIDEN);
2141   if (value != target)
2142     emit_move_insn (target, value);
2143 }
2144 \f
2145 /* Output a shift instruction for expression code CODE,
2146    with SHIFTED being the rtx for the value to shift,
2147    and AMOUNT the tree for the amount to shift by.
2148    Store the result in the rtx TARGET, if that is convenient.
2149    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2150    Return the rtx for where the value is.  */
2151
2152 rtx
2153 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2154               tree amount, rtx target, int unsignedp)
2155 {
2156   rtx op1, temp = 0;
2157   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2158   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2159   int try;
2160
2161   /* Previously detected shift-counts computed by NEGATE_EXPR
2162      and shifted in the other direction; but that does not work
2163      on all machines.  */
2164
2165   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2166
2167   if (SHIFT_COUNT_TRUNCATED)
2168     {
2169       if (GET_CODE (op1) == CONST_INT
2170           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2171               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2172         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2173                        % GET_MODE_BITSIZE (mode));
2174       else if (GET_CODE (op1) == SUBREG
2175                && subreg_lowpart_p (op1))
2176         op1 = SUBREG_REG (op1);
2177     }
2178
2179   if (op1 == const0_rtx)
2180     return shifted;
2181
2182   /* Check whether its cheaper to implement a left shift by a constant
2183      bit count by a sequence of additions.  */
2184   if (code == LSHIFT_EXPR
2185       && GET_CODE (op1) == CONST_INT
2186       && INTVAL (op1) > 0
2187       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2188       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2189     {
2190       int i;
2191       for (i = 0; i < INTVAL (op1); i++)
2192         {
2193           temp = force_reg (mode, shifted);
2194           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2195                                   unsignedp, OPTAB_LIB_WIDEN);
2196         }
2197       return shifted;
2198     }
2199
2200   for (try = 0; temp == 0 && try < 3; try++)
2201     {
2202       enum optab_methods methods;
2203
2204       if (try == 0)
2205         methods = OPTAB_DIRECT;
2206       else if (try == 1)
2207         methods = OPTAB_WIDEN;
2208       else
2209         methods = OPTAB_LIB_WIDEN;
2210
2211       if (rotate)
2212         {
2213           /* Widening does not work for rotation.  */
2214           if (methods == OPTAB_WIDEN)
2215             continue;
2216           else if (methods == OPTAB_LIB_WIDEN)
2217             {
2218               /* If we have been unable to open-code this by a rotation,
2219                  do it as the IOR of two shifts.  I.e., to rotate A
2220                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2221                  where C is the bitsize of A.
2222
2223                  It is theoretically possible that the target machine might
2224                  not be able to perform either shift and hence we would
2225                  be making two libcalls rather than just the one for the
2226                  shift (similarly if IOR could not be done).  We will allow
2227                  this extremely unlikely lossage to avoid complicating the
2228                  code below.  */
2229
2230               rtx subtarget = target == shifted ? 0 : target;
2231               rtx temp1;
2232               tree type = TREE_TYPE (amount);
2233               tree new_amount = make_tree (type, op1);
2234               tree other_amount
2235                 = fold (build2 (MINUS_EXPR, type,
2236                                 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2237                                 amount));
2238
2239               shifted = force_reg (mode, shifted);
2240
2241               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2242                                    mode, shifted, new_amount, subtarget, 1);
2243               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2244                                     mode, shifted, other_amount, 0, 1);
2245               return expand_binop (mode, ior_optab, temp, temp1, target,
2246                                    unsignedp, methods);
2247             }
2248
2249           temp = expand_binop (mode,
2250                                left ? rotl_optab : rotr_optab,
2251                                shifted, op1, target, unsignedp, methods);
2252
2253           /* If we don't have the rotate, but we are rotating by a constant
2254              that is in range, try a rotate in the opposite direction.  */
2255
2256           if (temp == 0 && GET_CODE (op1) == CONST_INT
2257               && INTVAL (op1) > 0
2258               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2259             temp = expand_binop (mode,
2260                                  left ? rotr_optab : rotl_optab,
2261                                  shifted,
2262                                  GEN_INT (GET_MODE_BITSIZE (mode)
2263                                           - INTVAL (op1)),
2264                                  target, unsignedp, methods);
2265         }
2266       else if (unsignedp)
2267         temp = expand_binop (mode,
2268                              left ? ashl_optab : lshr_optab,
2269                              shifted, op1, target, unsignedp, methods);
2270
2271       /* Do arithmetic shifts.
2272          Also, if we are going to widen the operand, we can just as well
2273          use an arithmetic right-shift instead of a logical one.  */
2274       if (temp == 0 && ! rotate
2275           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2276         {
2277           enum optab_methods methods1 = methods;
2278
2279           /* If trying to widen a log shift to an arithmetic shift,
2280              don't accept an arithmetic shift of the same size.  */
2281           if (unsignedp)
2282             methods1 = OPTAB_MUST_WIDEN;
2283
2284           /* Arithmetic shift */
2285
2286           temp = expand_binop (mode,
2287                                left ? ashl_optab : ashr_optab,
2288                                shifted, op1, target, unsignedp, methods1);
2289         }
2290
2291       /* We used to try extzv here for logical right shifts, but that was
2292          only useful for one machine, the VAX, and caused poor code
2293          generation there for lshrdi3, so the code was deleted and a
2294          define_expand for lshrsi3 was added to vax.md.  */
2295     }
2296
2297   gcc_assert (temp);
2298   return temp;
2299 }
2300 \f
2301 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2302                   alg_add_t_m2, alg_sub_t_m2,
2303                   alg_add_factor, alg_sub_factor,
2304                   alg_add_t2_m, alg_sub_t2_m };
2305
2306 /* This structure holds the "cost" of a multiply sequence.  The
2307    "cost" field holds the total rtx_cost of every operator in the
2308    synthetic multiplication sequence, hence cost(a op b) is defined
2309    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2310    The "latency" field holds the minimum possible latency of the
2311    synthetic multiply, on a hypothetical infinitely parallel CPU.
2312    This is the critical path, or the maximum height, of the expression
2313    tree which is the sum of rtx_costs on the most expensive path from
2314    any leaf to the root.  Hence latency(a op b) is defined as zero for
2315    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2316
2317 struct mult_cost {
2318   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2319   short latency;  /* The latency of the multiplication sequence.  */
2320 };
2321
2322 /* This macro is used to compare a pointer to a mult_cost against an
2323    single integer "rtx_cost" value.  This is equivalent to the macro
2324    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2325 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2326                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2327
2328 /* This macro is used to compare two pointers to mult_costs against
2329    each other.  The macro returns true if X is cheaper than Y.
2330    Currently, the cheaper of two mult_costs is the one with the
2331    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2332 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2333                                  || ((X)->cost == (Y)->cost     \
2334                                      && (X)->latency < (Y)->latency))
2335
2336 /* This structure records a sequence of operations.
2337    `ops' is the number of operations recorded.
2338    `cost' is their total cost.
2339    The operations are stored in `op' and the corresponding
2340    logarithms of the integer coefficients in `log'.
2341
2342    These are the operations:
2343    alg_zero             total := 0;
2344    alg_m                total := multiplicand;
2345    alg_shift            total := total * coeff
2346    alg_add_t_m2         total := total + multiplicand * coeff;
2347    alg_sub_t_m2         total := total - multiplicand * coeff;
2348    alg_add_factor       total := total * coeff + total;
2349    alg_sub_factor       total := total * coeff - total;
2350    alg_add_t2_m         total := total * coeff + multiplicand;
2351    alg_sub_t2_m         total := total * coeff - multiplicand;
2352
2353    The first operand must be either alg_zero or alg_m.  */
2354
2355 struct algorithm
2356 {
2357   struct mult_cost cost;
2358   short ops;
2359   /* The size of the OP and LOG fields are not directly related to the
2360      word size, but the worst-case algorithms will be if we have few
2361      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2362      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2363      in total wordsize operations.  */
2364   enum alg_code op[MAX_BITS_PER_WORD];
2365   char log[MAX_BITS_PER_WORD];
2366 };
2367
2368 /* The entry for our multiplication cache/hash table.  */
2369 struct alg_hash_entry {
2370   /* The number we are multiplying by.  */
2371   unsigned int t;
2372
2373   /* The mode in which we are multiplying something by T.  */
2374   enum machine_mode mode;
2375
2376   /* The best multiplication algorithm for t.  */
2377   enum alg_code alg;
2378 };
2379
2380 /* The number of cache/hash entries.  */
2381 #define NUM_ALG_HASH_ENTRIES 307
2382
2383 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2384    actually a hash table.  If we have a collision, that the older
2385    entry is kicked out.  */
2386 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2387
2388 /* Indicates the type of fixup needed after a constant multiplication.
2389    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2390    the result should be negated, and ADD_VARIANT means that the
2391    multiplicand should be added to the result.  */
2392 enum mult_variant {basic_variant, negate_variant, add_variant};
2393
2394 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2395                         const struct mult_cost *, enum machine_mode mode);
2396 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2397                                  struct algorithm *, enum mult_variant *, int);
2398 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2399                               const struct algorithm *, enum mult_variant);
2400 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2401                                                  int, rtx *, int *, int *);
2402 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2403 static rtx extract_high_half (enum machine_mode, rtx);
2404 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2405 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2406                                        int, int);
2407 /* Compute and return the best algorithm for multiplying by T.
2408    The algorithm must cost less than cost_limit
2409    If retval.cost >= COST_LIMIT, no algorithm was found and all
2410    other field of the returned struct are undefined.
2411    MODE is the machine mode of the multiplication.  */
2412
2413 static void
2414 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2415             const struct mult_cost *cost_limit, enum machine_mode mode)
2416 {
2417   int m;
2418   struct algorithm *alg_in, *best_alg;
2419   struct mult_cost best_cost;
2420   struct mult_cost new_limit;
2421   int op_cost, op_latency;
2422   unsigned HOST_WIDE_INT q;
2423   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2424   int hash_index;
2425   bool cache_hit = false;
2426   enum alg_code cache_alg = alg_zero;
2427
2428   /* Indicate that no algorithm is yet found.  If no algorithm
2429      is found, this value will be returned and indicate failure.  */
2430   alg_out->cost.cost = cost_limit->cost + 1;
2431   alg_out->cost.latency = cost_limit->latency + 1;
2432
2433   if (cost_limit->cost < 0
2434       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2435     return;
2436
2437   /* Restrict the bits of "t" to the multiplication's mode.  */
2438   t &= GET_MODE_MASK (mode);
2439
2440   /* t == 1 can be done in zero cost.  */
2441   if (t == 1)
2442     {
2443       alg_out->ops = 1;
2444       alg_out->cost.cost = 0;
2445       alg_out->cost.latency = 0;
2446       alg_out->op[0] = alg_m;
2447       return;
2448     }
2449
2450   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2451      fail now.  */
2452   if (t == 0)
2453     {
2454       if (MULT_COST_LESS (cost_limit, zero_cost))
2455         return;
2456       else
2457         {
2458           alg_out->ops = 1;
2459           alg_out->cost.cost = zero_cost;
2460           alg_out->cost.latency = zero_cost;
2461           alg_out->op[0] = alg_zero;
2462           return;
2463         }
2464     }
2465
2466   /* We'll be needing a couple extra algorithm structures now.  */
2467
2468   alg_in = alloca (sizeof (struct algorithm));
2469   best_alg = alloca (sizeof (struct algorithm));
2470   best_cost = *cost_limit;
2471
2472   /* Compute the hash index.  */
2473   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2474
2475   /* See if we already know what to do for T.  */
2476   if (alg_hash[hash_index].t == t
2477       && alg_hash[hash_index].mode == mode
2478       && alg_hash[hash_index].alg != alg_unknown)
2479     {
2480       cache_hit = true;
2481       cache_alg = alg_hash[hash_index].alg;
2482       switch (cache_alg)
2483         {
2484         case alg_shift:
2485           goto do_alg_shift;
2486
2487         case alg_add_t_m2:
2488         case alg_sub_t_m2:
2489           goto do_alg_addsub_t_m2;
2490
2491         case alg_add_factor:
2492         case alg_sub_factor:
2493           goto do_alg_addsub_factor;
2494
2495         case alg_add_t2_m:
2496           goto do_alg_add_t2_m;
2497
2498         case alg_sub_t2_m:
2499           goto do_alg_sub_t2_m;
2500
2501         default:
2502           gcc_unreachable ();
2503         }
2504     }
2505
2506   /* If we have a group of zero bits at the low-order part of T, try
2507      multiplying by the remaining bits and then doing a shift.  */
2508
2509   if ((t & 1) == 0)
2510     {
2511     do_alg_shift:
2512       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2513       if (m < maxm)
2514         {
2515           q = t >> m;
2516           /* The function expand_shift will choose between a shift and
2517              a sequence of additions, so the observed cost is given as
2518              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2519           op_cost = m * add_cost[mode];
2520           if (shift_cost[mode][m] < op_cost)
2521             op_cost = shift_cost[mode][m];
2522           new_limit.cost = best_cost.cost - op_cost;
2523           new_limit.latency = best_cost.latency - op_cost;
2524           synth_mult (alg_in, q, &new_limit, mode);
2525
2526           alg_in->cost.cost += op_cost;
2527           alg_in->cost.latency += op_cost;
2528           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2529             {
2530               struct algorithm *x;
2531               best_cost = alg_in->cost;
2532               x = alg_in, alg_in = best_alg, best_alg = x;
2533               best_alg->log[best_alg->ops] = m;
2534               best_alg->op[best_alg->ops] = alg_shift;
2535             }
2536         }
2537       if (cache_hit)
2538         goto done;
2539     }
2540
2541   /* If we have an odd number, add or subtract one.  */
2542   if ((t & 1) != 0)
2543     {
2544       unsigned HOST_WIDE_INT w;
2545
2546     do_alg_addsub_t_m2:
2547       for (w = 1; (w & t) != 0; w <<= 1)
2548         ;
2549       /* If T was -1, then W will be zero after the loop.  This is another
2550          case where T ends with ...111.  Handling this with (T + 1) and
2551          subtract 1 produces slightly better code and results in algorithm
2552          selection much faster than treating it like the ...0111 case
2553          below.  */
2554       if (w == 0
2555           || (w > 2
2556               /* Reject the case where t is 3.
2557                  Thus we prefer addition in that case.  */
2558               && t != 3))
2559         {
2560           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2561
2562           op_cost = add_cost[mode];
2563           new_limit.cost = best_cost.cost - op_cost;
2564           new_limit.latency = best_cost.latency - op_cost;
2565           synth_mult (alg_in, t + 1, &new_limit, mode);
2566
2567           alg_in->cost.cost += op_cost;
2568           alg_in->cost.latency += op_cost;
2569           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2570             {
2571               struct algorithm *x;
2572               best_cost = alg_in->cost;
2573               x = alg_in, alg_in = best_alg, best_alg = x;
2574               best_alg->log[best_alg->ops] = 0;
2575               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2576             }
2577         }
2578       else
2579         {
2580           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2581
2582           op_cost = add_cost[mode];
2583           new_limit.cost = best_cost.cost - op_cost;
2584           new_limit.latency = best_cost.latency - op_cost;
2585           synth_mult (alg_in, t - 1, &new_limit, mode);
2586
2587           alg_in->cost.cost += op_cost;
2588           alg_in->cost.latency += op_cost;
2589           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2590             {
2591               struct algorithm *x;
2592               best_cost = alg_in->cost;
2593               x = alg_in, alg_in = best_alg, best_alg = x;
2594               best_alg->log[best_alg->ops] = 0;
2595               best_alg->op[best_alg->ops] = alg_add_t_m2;
2596             }
2597         }
2598       if (cache_hit)
2599         goto done;
2600     }
2601
2602   /* Look for factors of t of the form
2603      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2604      If we find such a factor, we can multiply by t using an algorithm that
2605      multiplies by q, shift the result by m and add/subtract it to itself.
2606
2607      We search for large factors first and loop down, even if large factors
2608      are less probable than small; if we find a large factor we will find a
2609      good sequence quickly, and therefore be able to prune (by decreasing
2610      COST_LIMIT) the search.  */
2611
2612  do_alg_addsub_factor:
2613   for (m = floor_log2 (t - 1); m >= 2; m--)
2614     {
2615       unsigned HOST_WIDE_INT d;
2616
2617       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2618       if (t % d == 0 && t > d && m < maxm
2619           && (!cache_hit || cache_alg == alg_add_factor))
2620         {
2621           /* If the target has a cheap shift-and-add instruction use
2622              that in preference to a shift insn followed by an add insn.
2623              Assume that the shift-and-add is "atomic" with a latency
2624              equal to its cost, otherwise assume that on superscalar
2625              hardware the shift may be executed concurrently with the
2626              earlier steps in the algorithm.  */
2627           op_cost = add_cost[mode] + shift_cost[mode][m];
2628           if (shiftadd_cost[mode][m] < op_cost)
2629             {
2630               op_cost = shiftadd_cost[mode][m];
2631               op_latency = op_cost;
2632             }
2633           else
2634             op_latency = add_cost[mode];
2635
2636           new_limit.cost = best_cost.cost - op_cost;
2637           new_limit.latency = best_cost.latency - op_latency;
2638           synth_mult (alg_in, t / d, &new_limit, mode);
2639
2640           alg_in->cost.cost += op_cost;
2641           alg_in->cost.latency += op_latency;
2642           if (alg_in->cost.latency < op_cost)
2643             alg_in->cost.latency = op_cost;
2644           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2645             {
2646               struct algorithm *x;
2647               best_cost = alg_in->cost;
2648               x = alg_in, alg_in = best_alg, best_alg = x;
2649               best_alg->log[best_alg->ops] = m;
2650               best_alg->op[best_alg->ops] = alg_add_factor;
2651             }
2652           /* Other factors will have been taken care of in the recursion.  */
2653           break;
2654         }
2655
2656       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2657       if (t % d == 0 && t > d && m < maxm
2658           && (!cache_hit || cache_alg == alg_sub_factor))
2659         {
2660           /* If the target has a cheap shift-and-subtract insn use
2661              that in preference to a shift insn followed by a sub insn.
2662              Assume that the shift-and-sub is "atomic" with a latency
2663              equal to it's cost, otherwise assume that on superscalar
2664              hardware the shift may be executed concurrently with the
2665              earlier steps in the algorithm.  */
2666           op_cost = add_cost[mode] + shift_cost[mode][m];
2667           if (shiftsub_cost[mode][m] < op_cost)
2668             {
2669               op_cost = shiftsub_cost[mode][m];
2670               op_latency = op_cost;
2671             }
2672           else
2673             op_latency = add_cost[mode];
2674
2675           new_limit.cost = best_cost.cost - op_cost;
2676           new_limit.latency = best_cost.latency - op_latency;
2677           synth_mult (alg_in, t / d, &new_limit, mode);
2678
2679           alg_in->cost.cost += op_cost;
2680           alg_in->cost.latency += op_latency;
2681           if (alg_in->cost.latency < op_cost)
2682             alg_in->cost.latency = op_cost;
2683           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2684             {
2685               struct algorithm *x;
2686               best_cost = alg_in->cost;
2687               x = alg_in, alg_in = best_alg, best_alg = x;
2688               best_alg->log[best_alg->ops] = m;
2689               best_alg->op[best_alg->ops] = alg_sub_factor;
2690             }
2691           break;
2692         }
2693     }
2694   if (cache_hit)
2695     goto done;
2696
2697   /* Try shift-and-add (load effective address) instructions,
2698      i.e. do a*3, a*5, a*9.  */
2699   if ((t & 1) != 0)
2700     {
2701     do_alg_add_t2_m:
2702       q = t - 1;
2703       q = q & -q;
2704       m = exact_log2 (q);
2705       if (m >= 0 && m < maxm)
2706         {
2707           op_cost = shiftadd_cost[mode][m];
2708           new_limit.cost = best_cost.cost - op_cost;
2709           new_limit.latency = best_cost.latency - op_cost;
2710           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2711
2712           alg_in->cost.cost += op_cost;
2713           alg_in->cost.latency += op_cost;
2714           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715             {
2716               struct algorithm *x;
2717               best_cost = alg_in->cost;
2718               x = alg_in, alg_in = best_alg, best_alg = x;
2719               best_alg->log[best_alg->ops] = m;
2720               best_alg->op[best_alg->ops] = alg_add_t2_m;
2721             }
2722         }
2723       if (cache_hit)
2724         goto done;
2725
2726     do_alg_sub_t2_m:
2727       q = t + 1;
2728       q = q & -q;
2729       m = exact_log2 (q);
2730       if (m >= 0 && m < maxm)
2731         {
2732           op_cost = shiftsub_cost[mode][m];
2733           new_limit.cost = best_cost.cost - op_cost;
2734           new_limit.latency = best_cost.latency - op_cost;
2735           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2736
2737           alg_in->cost.cost += op_cost;
2738           alg_in->cost.latency += op_cost;
2739           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740             {
2741               struct algorithm *x;
2742               best_cost = alg_in->cost;
2743               x = alg_in, alg_in = best_alg, best_alg = x;
2744               best_alg->log[best_alg->ops] = m;
2745               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2746             }
2747         }
2748       if (cache_hit)
2749         goto done;
2750     }
2751
2752  done:
2753   /* If best_cost has not decreased, we have not found any algorithm.  */
2754   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2755     return;
2756
2757   /* Cache the result.  */
2758   if (!cache_hit)
2759     {
2760       alg_hash[hash_index].t = t;
2761       alg_hash[hash_index].mode = mode;
2762       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2763     }
2764
2765   /* If we are getting a too long sequence for `struct algorithm'
2766      to record, make this search fail.  */
2767   if (best_alg->ops == MAX_BITS_PER_WORD)
2768     return;
2769
2770   /* Copy the algorithm from temporary space to the space at alg_out.
2771      We avoid using structure assignment because the majority of
2772      best_alg is normally undefined, and this is a critical function.  */
2773   alg_out->ops = best_alg->ops + 1;
2774   alg_out->cost = best_cost;
2775   memcpy (alg_out->op, best_alg->op,
2776           alg_out->ops * sizeof *alg_out->op);
2777   memcpy (alg_out->log, best_alg->log,
2778           alg_out->ops * sizeof *alg_out->log);
2779 }
2780 \f
2781 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2782    Try three variations:
2783
2784        - a shift/add sequence based on VAL itself
2785        - a shift/add sequence based on -VAL, followed by a negation
2786        - a shift/add sequence based on VAL - 1, followed by an addition.
2787
2788    Return true if the cheapest of these cost less than MULT_COST,
2789    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2790
2791 static bool
2792 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2793                      struct algorithm *alg, enum mult_variant *variant,
2794                      int mult_cost)
2795 {
2796   struct algorithm alg2;
2797   struct mult_cost limit;
2798   int op_cost;
2799
2800   *variant = basic_variant;
2801   limit.cost = mult_cost;
2802   limit.latency = mult_cost;
2803   synth_mult (alg, val, &limit, mode);
2804
2805   /* This works only if the inverted value actually fits in an
2806      `unsigned int' */
2807   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2808     {
2809       op_cost = neg_cost[mode];
2810       if (MULT_COST_LESS (&alg->cost, mult_cost))
2811         {
2812           limit.cost = alg->cost.cost - op_cost;
2813           limit.latency = alg->cost.latency - op_cost;
2814         }
2815       else
2816         {
2817           limit.cost = mult_cost - op_cost;
2818           limit.latency = mult_cost - op_cost;
2819         }
2820
2821       synth_mult (&alg2, -val, &limit, mode);
2822       alg2.cost.cost += op_cost;
2823       alg2.cost.latency += op_cost;
2824       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2825         *alg = alg2, *variant = negate_variant;
2826     }
2827
2828   /* This proves very useful for division-by-constant.  */
2829   op_cost = add_cost[mode];
2830   if (MULT_COST_LESS (&alg->cost, mult_cost))
2831     {
2832       limit.cost = alg->cost.cost - op_cost;
2833       limit.latency = alg->cost.latency - op_cost;
2834     }
2835   else
2836     {
2837       limit.cost = mult_cost - op_cost;
2838       limit.latency = mult_cost - op_cost;
2839     }
2840
2841   synth_mult (&alg2, val - 1, &limit, mode);
2842   alg2.cost.cost += op_cost;
2843   alg2.cost.latency += op_cost;
2844   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2845     *alg = alg2, *variant = add_variant;
2846
2847   return MULT_COST_LESS (&alg->cost, mult_cost);
2848 }
2849
2850 /* A subroutine of expand_mult, used for constant multiplications.
2851    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2852    convenient.  Use the shift/add sequence described by ALG and apply
2853    the final fixup specified by VARIANT.  */
2854
2855 static rtx
2856 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2857                    rtx target, const struct algorithm *alg,
2858                    enum mult_variant variant)
2859 {
2860   HOST_WIDE_INT val_so_far;
2861   rtx insn, accum, tem;
2862   int opno;
2863   enum machine_mode nmode;
2864
2865   /* Avoid referencing memory over and over.
2866      For speed, but also for correctness when mem is volatile.  */
2867   if (MEM_P (op0))
2868     op0 = force_reg (mode, op0);
2869
2870   /* ACCUM starts out either as OP0 or as a zero, depending on
2871      the first operation.  */
2872
2873   if (alg->op[0] == alg_zero)
2874     {
2875       accum = copy_to_mode_reg (mode, const0_rtx);
2876       val_so_far = 0;
2877     }
2878   else if (alg->op[0] == alg_m)
2879     {
2880       accum = copy_to_mode_reg (mode, op0);
2881       val_so_far = 1;
2882     }
2883   else
2884     gcc_unreachable ();
2885
2886   for (opno = 1; opno < alg->ops; opno++)
2887     {
2888       int log = alg->log[opno];
2889       rtx shift_subtarget = optimize ? 0 : accum;
2890       rtx add_target
2891         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892            && !optimize)
2893           ? target : 0;
2894       rtx accum_target = optimize ? 0 : accum;
2895
2896       switch (alg->op[opno])
2897         {
2898         case alg_shift:
2899           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2900                                 build_int_cst (NULL_TREE, log),
2901                                 NULL_RTX, 0);
2902           val_so_far <<= log;
2903           break;
2904
2905         case alg_add_t_m2:
2906           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2907                               build_int_cst (NULL_TREE, log),
2908                               NULL_RTX, 0);
2909           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2910                                  add_target ? add_target : accum_target);
2911           val_so_far += (HOST_WIDE_INT) 1 << log;
2912           break;
2913
2914         case alg_sub_t_m2:
2915           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2916                               build_int_cst (NULL_TREE, log),
2917                               NULL_RTX, 0);
2918           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2919                                  add_target ? add_target : accum_target);
2920           val_so_far -= (HOST_WIDE_INT) 1 << log;
2921           break;
2922
2923         case alg_add_t2_m:
2924           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2925                                 build_int_cst (NULL_TREE, log),
2926                                 shift_subtarget,
2927                                 0);
2928           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929                                  add_target ? add_target : accum_target);
2930           val_so_far = (val_so_far << log) + 1;
2931           break;
2932
2933         case alg_sub_t2_m:
2934           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935                                 build_int_cst (NULL_TREE, log),
2936                                 shift_subtarget, 0);
2937           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2938                                  add_target ? add_target : accum_target);
2939           val_so_far = (val_so_far << log) - 1;
2940           break;
2941
2942         case alg_add_factor:
2943           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2944                               build_int_cst (NULL_TREE, log),
2945                               NULL_RTX, 0);
2946           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far += val_so_far << log;
2949           break;
2950
2951         case alg_sub_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2953                               build_int_cst (NULL_TREE, log),
2954                               NULL_RTX, 0);
2955           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2956                                  (add_target
2957                                   ? add_target : (optimize ? 0 : tem)));
2958           val_so_far = (val_so_far << log) - val_so_far;
2959           break;
2960
2961         default:
2962           gcc_unreachable ();
2963         }
2964
2965       /* Write a REG_EQUAL note on the last insn so that we can cse
2966          multiplication sequences.  Note that if ACCUM is a SUBREG,
2967          we've set the inner register and must properly indicate
2968          that.  */
2969
2970       tem = op0, nmode = mode;
2971       if (GET_CODE (accum) == SUBREG)
2972         {
2973           nmode = GET_MODE (SUBREG_REG (accum));
2974           tem = gen_lowpart (nmode, op0);
2975         }
2976
2977       insn = get_last_insn ();
2978       set_unique_reg_note (insn, REG_EQUAL,
2979                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2980     }
2981
2982   if (variant == negate_variant)
2983     {
2984       val_so_far = -val_so_far;
2985       accum = expand_unop (mode, neg_optab, accum, target, 0);
2986     }
2987   else if (variant == add_variant)
2988     {
2989       val_so_far = val_so_far + 1;
2990       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2991     }
2992
2993   /* Compare only the bits of val and val_so_far that are significant
2994      in the result mode, to avoid sign-/zero-extension confusion.  */
2995   val &= GET_MODE_MASK (mode);
2996   val_so_far &= GET_MODE_MASK (mode);
2997   gcc_assert (val == val_so_far);
2998
2999   return accum;
3000 }
3001
3002 /* Perform a multiplication and return an rtx for the result.
3003    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3004    TARGET is a suggestion for where to store the result (an rtx).
3005
3006    We check specially for a constant integer as OP1.
3007    If you want this check for OP0 as well, then before calling
3008    you should swap the two operands if OP0 would be constant.  */
3009
3010 rtx
3011 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3012              int unsignedp)
3013 {
3014   enum mult_variant variant;
3015   struct algorithm algorithm;
3016   int max_cost;
3017
3018   /* Handling const0_rtx here allows us to use zero as a rogue value for
3019      coeff below.  */
3020   if (op1 == const0_rtx)
3021     return const0_rtx;
3022   if (op1 == const1_rtx)
3023     return op0;
3024   if (op1 == constm1_rtx)
3025     return expand_unop (mode,
3026                         GET_MODE_CLASS (mode) == MODE_INT
3027                         && !unsignedp && flag_trapv
3028                         ? negv_optab : neg_optab,
3029                         op0, target, 0);
3030
3031   /* These are the operations that are potentially turned into a sequence
3032      of shifts and additions.  */
3033   if (GET_MODE_CLASS (mode) == MODE_INT
3034       && (unsignedp || !flag_trapv))
3035     {
3036       HOST_WIDE_INT coeff = 0;
3037
3038       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3039          less than or equal in size to `unsigned int' this doesn't matter.
3040          If the mode is larger than `unsigned int', then synth_mult works
3041          only if the constant value exactly fits in an `unsigned int' without
3042          any truncation.  This means that multiplying by negative values does
3043          not work; results are off by 2^32 on a 32 bit machine.  */
3044
3045       if (GET_CODE (op1) == CONST_INT)
3046         {
3047           /* Attempt to handle multiplication of DImode values by negative
3048              coefficients, by performing the multiplication by a positive
3049              multiplier and then inverting the result.  */
3050           if (INTVAL (op1) < 0
3051               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3052             {
3053               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3054                  result is interpreted as an unsigned coefficient.  */
3055               max_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET)
3056                          - neg_cost[mode];
3057               if (max_cost > 0
3058                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3059                                           &variant, max_cost))
3060                 {
3061                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3062                                                 NULL_RTX, &algorithm,
3063                                                 variant);
3064                   return expand_unop (mode, neg_optab, temp, target, 0);
3065                 }
3066             }
3067           else coeff = INTVAL (op1);
3068         }
3069       else if (GET_CODE (op1) == CONST_DOUBLE)
3070         {
3071           /* If we are multiplying in DImode, it may still be a win
3072              to try to work with shifts and adds.  */
3073           if (CONST_DOUBLE_HIGH (op1) == 0)
3074             coeff = CONST_DOUBLE_LOW (op1);
3075           else if (CONST_DOUBLE_LOW (op1) == 0
3076                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3077             {
3078               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3079                           + HOST_BITS_PER_WIDE_INT;
3080               return expand_shift (LSHIFT_EXPR, mode, op0,
3081                                    build_int_cst (NULL_TREE, shift),
3082                                    target, unsignedp);
3083             }
3084         }
3085
3086       /* We used to test optimize here, on the grounds that it's better to
3087          produce a smaller program when -O is not used.  But this causes
3088          such a terrible slowdown sometimes that it seems better to always
3089          use synth_mult.  */
3090       if (coeff != 0)
3091         {
3092           /* Special case powers of two.  */
3093           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3094             return expand_shift (LSHIFT_EXPR, mode, op0,
3095                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3096                                  target, unsignedp);
3097
3098           max_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3099           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3100                                    max_cost))
3101             return expand_mult_const (mode, op0, coeff, target,
3102                                       &algorithm, variant);
3103         }
3104     }
3105
3106   if (GET_CODE (op0) == CONST_DOUBLE)
3107     {
3108       rtx temp = op0;
3109       op0 = op1;
3110       op1 = temp;
3111     }
3112
3113   /* Expand x*2.0 as x+x.  */
3114   if (GET_CODE (op1) == CONST_DOUBLE
3115       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3116     {
3117       REAL_VALUE_TYPE d;
3118       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3119
3120       if (REAL_VALUES_EQUAL (d, dconst2))
3121         {
3122           op0 = force_reg (GET_MODE (op0), op0);
3123           return expand_binop (mode, add_optab, op0, op0,
3124                                target, unsignedp, OPTAB_LIB_WIDEN);
3125         }
3126     }
3127
3128   /* This used to use umul_optab if unsigned, but for non-widening multiply
3129      there is no difference between signed and unsigned.  */
3130   op0 = expand_binop (mode,
3131                       ! unsignedp
3132                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3133                       ? smulv_optab : smul_optab,
3134                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3135   gcc_assert (op0);
3136   return op0;
3137 }
3138 \f
3139 /* Return the smallest n such that 2**n >= X.  */
3140
3141 int
3142 ceil_log2 (unsigned HOST_WIDE_INT x)
3143 {
3144   return floor_log2 (x - 1) + 1;
3145 }
3146
3147 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3148    replace division by D, and put the least significant N bits of the result
3149    in *MULTIPLIER_PTR and return the most significant bit.
3150
3151    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3152    needed precision is in PRECISION (should be <= N).
3153
3154    PRECISION should be as small as possible so this function can choose
3155    multiplier more freely.
3156
3157    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3158    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3159
3160    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3161    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3162
3163 static
3164 unsigned HOST_WIDE_INT
3165 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3166                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3167 {
3168   HOST_WIDE_INT mhigh_hi, mlow_hi;
3169   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3170   int lgup, post_shift;
3171   int pow, pow2;
3172   unsigned HOST_WIDE_INT nl, dummy1;
3173   HOST_WIDE_INT nh, dummy2;
3174
3175   /* lgup = ceil(log2(divisor)); */
3176   lgup = ceil_log2 (d);
3177
3178   gcc_assert (lgup <= n);
3179
3180   pow = n + lgup;
3181   pow2 = n + lgup - precision;
3182
3183   /* We could handle this with some effort, but this case is much
3184      better handled directly with a scc insn, so rely on caller using
3185      that.  */
3186   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3187
3188   /* mlow = 2^(N + lgup)/d */
3189  if (pow >= HOST_BITS_PER_WIDE_INT)
3190     {
3191       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3192       nl = 0;
3193     }
3194   else
3195     {
3196       nh = 0;
3197       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3198     }
3199   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3200                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3201
3202   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3203   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3204     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3205   else
3206     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3207   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3208                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3209
3210   gcc_assert (!mhigh_hi || nh - d < d);
3211   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3212   /* Assert that mlow < mhigh.  */
3213   gcc_assert (mlow_hi < mhigh_hi
3214               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3215
3216   /* If precision == N, then mlow, mhigh exceed 2^N
3217      (but they do not exceed 2^(N+1)).  */
3218
3219   /* Reduce to lowest terms.  */
3220   for (post_shift = lgup; post_shift > 0; post_shift--)
3221     {
3222       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3223       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3224       if (ml_lo >= mh_lo)
3225         break;
3226
3227       mlow_hi = 0;
3228       mlow_lo = ml_lo;
3229       mhigh_hi = 0;
3230       mhigh_lo = mh_lo;
3231     }
3232
3233   *post_shift_ptr = post_shift;
3234   *lgup_ptr = lgup;
3235   if (n < HOST_BITS_PER_WIDE_INT)
3236     {
3237       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3238       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3239       return mhigh_lo >= mask;
3240     }
3241   else
3242     {
3243       *multiplier_ptr = GEN_INT (mhigh_lo);
3244       return mhigh_hi;
3245     }
3246 }
3247
3248 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3249    congruent to 1 (mod 2**N).  */
3250
3251 static unsigned HOST_WIDE_INT
3252 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3253 {
3254   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3255
3256   /* The algorithm notes that the choice y = x satisfies
3257      x*y == 1 mod 2^3, since x is assumed odd.
3258      Each iteration doubles the number of bits of significance in y.  */
3259
3260   unsigned HOST_WIDE_INT mask;
3261   unsigned HOST_WIDE_INT y = x;
3262   int nbit = 3;
3263
3264   mask = (n == HOST_BITS_PER_WIDE_INT
3265           ? ~(unsigned HOST_WIDE_INT) 0
3266           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3267
3268   while (nbit < n)
3269     {
3270       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3271       nbit *= 2;
3272     }
3273   return y;
3274 }
3275
3276 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3277    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3278    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3279    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3280    become signed.
3281
3282    The result is put in TARGET if that is convenient.
3283
3284    MODE is the mode of operation.  */
3285
3286 rtx
3287 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3288                              rtx op1, rtx target, int unsignedp)
3289 {
3290   rtx tem;
3291   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3292
3293   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3294                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3295                       NULL_RTX, 0);
3296   tem = expand_and (mode, tem, op1, NULL_RTX);
3297   adj_operand
3298     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3299                      adj_operand);
3300
3301   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3302                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3303                       NULL_RTX, 0);
3304   tem = expand_and (mode, tem, op0, NULL_RTX);
3305   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3306                           target);
3307
3308   return target;
3309 }
3310
3311 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3312
3313 static rtx
3314 extract_high_half (enum machine_mode mode, rtx op)
3315 {
3316   enum machine_mode wider_mode;
3317
3318   if (mode == word_mode)
3319     return gen_highpart (mode, op);
3320
3321   wider_mode = GET_MODE_WIDER_MODE (mode);
3322   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3323                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3324   return convert_modes (mode, wider_mode, op, 0);
3325 }
3326
3327 /* Like expand_mult_highpart, but only consider using a multiplication
3328    optab.  OP1 is an rtx for the constant operand.  */
3329
3330 static rtx
3331 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3332                             rtx target, int unsignedp, int max_cost)
3333 {
3334   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3335   enum machine_mode wider_mode;
3336   optab moptab;
3337   rtx tem;
3338   int size;
3339
3340   wider_mode = GET_MODE_WIDER_MODE (mode);
3341   size = GET_MODE_BITSIZE (mode);
3342
3343   /* Firstly, try using a multiplication insn that only generates the needed
3344      high part of the product, and in the sign flavor of unsignedp.  */
3345   if (mul_highpart_cost[mode] < max_cost)
3346     {
3347       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3348       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3349                           unsignedp, OPTAB_DIRECT);
3350       if (tem)
3351         return tem;
3352     }
3353
3354   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3355      Need to adjust the result after the multiplication.  */
3356   if (size - 1 < BITS_PER_WORD
3357       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3358           + 4 * add_cost[mode] < max_cost))
3359     {
3360       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3361       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3362                           unsignedp, OPTAB_DIRECT);
3363       if (tem)
3364         /* We used the wrong signedness.  Adjust the result.  */
3365         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3366                                             tem, unsignedp);
3367     }
3368
3369   /* Try widening multiplication.  */
3370   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3371   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3372       && mul_widen_cost[wider_mode] < max_cost)
3373     {
3374       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3375                           unsignedp, OPTAB_WIDEN);
3376       if (tem)
3377         return extract_high_half (mode, tem);
3378     }
3379
3380   /* Try widening the mode and perform a non-widening multiplication.  */
3381   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3382       && size - 1 < BITS_PER_WORD
3383       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3384     {
3385       rtx insns, wop0, wop1;
3386
3387       /* We need to widen the operands, for example to ensure the
3388          constant multiplier is correctly sign or zero extended.
3389          Use a sequence to clean-up any instructions emitted by
3390          the conversions if things don't work out.  */
3391       start_sequence ();
3392       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3393       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3394       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3395                           unsignedp, OPTAB_WIDEN);
3396       insns = get_insns ();
3397       end_sequence ();
3398
3399       if (tem)
3400         {
3401           emit_insn (insns);
3402           return extract_high_half (mode, tem);
3403         }
3404     }
3405
3406   /* Try widening multiplication of opposite signedness, and adjust.  */
3407   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3408   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3409       && size - 1 < BITS_PER_WORD
3410       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3411           + 4 * add_cost[mode] < max_cost))
3412     {
3413       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3414                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3415       if (tem != 0)
3416         {
3417           tem = extract_high_half (mode, tem);
3418           /* We used the wrong signedness.  Adjust the result.  */
3419           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3420                                               target, unsignedp);
3421         }
3422     }
3423
3424   return 0;
3425 }
3426
3427 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3428    putting the high half of the result in TARGET if that is convenient,
3429    and return where the result is.  If the operation can not be performed,
3430    0 is returned.
3431
3432    MODE is the mode of operation and result.
3433
3434    UNSIGNEDP nonzero means unsigned multiply.
3435
3436    MAX_COST is the total allowed cost for the expanded RTL.  */
3437
3438 static rtx
3439 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3440                       rtx target, int unsignedp, int max_cost)
3441 {
3442   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3443   unsigned HOST_WIDE_INT cnst1;
3444   int extra_cost;
3445   bool sign_adjust = false;
3446   enum mult_variant variant;
3447   struct algorithm alg;
3448   rtx tem;
3449
3450   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3451   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3452
3453   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3454
3455   /* We can't optimize modes wider than BITS_PER_WORD.
3456      ??? We might be able to perform double-word arithmetic if
3457      mode == word_mode, however all the cost calculations in
3458      synth_mult etc. assume single-word operations.  */
3459   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3460     return expand_mult_highpart_optab (mode, op0, op1, target,
3461                                        unsignedp, max_cost);
3462
3463   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3464
3465   /* Check whether we try to multiply by a negative constant.  */
3466   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3467     {
3468       sign_adjust = true;
3469       extra_cost += add_cost[mode];
3470     }
3471
3472   /* See whether shift/add multiplication is cheap enough.  */
3473   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3474                            max_cost - extra_cost))
3475     {
3476       /* See whether the specialized multiplication optabs are
3477          cheaper than the shift/add version.  */
3478       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3479                                         alg.cost.cost + extra_cost);
3480       if (tem)
3481         return tem;
3482
3483       tem = convert_to_mode (wider_mode, op0, unsignedp);
3484       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3485       tem = extract_high_half (mode, tem);
3486
3487       /* Adjust result for signedness.  */
3488       if (sign_adjust)
3489         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3490
3491       return tem;
3492     }
3493   return expand_mult_highpart_optab (mode, op0, op1, target,
3494                                      unsignedp, max_cost);
3495 }
3496
3497
3498 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3499
3500 static rtx
3501 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3502 {
3503   unsigned HOST_WIDE_INT masklow, maskhigh;
3504   rtx result, temp, shift, label;
3505   int logd;
3506
3507   logd = floor_log2 (d);
3508   result = gen_reg_rtx (mode);
3509
3510   /* Avoid conditional branches when they're expensive.  */
3511   if (BRANCH_COST >= 2
3512       && !optimize_size)
3513     {
3514       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3515                                       mode, 0, -1);
3516       if (signmask)
3517         {
3518           signmask = force_reg (mode, signmask);
3519           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3520           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3521
3522           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3523              which instruction sequence to use.  If logical right shifts
3524              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3525              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3526
3527           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3528           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3529               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3530             {
3531               temp = expand_binop (mode, xor_optab, op0, signmask,
3532                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3533               temp = expand_binop (mode, sub_optab, temp, signmask,
3534                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3535               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3536                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3537               temp = expand_binop (mode, xor_optab, temp, signmask,
3538                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3539               temp = expand_binop (mode, sub_optab, temp, signmask,
3540                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3541             }
3542           else
3543             {
3544               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3545                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3546               signmask = force_reg (mode, signmask);
3547
3548               temp = expand_binop (mode, add_optab, op0, signmask,
3549                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3550               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3551                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552               temp = expand_binop (mode, sub_optab, temp, signmask,
3553                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3554             }
3555           return temp;
3556         }
3557     }
3558
3559   /* Mask contains the mode's signbit and the significant bits of the
3560      modulus.  By including the signbit in the operation, many targets
3561      can avoid an explicit compare operation in the following comparison
3562      against zero.  */
3563
3564   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3565   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3566     {
3567       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3568       maskhigh = -1;
3569     }
3570   else
3571     maskhigh = (HOST_WIDE_INT) -1
3572                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3573
3574   temp = expand_binop (mode, and_optab, op0,
3575                        immed_double_const (masklow, maskhigh, mode),
3576                        result, 1, OPTAB_LIB_WIDEN);
3577   if (temp != result)
3578     emit_move_insn (result, temp);
3579
3580   label = gen_label_rtx ();
3581   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3582
3583   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3584                        0, OPTAB_LIB_WIDEN);
3585   masklow = (HOST_WIDE_INT) -1 << logd;
3586   maskhigh = -1;
3587   temp = expand_binop (mode, ior_optab, temp,
3588                        immed_double_const (masklow, maskhigh, mode),
3589                        result, 1, OPTAB_LIB_WIDEN);
3590   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3591                        0, OPTAB_LIB_WIDEN);
3592   if (temp != result)
3593     emit_move_insn (result, temp);
3594   emit_label (label);
3595   return result;
3596 }
3597
3598 /* Expand signed division of OP0 by a power of two D in mode MODE.
3599    This routine is only called for positive values of D.  */
3600
3601 static rtx
3602 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3603 {
3604   rtx temp, label;
3605   tree shift;
3606   int logd;
3607
3608   logd = floor_log2 (d);
3609   shift = build_int_cst (NULL_TREE, logd);
3610
3611   if (d == 2 && BRANCH_COST >= 1)
3612     {
3613       temp = gen_reg_rtx (mode);
3614       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3615       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3616                            0, OPTAB_LIB_WIDEN);
3617       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3618     }
3619
3620 #ifdef HAVE_conditional_move
3621   if (BRANCH_COST >= 2)
3622     {
3623       rtx temp2;
3624
3625       /* ??? emit_conditional_move forces a stack adjustment via
3626          compare_from_rtx so, if the sequence is discarded, it will
3627          be lost.  Do it now instead.  */
3628       do_pending_stack_adjust ();
3629
3630       start_sequence ();
3631       temp2 = copy_to_mode_reg (mode, op0);
3632       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3633                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3634       temp = force_reg (mode, temp);
3635
3636       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3637       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3638                                      mode, temp, temp2, mode, 0);
3639       if (temp2)
3640         {
3641           rtx seq = get_insns ();
3642           end_sequence ();
3643           emit_insn (seq);
3644           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3645         }
3646       end_sequence ();
3647     }
3648 #endif
3649
3650   if (BRANCH_COST >= 2)
3651     {
3652       int ushift = GET_MODE_BITSIZE (mode) - logd;
3653
3654       temp = gen_reg_rtx (mode);
3655       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3656       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3657         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3658                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3659       else
3660         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3661                              build_int_cst (NULL_TREE, ushift),
3662                              NULL_RTX, 1);
3663       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3664                            0, OPTAB_LIB_WIDEN);
3665       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3666     }
3667
3668   label = gen_label_rtx ();
3669   temp = copy_to_mode_reg (mode, op0);
3670   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3671   expand_inc (temp, GEN_INT (d - 1));
3672   emit_label (label);
3673   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3674 }
3675 \f
3676 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3677    if that is convenient, and returning where the result is.
3678    You may request either the quotient or the remainder as the result;
3679    specify REM_FLAG nonzero to get the remainder.
3680
3681    CODE is the expression code for which kind of division this is;
3682    it controls how rounding is done.  MODE is the machine mode to use.
3683    UNSIGNEDP nonzero means do unsigned division.  */
3684
3685 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3686    and then correct it by or'ing in missing high bits
3687    if result of ANDI is nonzero.
3688    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3689    This could optimize to a bfexts instruction.
3690    But C doesn't use these operations, so their optimizations are
3691    left for later.  */
3692 /* ??? For modulo, we don't actually need the highpart of the first product,
3693    the low part will do nicely.  And for small divisors, the second multiply
3694    can also be a low-part only multiply or even be completely left out.
3695    E.g. to calculate the remainder of a division by 3 with a 32 bit
3696    multiply, multiply with 0x55555556 and extract the upper two bits;
3697    the result is exact for inputs up to 0x1fffffff.
3698    The input range can be reduced by using cross-sum rules.
3699    For odd divisors >= 3, the following table gives right shift counts
3700    so that if a number is shifted by an integer multiple of the given
3701    amount, the remainder stays the same:
3702    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3703    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3704    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3705    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3706    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3707
3708    Cross-sum rules for even numbers can be derived by leaving as many bits
3709    to the right alone as the divisor has zeros to the right.
3710    E.g. if x is an unsigned 32 bit number:
3711    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3712    */
3713
3714 rtx
3715 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3716                rtx op0, rtx op1, rtx target, int unsignedp)
3717 {
3718   enum machine_mode compute_mode;
3719   rtx tquotient;
3720   rtx quotient = 0, remainder = 0;
3721   rtx last;
3722   int size;
3723   rtx insn, set;
3724   optab optab1, optab2;
3725   int op1_is_constant, op1_is_pow2 = 0;
3726   int max_cost, extra_cost;
3727   static HOST_WIDE_INT last_div_const = 0;
3728   static HOST_WIDE_INT ext_op1;
3729
3730   op1_is_constant = GET_CODE (op1) == CONST_INT;
3731   if (op1_is_constant)
3732     {
3733       ext_op1 = INTVAL (op1);
3734       if (unsignedp)
3735         ext_op1 &= GET_MODE_MASK (mode);
3736       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3737                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3738     }
3739
3740   /*
3741      This is the structure of expand_divmod:
3742
3743      First comes code to fix up the operands so we can perform the operations
3744      correctly and efficiently.
3745
3746      Second comes a switch statement with code specific for each rounding mode.
3747      For some special operands this code emits all RTL for the desired
3748      operation, for other cases, it generates only a quotient and stores it in
3749      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3750      to indicate that it has not done anything.
3751
3752      Last comes code that finishes the operation.  If QUOTIENT is set and
3753      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3754      QUOTIENT is not set, it is computed using trunc rounding.
3755
3756      We try to generate special code for division and remainder when OP1 is a
3757      constant.  If |OP1| = 2**n we can use shifts and some other fast
3758      operations.  For other values of OP1, we compute a carefully selected
3759      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3760      by m.
3761
3762      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3763      half of the product.  Different strategies for generating the product are
3764      implemented in expand_mult_highpart.
3765
3766      If what we actually want is the remainder, we generate that by another
3767      by-constant multiplication and a subtraction.  */
3768
3769   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3770      code below will malfunction if we are, so check here and handle
3771      the special case if so.  */
3772   if (op1 == const1_rtx)
3773     return rem_flag ? const0_rtx : op0;
3774
3775     /* When dividing by -1, we could get an overflow.
3776      negv_optab can handle overflows.  */
3777   if (! unsignedp && op1 == constm1_rtx)
3778     {
3779       if (rem_flag)
3780         return const0_rtx;
3781       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3782                           ? negv_optab : neg_optab, op0, target, 0);
3783     }
3784
3785   if (target
3786       /* Don't use the function value register as a target
3787          since we have to read it as well as write it,
3788          and function-inlining gets confused by this.  */
3789       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3790           /* Don't clobber an operand while doing a multi-step calculation.  */
3791           || ((rem_flag || op1_is_constant)
3792               && (reg_mentioned_p (target, op0)
3793                   || (MEM_P (op0) && MEM_P (target))))
3794           || reg_mentioned_p (target, op1)
3795           || (MEM_P (op1) && MEM_P (target))))
3796     target = 0;
3797
3798   /* Get the mode in which to perform this computation.  Normally it will
3799      be MODE, but sometimes we can't do the desired operation in MODE.
3800      If so, pick a wider mode in which we can do the operation.  Convert
3801      to that mode at the start to avoid repeated conversions.
3802
3803      First see what operations we need.  These depend on the expression
3804      we are evaluating.  (We assume that divxx3 insns exist under the
3805      same conditions that modxx3 insns and that these insns don't normally
3806      fail.  If these assumptions are not correct, we may generate less
3807      efficient code in some cases.)
3808
3809      Then see if we find a mode in which we can open-code that operation
3810      (either a division, modulus, or shift).  Finally, check for the smallest
3811      mode for which we can do the operation with a library call.  */
3812
3813   /* We might want to refine this now that we have division-by-constant
3814      optimization.  Since expand_mult_highpart tries so many variants, it is
3815      not straightforward to generalize this.  Maybe we should make an array
3816      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3817
3818   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3819             ? (unsignedp ? lshr_optab : ashr_optab)
3820             : (unsignedp ? udiv_optab : sdiv_optab));
3821   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3822             ? optab1
3823             : (unsignedp ? udivmod_optab : sdivmod_optab));
3824
3825   for (compute_mode = mode; compute_mode != VOIDmode;
3826        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3827     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3828         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3829       break;
3830
3831   if (compute_mode == VOIDmode)
3832     for (compute_mode = mode; compute_mode != VOIDmode;
3833          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3834       if (optab1->handlers[compute_mode].libfunc
3835           || optab2->handlers[compute_mode].libfunc)
3836         break;
3837
3838   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3839      in expand_binop.  */
3840   if (compute_mode == VOIDmode)
3841     compute_mode = mode;
3842
3843   if (target && GET_MODE (target) == compute_mode)
3844     tquotient = target;
3845   else
3846     tquotient = gen_reg_rtx (compute_mode);
3847
3848   size = GET_MODE_BITSIZE (compute_mode);
3849 #if 0
3850   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3851      (mode), and thereby get better code when OP1 is a constant.  Do that
3852      later.  It will require going over all usages of SIZE below.  */
3853   size = GET_MODE_BITSIZE (mode);
3854 #endif
3855
3856   /* Only deduct something for a REM if the last divide done was
3857      for a different constant.   Then set the constant of the last
3858      divide.  */
3859   max_cost = div_cost[compute_mode]
3860     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3861                       && INTVAL (op1) == last_div_const)
3862        ? mul_cost[compute_mode] + add_cost[compute_mode]
3863        : 0);
3864
3865   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3866
3867   /* Now convert to the best mode to use.  */
3868   if (compute_mode != mode)
3869     {
3870       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3871       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3872
3873       /* convert_modes may have placed op1 into a register, so we
3874          must recompute the following.  */
3875       op1_is_constant = GET_CODE (op1) == CONST_INT;
3876       op1_is_pow2 = (op1_is_constant
3877                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3878                           || (! unsignedp
3879                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3880     }
3881
3882   /* If one of the operands is a volatile MEM, copy it into a register.  */
3883
3884   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3885     op0 = force_reg (compute_mode, op0);
3886   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3887     op1 = force_reg (compute_mode, op1);
3888
3889   /* If we need the remainder or if OP1 is constant, we need to
3890      put OP0 in a register in case it has any queued subexpressions.  */
3891   if (rem_flag || op1_is_constant)
3892     op0 = force_reg (compute_mode, op0);
3893
3894   last = get_last_insn ();
3895
3896   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3897   if (unsignedp)
3898     {
3899       if (code == FLOOR_DIV_EXPR)
3900         code = TRUNC_DIV_EXPR;
3901       if (code == FLOOR_MOD_EXPR)
3902         code = TRUNC_MOD_EXPR;
3903       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3904         code = TRUNC_DIV_EXPR;
3905     }
3906
3907   if (op1 != const0_rtx)
3908     switch (code)
3909       {
3910       case TRUNC_MOD_EXPR:
3911       case TRUNC_DIV_EXPR:
3912         if (op1_is_constant)
3913           {
3914             if (unsignedp)
3915               {
3916                 unsigned HOST_WIDE_INT mh;
3917                 int pre_shift, post_shift;
3918                 int dummy;
3919                 rtx ml;
3920                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3921                                             & GET_MODE_MASK (compute_mode));
3922
3923                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3924                   {
3925                     pre_shift = floor_log2 (d);
3926                     if (rem_flag)
3927                       {
3928                         remainder
3929                           = expand_binop (compute_mode, and_optab, op0,
3930                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3931                                           remainder, 1,
3932                                           OPTAB_LIB_WIDEN);
3933                         if (remainder)
3934                           return gen_lowpart (mode, remainder);
3935                       }
3936                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3937                                              build_int_cst (NULL_TREE,
3938                                                             pre_shift),
3939                                              tquotient, 1);
3940                   }
3941                 else if (size <= HOST_BITS_PER_WIDE_INT)
3942                   {
3943                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3944                       {
3945                         /* Most significant bit of divisor is set; emit an scc
3946                            insn.  */
3947                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3948                                                     compute_mode, 1, 1);
3949                         if (quotient == 0)
3950                           goto fail1;
3951                       }
3952                     else
3953                       {
3954                         /* Find a suitable multiplier and right shift count
3955                            instead of multiplying with D.  */
3956
3957                         mh = choose_multiplier (d, size, size,
3958                                                 &ml, &post_shift, &dummy);
3959
3960                         /* If the suggested multiplier is more than SIZE bits,
3961                            we can do better for even divisors, using an
3962                            initial right shift.  */
3963                         if (mh != 0 && (d & 1) == 0)
3964                           {
3965                             pre_shift = floor_log2 (d & -d);
3966                             mh = choose_multiplier (d >> pre_shift, size,
3967                                                     size - pre_shift,
3968                                                     &ml, &post_shift, &dummy);
3969                             gcc_assert (!mh);
3970                           }
3971                         else
3972                           pre_shift = 0;
3973
3974                         if (mh != 0)
3975                           {
3976                             rtx t1, t2, t3, t4;
3977
3978                             if (post_shift - 1 >= BITS_PER_WORD)
3979                               goto fail1;
3980
3981                             extra_cost
3982                               = (shift_cost[compute_mode][post_shift - 1]
3983                                  + shift_cost[compute_mode][1]
3984                                  + 2 * add_cost[compute_mode]);
3985                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3986                                                        NULL_RTX, 1,
3987                                                        max_cost - extra_cost);
3988                             if (t1 == 0)
3989                               goto fail1;
3990                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3991                                                                op0, t1),
3992                                                 NULL_RTX);
3993                             t3 = expand_shift
3994                               (RSHIFT_EXPR, compute_mode, t2,
3995                                build_int_cst (NULL_TREE, 1),
3996                                NULL_RTX,1);
3997                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3998                                                               t1, t3),
3999                                                 NULL_RTX);
4000                             quotient = expand_shift
4001                               (RSHIFT_EXPR, compute_mode, t4,
4002                                build_int_cst (NULL_TREE, post_shift - 1),
4003                                tquotient, 1);
4004                           }
4005                         else
4006                           {
4007                             rtx t1, t2;
4008
4009                             if (pre_shift >= BITS_PER_WORD
4010                                 || post_shift >= BITS_PER_WORD)
4011                               goto fail1;
4012
4013                             t1 = expand_shift
4014                               (RSHIFT_EXPR, compute_mode, op0,
4015                                build_int_cst (NULL_TREE, pre_shift),
4016                                NULL_RTX, 1);
4017                             extra_cost
4018                               = (shift_cost[compute_mode][pre_shift]
4019                                  + shift_cost[compute_mode][post_shift]);
4020                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4021                                                        NULL_RTX, 1,
4022                                                        max_cost - extra_cost);
4023                             if (t2 == 0)
4024                               goto fail1;
4025                             quotient = expand_shift
4026                               (RSHIFT_EXPR, compute_mode, t2,
4027                                build_int_cst (NULL_TREE, post_shift),
4028                                tquotient, 1);
4029                           }
4030                       }
4031                   }
4032                 else            /* Too wide mode to use tricky code */
4033                   break;
4034
4035                 insn = get_last_insn ();
4036                 if (insn != last
4037                     && (set = single_set (insn)) != 0
4038                     && SET_DEST (set) == quotient)
4039                   set_unique_reg_note (insn,
4040                                        REG_EQUAL,
4041                                        gen_rtx_UDIV (compute_mode, op0, op1));
4042               }
4043             else                /* TRUNC_DIV, signed */
4044               {
4045                 unsigned HOST_WIDE_INT ml;
4046                 int lgup, post_shift;
4047                 rtx mlr;
4048                 HOST_WIDE_INT d = INTVAL (op1);
4049                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4050
4051                 /* n rem d = n rem -d */
4052                 if (rem_flag && d < 0)
4053                   {
4054                     d = abs_d;
4055                     op1 = gen_int_mode (abs_d, compute_mode);
4056                   }
4057
4058                 if (d == 1)
4059                   quotient = op0;
4060                 else if (d == -1)
4061                   quotient = expand_unop (compute_mode, neg_optab, op0,
4062                                           tquotient, 0);
4063                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4064                   {
4065                     /* This case is not handled correctly below.  */
4066                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4067                                                 compute_mode, 1, 1);
4068                     if (quotient == 0)
4069                       goto fail1;
4070                   }
4071                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4072                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4073                                       : sdiv_pow2_cheap[compute_mode])
4074                          /* We assume that cheap metric is true if the
4075                             optab has an expander for this mode.  */
4076                          && (((rem_flag ? smod_optab : sdiv_optab)
4077                               ->handlers[compute_mode].insn_code
4078                               != CODE_FOR_nothing)
4079                              || (sdivmod_optab->handlers[compute_mode]
4080                                  .insn_code != CODE_FOR_nothing)))
4081                   ;
4082                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4083                   {
4084                     if (rem_flag)
4085                       {
4086                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4087                         if (remainder)
4088                           return gen_lowpart (mode, remainder);
4089                       }
4090
4091                     if (sdiv_pow2_cheap[compute_mode]
4092                         && ((sdiv_optab->handlers[compute_mode].insn_code
4093                              != CODE_FOR_nothing)
4094                             || (sdivmod_optab->handlers[compute_mode].insn_code
4095                                 != CODE_FOR_nothing)))
4096                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4097                                                 compute_mode, op0,
4098                                                 gen_int_mode (abs_d,
4099                                                               compute_mode),
4100                                                 NULL_RTX, 0);
4101                     else
4102                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4103
4104                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4105                        negate the quotient.  */
4106                     if (d < 0)
4107                       {
4108                         insn = get_last_insn ();
4109                         if (insn != last
4110                             && (set = single_set (insn)) != 0
4111                             && SET_DEST (set) == quotient
4112                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4113                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4114                           set_unique_reg_note (insn,
4115                                                REG_EQUAL,
4116                                                gen_rtx_DIV (compute_mode,
4117                                                             op0,
4118                                                             GEN_INT
4119                                                             (trunc_int_for_mode
4120                                                              (abs_d,
4121                                                               compute_mode))));
4122
4123                         quotient = expand_unop (compute_mode, neg_optab,
4124                                                 quotient, quotient, 0);
4125                       }
4126                   }
4127                 else if (size <= HOST_BITS_PER_WIDE_INT)
4128                   {
4129                     choose_multiplier (abs_d, size, size - 1,
4130                                        &mlr, &post_shift, &lgup);
4131                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4132                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4133                       {
4134                         rtx t1, t2, t3;
4135
4136                         if (post_shift >= BITS_PER_WORD
4137                             || size - 1 >= BITS_PER_WORD)
4138                           goto fail1;
4139
4140                         extra_cost = (shift_cost[compute_mode][post_shift]
4141                                       + shift_cost[compute_mode][size - 1]
4142                                       + add_cost[compute_mode]);
4143                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4144                                                    NULL_RTX, 0,
4145                                                    max_cost - extra_cost);
4146                         if (t1 == 0)
4147                           goto fail1;
4148                         t2 = expand_shift
4149                           (RSHIFT_EXPR, compute_mode, t1,
4150                            build_int_cst (NULL_TREE, post_shift),
4151                            NULL_RTX, 0);
4152                         t3 = expand_shift
4153                           (RSHIFT_EXPR, compute_mode, op0,
4154                            build_int_cst (NULL_TREE, size - 1),
4155                            NULL_RTX, 0);
4156                         if (d < 0)
4157                           quotient
4158                             = force_operand (gen_rtx_MINUS (compute_mode,
4159                                                             t3, t2),
4160                                              tquotient);
4161                         else
4162                           quotient
4163                             = force_operand (gen_rtx_MINUS (compute_mode,
4164                                                             t2, t3),
4165                                              tquotient);
4166                       }
4167                     else
4168                       {
4169                         rtx t1, t2, t3, t4;
4170
4171                         if (post_shift >= BITS_PER_WORD
4172                             || size - 1 >= BITS_PER_WORD)
4173                           goto fail1;
4174
4175                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4176                         mlr = gen_int_mode (ml, compute_mode);
4177                         extra_cost = (shift_cost[compute_mode][post_shift]
4178                                       + shift_cost[compute_mode][size - 1]
4179                                       + 2 * add_cost[compute_mode]);
4180                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4181                                                    NULL_RTX, 0,
4182                                                    max_cost - extra_cost);
4183                         if (t1 == 0)
4184                           goto fail1;
4185                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4186                                                           t1, op0),
4187                                             NULL_RTX);
4188                         t3 = expand_shift
4189                           (RSHIFT_EXPR, compute_mode, t2,
4190                            build_int_cst (NULL_TREE, post_shift),
4191                            NULL_RTX, 0);
4192                         t4 = expand_shift
4193                           (RSHIFT_EXPR, compute_mode, op0,
4194                            build_int_cst (NULL_TREE, size - 1),
4195                            NULL_RTX, 0);
4196                         if (d < 0)
4197                           quotient
4198                             = force_operand (gen_rtx_MINUS (compute_mode,
4199                                                             t4, t3),
4200                                              tquotient);
4201                         else
4202                           quotient
4203                             = force_operand (gen_rtx_MINUS (compute_mode,
4204                                                             t3, t4),
4205                                              tquotient);
4206                       }
4207                   }
4208                 else            /* Too wide mode to use tricky code */
4209                   break;
4210
4211                 insn = get_last_insn ();
4212                 if (insn != last
4213                     && (set = single_set (insn)) != 0
4214                     && SET_DEST (set) == quotient)
4215                   set_unique_reg_note (insn,
4216                                        REG_EQUAL,
4217                                        gen_rtx_DIV (compute_mode, op0, op1));
4218               }
4219             break;
4220           }
4221       fail1:
4222         delete_insns_since (last);
4223         break;
4224
4225       case FLOOR_DIV_EXPR:
4226       case FLOOR_MOD_EXPR:
4227       /* We will come here only for signed operations.  */
4228         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4229           {
4230             unsigned HOST_WIDE_INT mh;
4231             int pre_shift, lgup, post_shift;
4232             HOST_WIDE_INT d = INTVAL (op1);
4233             rtx ml;
4234
4235             if (d > 0)
4236               {
4237                 /* We could just as easily deal with negative constants here,
4238                    but it does not seem worth the trouble for GCC 2.6.  */
4239                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4240                   {
4241                     pre_shift = floor_log2 (d);
4242                     if (rem_flag)
4243                       {
4244                         remainder = expand_binop (compute_mode, and_optab, op0,
4245                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4246                                                   remainder, 0, OPTAB_LIB_WIDEN);
4247                         if (remainder)
4248                           return gen_lowpart (mode, remainder);
4249                       }
4250                     quotient = expand_shift
4251                       (RSHIFT_EXPR, compute_mode, op0,
4252                        build_int_cst (NULL_TREE, pre_shift),
4253                        tquotient, 0);
4254                   }
4255                 else
4256                   {
4257                     rtx t1, t2, t3, t4;
4258
4259                     mh = choose_multiplier (d, size, size - 1,
4260                                             &ml, &post_shift, &lgup);
4261                     gcc_assert (!mh);
4262
4263                     if (post_shift < BITS_PER_WORD
4264                         && size - 1 < BITS_PER_WORD)
4265                       {
4266                         t1 = expand_shift
4267                           (RSHIFT_EXPR, compute_mode, op0,
4268                            build_int_cst (NULL_TREE, size - 1),
4269                            NULL_RTX, 0);
4270                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4271                                            NULL_RTX, 0, OPTAB_WIDEN);
4272                         extra_cost = (shift_cost[compute_mode][post_shift]
4273                                       + shift_cost[compute_mode][size - 1]
4274                                       + 2 * add_cost[compute_mode]);
4275                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4276                                                    NULL_RTX, 1,
4277                                                    max_cost - extra_cost);
4278                         if (t3 != 0)
4279                           {
4280                             t4 = expand_shift
4281                               (RSHIFT_EXPR, compute_mode, t3,
4282                                build_int_cst (NULL_TREE, post_shift),
4283                                NULL_RTX, 1);
4284                             quotient = expand_binop (compute_mode, xor_optab,
4285                                                      t4, t1, tquotient, 0,
4286                                                      OPTAB_WIDEN);
4287                           }
4288                       }
4289                   }
4290               }
4291             else
4292               {
4293                 rtx nsign, t1, t2, t3, t4;
4294                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4295                                                   op0, constm1_rtx), NULL_RTX);
4296                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4297                                    0, OPTAB_WIDEN);
4298                 nsign = expand_shift
4299                   (RSHIFT_EXPR, compute_mode, t2,
4300                    build_int_cst (NULL_TREE, size - 1),
4301                    NULL_RTX, 0);
4302                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4303                                     NULL_RTX);
4304                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4305                                     NULL_RTX, 0);
4306                 if (t4)
4307                   {
4308                     rtx t5;
4309                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4310                                       NULL_RTX, 0);
4311                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4312                                                             t4, t5),
4313                                               tquotient);
4314                   }
4315               }
4316           }
4317
4318         if (quotient != 0)
4319           break;
4320         delete_insns_since (last);
4321
4322         /* Try using an instruction that produces both the quotient and
4323            remainder, using truncation.  We can easily compensate the quotient
4324            or remainder to get floor rounding, once we have the remainder.
4325            Notice that we compute also the final remainder value here,
4326            and return the result right away.  */
4327         if (target == 0 || GET_MODE (target) != compute_mode)
4328           target = gen_reg_rtx (compute_mode);
4329
4330         if (rem_flag)
4331           {
4332             remainder
4333               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4334             quotient = gen_reg_rtx (compute_mode);
4335           }
4336         else
4337           {
4338             quotient
4339               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4340             remainder = gen_reg_rtx (compute_mode);
4341           }
4342
4343         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4344                                  quotient, remainder, 0))
4345           {
4346             /* This could be computed with a branch-less sequence.
4347                Save that for later.  */
4348             rtx tem;
4349             rtx label = gen_label_rtx ();
4350             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4351             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4352                                 NULL_RTX, 0, OPTAB_WIDEN);
4353             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4354             expand_dec (quotient, const1_rtx);
4355             expand_inc (remainder, op1);
4356             emit_label (label);
4357             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4358           }
4359
4360         /* No luck with division elimination or divmod.  Have to do it
4361            by conditionally adjusting op0 *and* the result.  */
4362         {
4363           rtx label1, label2, label3, label4, label5;
4364           rtx adjusted_op0;
4365           rtx tem;
4366
4367           quotient = gen_reg_rtx (compute_mode);
4368           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4369           label1 = gen_label_rtx ();
4370           label2 = gen_label_rtx ();
4371           label3 = gen_label_rtx ();
4372           label4 = gen_label_rtx ();
4373           label5 = gen_label_rtx ();
4374           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4375           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4376           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4377                               quotient, 0, OPTAB_LIB_WIDEN);
4378           if (tem != quotient)
4379             emit_move_insn (quotient, tem);
4380           emit_jump_insn (gen_jump (label5));
4381           emit_barrier ();
4382           emit_label (label1);
4383           expand_inc (adjusted_op0, const1_rtx);
4384           emit_jump_insn (gen_jump (label4));
4385           emit_barrier ();
4386           emit_label (label2);
4387           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4388           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4389                               quotient, 0, OPTAB_LIB_WIDEN);
4390           if (tem != quotient)
4391             emit_move_insn (quotient, tem);
4392           emit_jump_insn (gen_jump (label5));
4393           emit_barrier ();
4394           emit_label (label3);
4395           expand_dec (adjusted_op0, const1_rtx);
4396           emit_label (label4);
4397           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4398                               quotient, 0, OPTAB_LIB_WIDEN);
4399           if (tem != quotient)
4400             emit_move_insn (quotient, tem);
4401           expand_dec (quotient, const1_rtx);
4402           emit_label (label5);
4403         }
4404         break;
4405
4406       case CEIL_DIV_EXPR:
4407       case CEIL_MOD_EXPR:
4408         if (unsignedp)
4409           {
4410             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4411               {
4412                 rtx t1, t2, t3;
4413                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4414                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4415                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4416                                    tquotient, 1);
4417                 t2 = expand_binop (compute_mode, and_optab, op0,
4418                                    GEN_INT (d - 1),
4419                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4420                 t3 = gen_reg_rtx (compute_mode);
4421                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4422                                       compute_mode, 1, 1);
4423                 if (t3 == 0)
4424                   {
4425                     rtx lab;
4426                     lab = gen_label_rtx ();
4427                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4428                     expand_inc (t1, const1_rtx);
4429                     emit_label (lab);
4430                     quotient = t1;
4431                   }
4432                 else
4433                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4434                                                           t1, t3),
4435                                             tquotient);
4436                 break;
4437               }
4438
4439             /* Try using an instruction that produces both the quotient and
4440                remainder, using truncation.  We can easily compensate the
4441                quotient or remainder to get ceiling rounding, once we have the
4442                remainder.  Notice that we compute also the final remainder
4443                value here, and return the result right away.  */
4444             if (target == 0 || GET_MODE (target) != compute_mode)
4445               target = gen_reg_rtx (compute_mode);
4446
4447             if (rem_flag)
4448               {
4449                 remainder = (REG_P (target)
4450                              ? target : gen_reg_rtx (compute_mode));
4451                 quotient = gen_reg_rtx (compute_mode);
4452               }
4453             else
4454               {
4455                 quotient = (REG_P (target)
4456                             ? target : gen_reg_rtx (compute_mode));
4457                 remainder = gen_reg_rtx (compute_mode);
4458               }
4459
4460             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4461                                      remainder, 1))
4462               {
4463                 /* This could be computed with a branch-less sequence.
4464                    Save that for later.  */
4465                 rtx label = gen_label_rtx ();
4466                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4467                                  compute_mode, label);
4468                 expand_inc (quotient, const1_rtx);
4469                 expand_dec (remainder, op1);
4470                 emit_label (label);
4471                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4472               }
4473
4474             /* No luck with division elimination or divmod.  Have to do it
4475                by conditionally adjusting op0 *and* the result.  */
4476             {
4477               rtx label1, label2;
4478               rtx adjusted_op0, tem;
4479
4480               quotient = gen_reg_rtx (compute_mode);
4481               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4482               label1 = gen_label_rtx ();
4483               label2 = gen_label_rtx ();
4484               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4485                                compute_mode, label1);
4486               emit_move_insn  (quotient, const0_rtx);
4487               emit_jump_insn (gen_jump (label2));
4488               emit_barrier ();
4489               emit_label (label1);
4490               expand_dec (adjusted_op0, const1_rtx);
4491               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4492                                   quotient, 1, OPTAB_LIB_WIDEN);
4493               if (tem != quotient)
4494                 emit_move_insn (quotient, tem);
4495               expand_inc (quotient, const1_rtx);
4496               emit_label (label2);
4497             }
4498           }
4499         else /* signed */
4500           {
4501             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4502                 && INTVAL (op1) >= 0)
4503               {
4504                 /* This is extremely similar to the code for the unsigned case
4505                    above.  For 2.7 we should merge these variants, but for
4506                    2.6.1 I don't want to touch the code for unsigned since that
4507                    get used in C.  The signed case will only be used by other
4508                    languages (Ada).  */
4509
4510                 rtx t1, t2, t3;
4511                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4512                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4513                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4514                                    tquotient, 0);
4515                 t2 = expand_binop (compute_mode, and_optab, op0,
4516                                    GEN_INT (d - 1),
4517                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4518                 t3 = gen_reg_rtx (compute_mode);
4519                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4520                                       compute_mode, 1, 1);
4521                 if (t3 == 0)
4522                   {
4523                     rtx lab;
4524                     lab = gen_label_rtx ();
4525                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4526                     expand_inc (t1, const1_rtx);
4527                     emit_label (lab);
4528                     quotient = t1;
4529                   }
4530                 else
4531                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4532                                                           t1, t3),
4533                                             tquotient);
4534                 break;
4535               }
4536
4537             /* Try using an instruction that produces both the quotient and
4538                remainder, using truncation.  We can easily compensate the
4539                quotient or remainder to get ceiling rounding, once we have the
4540                remainder.  Notice that we compute also the final remainder
4541                value here, and return the result right away.  */
4542             if (target == 0 || GET_MODE (target) != compute_mode)
4543               target = gen_reg_rtx (compute_mode);
4544             if (rem_flag)
4545               {
4546                 remainder= (REG_P (target)
4547                             ? target : gen_reg_rtx (compute_mode));
4548                 quotient = gen_reg_rtx (compute_mode);
4549               }
4550             else
4551               {
4552                 quotient = (REG_P (target)
4553                             ? target : gen_reg_rtx (compute_mode));
4554                 remainder = gen_reg_rtx (compute_mode);
4555               }
4556
4557             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4558                                      remainder, 0))
4559               {
4560                 /* This could be computed with a branch-less sequence.
4561                    Save that for later.  */
4562                 rtx tem;
4563                 rtx label = gen_label_rtx ();
4564                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4565                                  compute_mode, label);
4566                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4567                                     NULL_RTX, 0, OPTAB_WIDEN);
4568                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4569                 expand_inc (quotient, const1_rtx);
4570                 expand_dec (remainder, op1);
4571                 emit_label (label);
4572                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4573               }
4574
4575             /* No luck with division elimination or divmod.  Have to do it
4576                by conditionally adjusting op0 *and* the result.  */
4577             {
4578               rtx label1, label2, label3, label4, label5;
4579               rtx adjusted_op0;
4580               rtx tem;
4581
4582               quotient = gen_reg_rtx (compute_mode);
4583               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4584               label1 = gen_label_rtx ();
4585               label2 = gen_label_rtx ();
4586               label3 = gen_label_rtx ();
4587               label4 = gen_label_rtx ();
4588               label5 = gen_label_rtx ();
4589               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4590               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4591                                compute_mode, label1);
4592               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4593                                   quotient, 0, OPTAB_LIB_WIDEN);
4594               if (tem != quotient)
4595                 emit_move_insn (quotient, tem);
4596               emit_jump_insn (gen_jump (label5));
4597               emit_barrier ();
4598               emit_label (label1);
4599               expand_dec (adjusted_op0, const1_rtx);
4600               emit_jump_insn (gen_jump (label4));
4601               emit_barrier ();
4602               emit_label (label2);
4603               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4604                                compute_mode, label3);
4605               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4606                                   quotient, 0, OPTAB_LIB_WIDEN);
4607               if (tem != quotient)
4608                 emit_move_insn (quotient, tem);
4609               emit_jump_insn (gen_jump (label5));
4610               emit_barrier ();
4611               emit_label (label3);
4612               expand_inc (adjusted_op0, const1_rtx);
4613               emit_label (label4);
4614               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4615                                   quotient, 0, OPTAB_LIB_WIDEN);
4616               if (tem != quotient)
4617                 emit_move_insn (quotient, tem);
4618               expand_inc (quotient, const1_rtx);
4619               emit_label (label5);
4620             }
4621           }
4622         break;
4623
4624       case EXACT_DIV_EXPR:
4625         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4626           {
4627             HOST_WIDE_INT d = INTVAL (op1);
4628             unsigned HOST_WIDE_INT ml;
4629             int pre_shift;
4630             rtx t1;
4631
4632             pre_shift = floor_log2 (d & -d);
4633             ml = invert_mod2n (d >> pre_shift, size);
4634             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4635                                build_int_cst (NULL_TREE, pre_shift),
4636                                NULL_RTX, unsignedp);
4637             quotient = expand_mult (compute_mode, t1,
4638                                     gen_int_mode (ml, compute_mode),
4639                                     NULL_RTX, 1);
4640
4641             insn = get_last_insn ();
4642             set_unique_reg_note (insn,
4643                                  REG_EQUAL,
4644                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4645                                                  compute_mode,
4646                                                  op0, op1));
4647           }
4648         break;
4649
4650       case ROUND_DIV_EXPR:
4651       case ROUND_MOD_EXPR:
4652         if (unsignedp)
4653           {
4654             rtx tem;
4655             rtx label;
4656             label = gen_label_rtx ();
4657             quotient = gen_reg_rtx (compute_mode);
4658             remainder = gen_reg_rtx (compute_mode);
4659             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4660               {
4661                 rtx tem;
4662                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4663                                          quotient, 1, OPTAB_LIB_WIDEN);
4664                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4665                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4666                                           remainder, 1, OPTAB_LIB_WIDEN);
4667               }
4668             tem = plus_constant (op1, -1);
4669             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4670                                 build_int_cst (NULL_TREE, 1),
4671                                 NULL_RTX, 1);
4672             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4673             expand_inc (quotient, const1_rtx);
4674             expand_dec (remainder, op1);
4675             emit_label (label);
4676           }
4677         else
4678           {
4679             rtx abs_rem, abs_op1, tem, mask;
4680             rtx label;
4681             label = gen_label_rtx ();
4682             quotient = gen_reg_rtx (compute_mode);
4683             remainder = gen_reg_rtx (compute_mode);
4684             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4685               {
4686                 rtx tem;
4687                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4688                                          quotient, 0, OPTAB_LIB_WIDEN);
4689                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4690                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4691                                           remainder, 0, OPTAB_LIB_WIDEN);
4692               }
4693             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4694             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4695             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4696                                 build_int_cst (NULL_TREE, 1),
4697                                 NULL_RTX, 1);
4698             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4699             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4700                                 NULL_RTX, 0, OPTAB_WIDEN);
4701             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4702                                  build_int_cst (NULL_TREE, size - 1),
4703                                  NULL_RTX, 0);
4704             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4705                                 NULL_RTX, 0, OPTAB_WIDEN);
4706             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4707                                 NULL_RTX, 0, OPTAB_WIDEN);
4708             expand_inc (quotient, tem);
4709             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4710                                 NULL_RTX, 0, OPTAB_WIDEN);
4711             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4712                                 NULL_RTX, 0, OPTAB_WIDEN);
4713             expand_dec (remainder, tem);
4714             emit_label (label);
4715           }
4716         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4717
4718       default:
4719         gcc_unreachable ();
4720       }
4721
4722   if (quotient == 0)
4723     {
4724       if (target && GET_MODE (target) != compute_mode)
4725         target = 0;
4726
4727       if (rem_flag)
4728         {
4729           /* Try to produce the remainder without producing the quotient.
4730              If we seem to have a divmod pattern that does not require widening,
4731              don't try widening here.  We should really have a WIDEN argument
4732              to expand_twoval_binop, since what we'd really like to do here is
4733              1) try a mod insn in compute_mode
4734              2) try a divmod insn in compute_mode
4735              3) try a div insn in compute_mode and multiply-subtract to get
4736                 remainder
4737              4) try the same things with widening allowed.  */
4738           remainder
4739             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4740                                  op0, op1, target,
4741                                  unsignedp,
4742                                  ((optab2->handlers[compute_mode].insn_code
4743                                    != CODE_FOR_nothing)
4744                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4745           if (remainder == 0)
4746             {
4747               /* No luck there.  Can we do remainder and divide at once
4748                  without a library call?  */
4749               remainder = gen_reg_rtx (compute_mode);
4750               if (! expand_twoval_binop ((unsignedp
4751                                           ? udivmod_optab
4752                                           : sdivmod_optab),
4753                                          op0, op1,
4754                                          NULL_RTX, remainder, unsignedp))
4755                 remainder = 0;
4756             }
4757
4758           if (remainder)
4759             return gen_lowpart (mode, remainder);
4760         }
4761
4762       /* Produce the quotient.  Try a quotient insn, but not a library call.
4763          If we have a divmod in this mode, use it in preference to widening
4764          the div (for this test we assume it will not fail). Note that optab2
4765          is set to the one of the two optabs that the call below will use.  */
4766       quotient
4767         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4768                              op0, op1, rem_flag ? NULL_RTX : target,
4769                              unsignedp,
4770                              ((optab2->handlers[compute_mode].insn_code
4771                                != CODE_FOR_nothing)
4772                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4773
4774       if (quotient == 0)
4775         {
4776           /* No luck there.  Try a quotient-and-remainder insn,
4777              keeping the quotient alone.  */
4778           quotient = gen_reg_rtx (compute_mode);
4779           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4780                                      op0, op1,
4781                                      quotient, NULL_RTX, unsignedp))
4782             {
4783               quotient = 0;
4784               if (! rem_flag)
4785                 /* Still no luck.  If we are not computing the remainder,
4786                    use a library call for the quotient.  */
4787                 quotient = sign_expand_binop (compute_mode,
4788                                               udiv_optab, sdiv_optab,
4789                                               op0, op1, target,
4790                                               unsignedp, OPTAB_LIB_WIDEN);
4791             }
4792         }
4793     }
4794
4795   if (rem_flag)
4796     {
4797       if (target && GET_MODE (target) != compute_mode)
4798         target = 0;
4799
4800       if (quotient == 0)
4801         {
4802           /* No divide instruction either.  Use library for remainder.  */
4803           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4804                                          op0, op1, target,
4805                                          unsignedp, OPTAB_LIB_WIDEN);
4806           /* No remainder function.  Try a quotient-and-remainder
4807              function, keeping the remainder.  */
4808           if (!remainder)
4809             {
4810               remainder = gen_reg_rtx (compute_mode);
4811               if (!expand_twoval_binop_libfunc
4812                   (unsignedp ? udivmod_optab : sdivmod_optab,
4813                    op0, op1,
4814                    NULL_RTX, remainder,
4815                    unsignedp ? UMOD : MOD))
4816                 remainder = NULL_RTX;
4817             }
4818         }
4819       else
4820         {
4821           /* We divided.  Now finish doing X - Y * (X / Y).  */
4822           remainder = expand_mult (compute_mode, quotient, op1,
4823                                    NULL_RTX, unsignedp);
4824           remainder = expand_binop (compute_mode, sub_optab, op0,
4825                                     remainder, target, unsignedp,
4826                                     OPTAB_LIB_WIDEN);
4827         }
4828     }
4829
4830   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4831 }
4832 \f
4833 /* Return a tree node with data type TYPE, describing the value of X.
4834    Usually this is an VAR_DECL, if there is no obvious better choice.
4835    X may be an expression, however we only support those expressions
4836    generated by loop.c.  */
4837
4838 tree
4839 make_tree (tree type, rtx x)
4840 {
4841   tree t;
4842
4843   switch (GET_CODE (x))
4844     {
4845     case CONST_INT:
4846       {
4847         HOST_WIDE_INT hi = 0;
4848
4849         if (INTVAL (x) < 0
4850             && !(TYPE_UNSIGNED (type)
4851                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4852                      < HOST_BITS_PER_WIDE_INT)))
4853           hi = -1;
4854
4855         t = build_int_cst_wide (type, INTVAL (x), hi);
4856
4857         return t;
4858       }
4859
4860     case CONST_DOUBLE:
4861       if (GET_MODE (x) == VOIDmode)
4862         t = build_int_cst_wide (type,
4863                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4864       else
4865         {
4866           REAL_VALUE_TYPE d;
4867
4868           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4869           t = build_real (type, d);
4870         }
4871
4872       return t;
4873
4874     case CONST_VECTOR:
4875       {
4876         int i, units;
4877         rtx elt;
4878         tree t = NULL_TREE;
4879
4880         units = CONST_VECTOR_NUNITS (x);
4881
4882         /* Build a tree with vector elements.  */
4883         for (i = units - 1; i >= 0; --i)
4884           {
4885             elt = CONST_VECTOR_ELT (x, i);
4886             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4887           }
4888
4889         return build_vector (type, t);
4890       }
4891
4892     case PLUS:
4893       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4894                            make_tree (type, XEXP (x, 1))));
4895
4896     case MINUS:
4897       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4898                            make_tree (type, XEXP (x, 1))));
4899
4900     case NEG:
4901       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4902
4903     case MULT:
4904       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4905                            make_tree (type, XEXP (x, 1))));
4906
4907     case ASHIFT:
4908       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4909                            make_tree (type, XEXP (x, 1))));
4910
4911     case LSHIFTRT:
4912       t = lang_hooks.types.unsigned_type (type);
4913       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4914                                          make_tree (t, XEXP (x, 0)),
4915                                          make_tree (type, XEXP (x, 1))));
4916
4917     case ASHIFTRT:
4918       t = lang_hooks.types.signed_type (type);
4919       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4920                                          make_tree (t, XEXP (x, 0)),
4921                                          make_tree (type, XEXP (x, 1))));
4922
4923     case DIV:
4924       if (TREE_CODE (type) != REAL_TYPE)
4925         t = lang_hooks.types.signed_type (type);
4926       else
4927         t = type;
4928
4929       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4930                                          make_tree (t, XEXP (x, 0)),
4931                                          make_tree (t, XEXP (x, 1))));
4932     case UDIV:
4933       t = lang_hooks.types.unsigned_type (type);
4934       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4935                                          make_tree (t, XEXP (x, 0)),
4936                                          make_tree (t, XEXP (x, 1))));
4937
4938     case SIGN_EXTEND:
4939     case ZERO_EXTEND:
4940       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4941                                           GET_CODE (x) == ZERO_EXTEND);
4942       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4943
4944     default:
4945       t = build_decl (VAR_DECL, NULL_TREE, type);
4946
4947       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4948          ptr_mode.  So convert.  */
4949       if (POINTER_TYPE_P (type))
4950         x = convert_memory_address (TYPE_MODE (type), x);
4951
4952       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4953          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4954       t->decl.rtl = x;
4955
4956       return t;
4957     }
4958 }
4959
4960 /* Check whether the multiplication X * MULT + ADD overflows.
4961    X, MULT and ADD must be CONST_*.
4962    MODE is the machine mode for the computation.
4963    X and MULT must have mode MODE.  ADD may have a different mode.
4964    So can X (defaults to same as MODE).
4965    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4966
4967 bool
4968 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4969                            enum machine_mode mode, int unsignedp)
4970 {
4971   tree type, mult_type, add_type, result;
4972
4973   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4974
4975   /* In order to get a proper overflow indication from an unsigned
4976      type, we have to pretend that it's a sizetype.  */
4977   mult_type = type;
4978   if (unsignedp)
4979     {
4980       /* FIXME:It would be nice if we could step directly from this
4981          type to its sizetype equivalent.  */
4982       mult_type = build_distinct_type_copy (type);
4983       TYPE_IS_SIZETYPE (mult_type) = 1;
4984     }
4985
4986   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4987               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4988
4989   result = fold (build2 (PLUS_EXPR, mult_type,
4990                          fold (build2 (MULT_EXPR, mult_type,
4991                                        make_tree (mult_type, x),
4992                                        make_tree (mult_type, mult))),
4993                          make_tree (add_type, add)));
4994
4995   return TREE_CONSTANT_OVERFLOW (result);
4996 }
4997
4998 /* Return an rtx representing the value of X * MULT + ADD.
4999    TARGET is a suggestion for where to store the result (an rtx).
5000    MODE is the machine mode for the computation.
5001    X and MULT must have mode MODE.  ADD may have a different mode.
5002    So can X (defaults to same as MODE).
5003    UNSIGNEDP is nonzero to do unsigned multiplication.
5004    This may emit insns.  */
5005
5006 rtx
5007 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
5008                  int unsignedp)
5009 {
5010   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5011   tree add_type = (GET_MODE (add) == VOIDmode
5012                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5013                                                            unsignedp));
5014   tree result =  fold (build2 (PLUS_EXPR, type,
5015                                fold (build2 (MULT_EXPR, type,
5016                                              make_tree (type, x),
5017                                              make_tree (type, mult))),
5018                                make_tree (add_type, add)));
5019
5020   return expand_expr (result, target, VOIDmode, 0);
5021 }
5022 \f
5023 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5024    and returning TARGET.
5025
5026    If TARGET is 0, a pseudo-register or constant is returned.  */
5027
5028 rtx
5029 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5030 {
5031   rtx tem = 0;
5032
5033   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5034     tem = simplify_binary_operation (AND, mode, op0, op1);
5035   if (tem == 0)
5036     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5037
5038   if (target == 0)
5039     target = tem;
5040   else if (tem != target)
5041     emit_move_insn (target, tem);
5042   return target;
5043 }
5044 \f
5045 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5046    and storing in TARGET.  Normally return TARGET.
5047    Return 0 if that cannot be done.
5048
5049    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5050    it is VOIDmode, they cannot both be CONST_INT.
5051
5052    UNSIGNEDP is for the case where we have to widen the operands
5053    to perform the operation.  It says to use zero-extension.
5054
5055    NORMALIZEP is 1 if we should convert the result to be either zero
5056    or one.  Normalize is -1 if we should convert the result to be
5057    either zero or -1.  If NORMALIZEP is zero, the result will be left
5058    "raw" out of the scc insn.  */
5059
5060 rtx
5061 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5062                  enum machine_mode mode, int unsignedp, int normalizep)
5063 {
5064   rtx subtarget;
5065   enum insn_code icode;
5066   enum machine_mode compare_mode;
5067   enum machine_mode target_mode = GET_MODE (target);
5068   rtx tem;
5069   rtx last = get_last_insn ();
5070   rtx pattern, comparison;
5071
5072   if (unsignedp)
5073     code = unsigned_condition (code);
5074
5075   /* If one operand is constant, make it the second one.  Only do this
5076      if the other operand is not constant as well.  */
5077
5078   if (swap_commutative_operands_p (op0, op1))
5079     {
5080       tem = op0;
5081       op0 = op1;
5082       op1 = tem;
5083       code = swap_condition (code);
5084     }
5085
5086   if (mode == VOIDmode)
5087     mode = GET_MODE (op0);
5088
5089   /* For some comparisons with 1 and -1, we can convert this to
5090      comparisons with zero.  This will often produce more opportunities for
5091      store-flag insns.  */
5092
5093   switch (code)
5094     {
5095     case LT:
5096       if (op1 == const1_rtx)
5097         op1 = const0_rtx, code = LE;
5098       break;
5099     case LE:
5100       if (op1 == constm1_rtx)
5101         op1 = const0_rtx, code = LT;
5102       break;
5103     case GE:
5104       if (op1 == const1_rtx)
5105         op1 = const0_rtx, code = GT;
5106       break;
5107     case GT:
5108       if (op1 == constm1_rtx)
5109         op1 = const0_rtx, code = GE;
5110       break;
5111     case GEU:
5112       if (op1 == const1_rtx)
5113         op1 = const0_rtx, code = NE;
5114       break;
5115     case LTU:
5116       if (op1 == const1_rtx)
5117         op1 = const0_rtx, code = EQ;
5118       break;
5119     default:
5120       break;
5121     }
5122
5123   /* If we are comparing a double-word integer with zero or -1, we can
5124      convert the comparison into one involving a single word.  */
5125   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5126       && GET_MODE_CLASS (mode) == MODE_INT
5127       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5128     {
5129       if ((code == EQ || code == NE)
5130           && (op1 == const0_rtx || op1 == constm1_rtx))
5131         {
5132           rtx op00, op01, op0both;
5133
5134           /* Do a logical OR or AND of the two words and compare the result.  */
5135           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5136           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5137           op0both = expand_binop (word_mode,
5138                                   op1 == const0_rtx ? ior_optab : and_optab,
5139                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5140
5141           if (op0both != 0)
5142             return emit_store_flag (target, code, op0both, op1, word_mode,
5143                                     unsignedp, normalizep);
5144         }
5145       else if ((code == LT || code == GE) && op1 == const0_rtx)
5146         {
5147           rtx op0h;
5148
5149           /* If testing the sign bit, can just test on high word.  */
5150           op0h = simplify_gen_subreg (word_mode, op0, mode,
5151                                       subreg_highpart_offset (word_mode, mode));
5152           return emit_store_flag (target, code, op0h, op1, word_mode,
5153                                   unsignedp, normalizep);
5154         }
5155     }
5156
5157   /* From now on, we won't change CODE, so set ICODE now.  */
5158   icode = setcc_gen_code[(int) code];
5159
5160   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5161      complement of A (for GE) and shifting the sign bit to the low bit.  */
5162   if (op1 == const0_rtx && (code == LT || code == GE)
5163       && GET_MODE_CLASS (mode) == MODE_INT
5164       && (normalizep || STORE_FLAG_VALUE == 1
5165           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5166               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5167                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5168     {
5169       subtarget = target;
5170
5171       /* If the result is to be wider than OP0, it is best to convert it
5172          first.  If it is to be narrower, it is *incorrect* to convert it
5173          first.  */
5174       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5175         {
5176           op0 = convert_modes (target_mode, mode, op0, 0);
5177           mode = target_mode;
5178         }
5179
5180       if (target_mode != mode)
5181         subtarget = 0;
5182
5183       if (code == GE)
5184         op0 = expand_unop (mode, one_cmpl_optab, op0,
5185                            ((STORE_FLAG_VALUE == 1 || normalizep)
5186                             ? 0 : subtarget), 0);
5187
5188       if (STORE_FLAG_VALUE == 1 || normalizep)
5189         /* If we are supposed to produce a 0/1 value, we want to do
5190            a logical shift from the sign bit to the low-order bit; for
5191            a -1/0 value, we do an arithmetic shift.  */
5192         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5193                             size_int (GET_MODE_BITSIZE (mode) - 1),
5194                             subtarget, normalizep != -1);
5195
5196       if (mode != target_mode)
5197         op0 = convert_modes (target_mode, mode, op0, 0);
5198
5199       return op0;
5200     }
5201
5202   if (icode != CODE_FOR_nothing)
5203     {
5204       insn_operand_predicate_fn pred;
5205
5206       /* We think we may be able to do this with a scc insn.  Emit the
5207          comparison and then the scc insn.  */
5208
5209       do_pending_stack_adjust ();
5210       last = get_last_insn ();
5211
5212       comparison
5213         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5214       if (CONSTANT_P (comparison))
5215         {
5216           switch (GET_CODE (comparison))
5217             {
5218             case CONST_INT:
5219               if (comparison == const0_rtx)
5220                 return const0_rtx;
5221               break;
5222
5223 #ifdef FLOAT_STORE_FLAG_VALUE
5224             case CONST_DOUBLE:
5225               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5226                 return const0_rtx;
5227               break;
5228 #endif
5229             default:
5230               gcc_unreachable ();
5231             }
5232
5233           if (normalizep == 1)
5234             return const1_rtx;
5235           if (normalizep == -1)
5236             return constm1_rtx;
5237           return const_true_rtx;
5238         }
5239
5240       /* The code of COMPARISON may not match CODE if compare_from_rtx
5241          decided to swap its operands and reverse the original code.
5242
5243          We know that compare_from_rtx returns either a CONST_INT or
5244          a new comparison code, so it is safe to just extract the
5245          code from COMPARISON.  */
5246       code = GET_CODE (comparison);
5247
5248       /* Get a reference to the target in the proper mode for this insn.  */
5249       compare_mode = insn_data[(int) icode].operand[0].mode;
5250       subtarget = target;
5251       pred = insn_data[(int) icode].operand[0].predicate;
5252       if (optimize || ! (*pred) (subtarget, compare_mode))
5253         subtarget = gen_reg_rtx (compare_mode);
5254
5255       pattern = GEN_FCN (icode) (subtarget);
5256       if (pattern)
5257         {
5258           emit_insn (pattern);
5259
5260           /* If we are converting to a wider mode, first convert to
5261              TARGET_MODE, then normalize.  This produces better combining
5262              opportunities on machines that have a SIGN_EXTRACT when we are
5263              testing a single bit.  This mostly benefits the 68k.
5264
5265              If STORE_FLAG_VALUE does not have the sign bit set when
5266              interpreted in COMPARE_MODE, we can do this conversion as
5267              unsigned, which is usually more efficient.  */
5268           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5269             {
5270               convert_move (target, subtarget,
5271                             (GET_MODE_BITSIZE (compare_mode)
5272                              <= HOST_BITS_PER_WIDE_INT)
5273                             && 0 == (STORE_FLAG_VALUE
5274                                      & ((HOST_WIDE_INT) 1
5275                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5276               op0 = target;
5277               compare_mode = target_mode;
5278             }
5279           else
5280             op0 = subtarget;
5281
5282           /* If we want to keep subexpressions around, don't reuse our
5283              last target.  */
5284
5285           if (optimize)
5286             subtarget = 0;
5287
5288           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5289              we don't have to do anything.  */
5290           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5291             ;
5292           /* STORE_FLAG_VALUE might be the most negative number, so write
5293              the comparison this way to avoid a compiler-time warning.  */
5294           else if (- normalizep == STORE_FLAG_VALUE)
5295             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5296
5297           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5298              makes it hard to use a value of just the sign bit due to
5299              ANSI integer constant typing rules.  */
5300           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5301                    && (STORE_FLAG_VALUE
5302                        & ((HOST_WIDE_INT) 1
5303                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5304             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5305                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5306                                 subtarget, normalizep == 1);
5307           else
5308             {
5309               gcc_assert (STORE_FLAG_VALUE & 1);
5310
5311               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5312               if (normalizep == -1)
5313                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5314             }
5315
5316           /* If we were converting to a smaller mode, do the
5317              conversion now.  */
5318           if (target_mode != compare_mode)
5319             {
5320               convert_move (target, op0, 0);
5321               return target;
5322             }
5323           else
5324             return op0;
5325         }
5326     }
5327
5328   delete_insns_since (last);
5329
5330   /* If optimizing, use different pseudo registers for each insn, instead
5331      of reusing the same pseudo.  This leads to better CSE, but slows
5332      down the compiler, since there are more pseudos */
5333   subtarget = (!optimize
5334                && (target_mode == mode)) ? target : NULL_RTX;
5335
5336   /* If we reached here, we can't do this with a scc insn.  However, there
5337      are some comparisons that can be done directly.  For example, if
5338      this is an equality comparison of integers, we can try to exclusive-or
5339      (or subtract) the two operands and use a recursive call to try the
5340      comparison with zero.  Don't do any of these cases if branches are
5341      very cheap.  */
5342
5343   if (BRANCH_COST > 0
5344       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5345       && op1 != const0_rtx)
5346     {
5347       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5348                           OPTAB_WIDEN);
5349
5350       if (tem == 0)
5351         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5352                             OPTAB_WIDEN);
5353       if (tem != 0)
5354         tem = emit_store_flag (target, code, tem, const0_rtx,
5355                                mode, unsignedp, normalizep);
5356       if (tem == 0)
5357         delete_insns_since (last);
5358       return tem;
5359     }
5360
5361   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5362      the constant zero.  Reject all other comparisons at this point.  Only
5363      do LE and GT if branches are expensive since they are expensive on
5364      2-operand machines.  */
5365
5366   if (BRANCH_COST == 0
5367       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5368       || (code != EQ && code != NE
5369           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5370     return 0;
5371
5372   /* See what we need to return.  We can only return a 1, -1, or the
5373      sign bit.  */
5374
5375   if (normalizep == 0)
5376     {
5377       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5378         normalizep = STORE_FLAG_VALUE;
5379
5380       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5381                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5382                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5383         ;
5384       else
5385         return 0;
5386     }
5387
5388   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5389      do the necessary operation below.  */
5390
5391   tem = 0;
5392
5393   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5394      the sign bit set.  */
5395
5396   if (code == LE)
5397     {
5398       /* This is destructive, so SUBTARGET can't be OP0.  */
5399       if (rtx_equal_p (subtarget, op0))
5400         subtarget = 0;
5401
5402       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5403                           OPTAB_WIDEN);
5404       if (tem)
5405         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5406                             OPTAB_WIDEN);
5407     }
5408
5409   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5410      number of bits in the mode of OP0, minus one.  */
5411
5412   if (code == GT)
5413     {
5414       if (rtx_equal_p (subtarget, op0))
5415         subtarget = 0;
5416
5417       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5418                           size_int (GET_MODE_BITSIZE (mode) - 1),
5419                           subtarget, 0);
5420       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5421                           OPTAB_WIDEN);
5422     }
5423
5424   if (code == EQ || code == NE)
5425     {
5426       /* For EQ or NE, one way to do the comparison is to apply an operation
5427          that converts the operand into a positive number if it is nonzero
5428          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5429          for NE we negate.  This puts the result in the sign bit.  Then we
5430          normalize with a shift, if needed.
5431
5432          Two operations that can do the above actions are ABS and FFS, so try
5433          them.  If that doesn't work, and MODE is smaller than a full word,
5434          we can use zero-extension to the wider mode (an unsigned conversion)
5435          as the operation.  */
5436
5437       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5438          that is compensated by the subsequent overflow when subtracting
5439          one / negating.  */
5440
5441       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5442         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5443       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5444         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5445       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5446         {
5447           tem = convert_modes (word_mode, mode, op0, 1);
5448           mode = word_mode;
5449         }
5450
5451       if (tem != 0)
5452         {
5453           if (code == EQ)
5454             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5455                                 0, OPTAB_WIDEN);
5456           else
5457             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5458         }
5459
5460       /* If we couldn't do it that way, for NE we can "or" the two's complement
5461          of the value with itself.  For EQ, we take the one's complement of
5462          that "or", which is an extra insn, so we only handle EQ if branches
5463          are expensive.  */
5464
5465       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5466         {
5467           if (rtx_equal_p (subtarget, op0))
5468             subtarget = 0;
5469
5470           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5471           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5472                               OPTAB_WIDEN);
5473
5474           if (tem && code == EQ)
5475             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5476         }
5477     }
5478
5479   if (tem && normalizep)
5480     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5481                         size_int (GET_MODE_BITSIZE (mode) - 1),
5482                         subtarget, normalizep == 1);
5483
5484   if (tem)
5485     {
5486       if (GET_MODE (tem) != target_mode)
5487         {
5488           convert_move (target, tem, 0);
5489           tem = target;
5490         }
5491       else if (!subtarget)
5492         {
5493           emit_move_insn (target, tem);
5494           tem = target;
5495         }
5496     }
5497   else
5498     delete_insns_since (last);
5499
5500   return tem;
5501 }
5502
5503 /* Like emit_store_flag, but always succeeds.  */
5504
5505 rtx
5506 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5507                        enum machine_mode mode, int unsignedp, int normalizep)
5508 {
5509   rtx tem, label;
5510
5511   /* First see if emit_store_flag can do the job.  */
5512   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5513   if (tem != 0)
5514     return tem;
5515
5516   if (normalizep == 0)
5517     normalizep = 1;
5518
5519   /* If this failed, we have to do this with set/compare/jump/set code.  */
5520
5521   if (!REG_P (target)
5522       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5523     target = gen_reg_rtx (GET_MODE (target));
5524
5525   emit_move_insn (target, const1_rtx);
5526   label = gen_label_rtx ();
5527   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5528                            NULL_RTX, label);
5529
5530   emit_move_insn (target, const0_rtx);
5531   emit_label (label);
5532
5533   return target;
5534 }
5535 \f
5536 /* Perform possibly multi-word comparison and conditional jump to LABEL
5537    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5538
5539    The algorithm is based on the code in expr.c:do_jump.
5540
5541    Note that this does not perform a general comparison.  Only variants
5542    generated within expmed.c are correctly handled, others abort (but could
5543    be handled if needed).  */
5544
5545 static void
5546 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5547                  rtx label)
5548 {
5549   /* If this mode is an integer too wide to compare properly,
5550      compare word by word.  Rely on cse to optimize constant cases.  */
5551
5552   if (GET_MODE_CLASS (mode) == MODE_INT
5553       && ! can_compare_p (op, mode, ccp_jump))
5554     {
5555       rtx label2 = gen_label_rtx ();
5556
5557       switch (op)
5558         {
5559         case LTU:
5560           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5561           break;
5562
5563         case LEU:
5564           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5565           break;
5566
5567         case LT:
5568           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5569           break;
5570
5571         case GT:
5572           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5573           break;
5574
5575         case GE:
5576           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5577           break;
5578
5579           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5580              that's the only equality operations we do */
5581         case EQ:
5582           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5583           do_jump_by_parts_equality_rtx (arg1, label2, label);
5584           break;
5585
5586         case NE:
5587           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5588           do_jump_by_parts_equality_rtx (arg1, label, label2);
5589           break;
5590
5591         default:
5592           gcc_unreachable ();
5593         }
5594
5595       emit_label (label2);
5596     }
5597   else
5598     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5599 }