gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 2, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING.  If not, write to the Free
  21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  22 02110-1301, USA.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "toplev.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "real.h"
  38 #include "recog.h"
  39 #include "langhooks.h"
  40
  41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT, rtx);
  44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  45                                    unsigned HOST_WIDE_INT, rtx);
  46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT, rtx, int);
  50 static rtx mask_rtx (enum machine_mode, int, int, int);
  51 static rtx lshift_value (enum machine_mode, rtx, int, int);
  52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  53                                     unsigned HOST_WIDE_INT, int);
  54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57
  58 /* Test whether a value is zero of a power of two.  */
  59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  60
  61 /* Nonzero means divides or modulus operations are relatively cheap for
  62    powers of two, so don't use branches; emit the operation instead.
  63    Usually, this will mean that the MD file will emit non-branch
  64    sequences.  */
  65
  66 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  67 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  68
  69 #ifndef SLOW_UNALIGNED_ACCESS
  70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  71 #endif
  72
  73 /* For compilers that support multiple targets with different word sizes,
  74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  75    is the H8/300(H) compiler.  */
  76
  77 #ifndef MAX_BITS_PER_WORD
  78 #define MAX_BITS_PER_WORD BITS_PER_WORD
  79 #endif
  80
  81 /* Reduce conditional compilation elsewhere.  */
  82 #ifndef HAVE_insv
  83 #define HAVE_insv       0
  84 #define CODE_FOR_insv   CODE_FOR_nothing
  85 #define gen_insv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extv
  88 #define HAVE_extv       0
  89 #define CODE_FOR_extv   CODE_FOR_nothing
  90 #define gen_extv(a,b,c,d) NULL_RTX
  91 #endif
  92 #ifndef HAVE_extzv
  93 #define HAVE_extzv      0
  94 #define CODE_FOR_extzv  CODE_FOR_nothing
  95 #define gen_extzv(a,b,c,d) NULL_RTX
  96 #endif
  97
  98 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  99    shift count and some by mode.  */
 100 static int zero_cost;
 101 static int add_cost[NUM_MACHINE_MODES];
 102 static int neg_cost[NUM_MACHINE_MODES];
 103 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int mul_cost[NUM_MACHINE_MODES];
 107 static int sdiv_cost[NUM_MACHINE_MODES];
 108 static int udiv_cost[NUM_MACHINE_MODES];
 109 static int mul_widen_cost[NUM_MACHINE_MODES];
 110 static int mul_highpart_cost[NUM_MACHINE_MODES];
 111
 112 void
 113 init_expmed (void)
 114 {
 115   struct
 116   {
 117     struct rtx_def reg;         rtunion reg_fld[2];
 118     struct rtx_def plus;        rtunion plus_fld1;
 119     struct rtx_def neg;
 120     struct rtx_def mult;        rtunion mult_fld1;
 121     struct rtx_def sdiv;        rtunion sdiv_fld1;
 122     struct rtx_def udiv;        rtunion udiv_fld1;
 123     struct rtx_def zext;
 124     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 125     struct rtx_def smod_32;     rtunion smod_32_fld1;
 126     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 127     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 128     struct rtx_def wide_trunc;
 129     struct rtx_def shift;       rtunion shift_fld1;
 130     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 131     struct rtx_def shift_add;   rtunion shift_add_fld1;
 132     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 133   } all;
 134
 135   rtx pow2[MAX_BITS_PER_WORD];
 136   rtx cint[MAX_BITS_PER_WORD];
 137   int m, n;
 138   enum machine_mode mode, wider_mode;
 139
 140   zero_cost = rtx_cost (const0_rtx, 0);
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       cint[m] = GEN_INT (m);
 146     }
 147
 148   memset (&all, 0, sizeof all);
 149
 150   PUT_CODE (&all.reg, REG);
 151   /* Avoid using hard regs in ways which may be unsupported.  */
 152   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 153
 154   PUT_CODE (&all.plus, PLUS);
 155   XEXP (&all.plus, 0) = &all.reg;
 156   XEXP (&all.plus, 1) = &all.reg;
 157
 158   PUT_CODE (&all.neg, NEG);
 159   XEXP (&all.neg, 0) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.sdiv, DIV);
 166   XEXP (&all.sdiv, 0) = &all.reg;
 167   XEXP (&all.sdiv, 1) = &all.reg;
 168
 169   PUT_CODE (&all.udiv, UDIV);
 170   XEXP (&all.udiv, 0) = &all.reg;
 171   XEXP (&all.udiv, 1) = &all.reg;
 172
 173   PUT_CODE (&all.sdiv_32, DIV);
 174   XEXP (&all.sdiv_32, 0) = &all.reg;
 175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 176
 177   PUT_CODE (&all.smod_32, MOD);
 178   XEXP (&all.smod_32, 0) = &all.reg;
 179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 180
 181   PUT_CODE (&all.zext, ZERO_EXTEND);
 182   XEXP (&all.zext, 0) = &all.reg;
 183
 184   PUT_CODE (&all.wide_mult, MULT);
 185   XEXP (&all.wide_mult, 0) = &all.zext;
 186   XEXP (&all.wide_mult, 1) = &all.zext;
 187
 188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 190
 191   PUT_CODE (&all.wide_trunc, TRUNCATE);
 192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 193
 194   PUT_CODE (&all.shift, ASHIFT);
 195   XEXP (&all.shift, 0) = &all.reg;
 196
 197   PUT_CODE (&all.shift_mult, MULT);
 198   XEXP (&all.shift_mult, 0) = &all.reg;
 199
 200   PUT_CODE (&all.shift_add, PLUS);
 201   XEXP (&all.shift_add, 0) = &all.shift_mult;
 202   XEXP (&all.shift_add, 1) = &all.reg;
 203
 204   PUT_CODE (&all.shift_sub, MINUS);
 205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 206   XEXP (&all.shift_sub, 1) = &all.reg;
 207
 208   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 209        mode != VOIDmode;
 210        mode = GET_MODE_WIDER_MODE (mode))
 211     {
 212       PUT_MODE (&all.reg, mode);
 213       PUT_MODE (&all.plus, mode);
 214       PUT_MODE (&all.neg, mode);
 215       PUT_MODE (&all.mult, mode);
 216       PUT_MODE (&all.sdiv, mode);
 217       PUT_MODE (&all.udiv, mode);
 218       PUT_MODE (&all.sdiv_32, mode);
 219       PUT_MODE (&all.smod_32, mode);
 220       PUT_MODE (&all.wide_trunc, mode);
 221       PUT_MODE (&all.shift, mode);
 222       PUT_MODE (&all.shift_mult, mode);
 223       PUT_MODE (&all.shift_add, mode);
 224       PUT_MODE (&all.shift_sub, mode);
 225
 226       add_cost[mode] = rtx_cost (&all.plus, SET);
 227       neg_cost[mode] = rtx_cost (&all.neg, SET);
 228       mul_cost[mode] = rtx_cost (&all.mult, SET);
 229       sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
 230       udiv_cost[mode] = rtx_cost (&all.udiv, SET);
 231
 232       sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
 233                                <= 2 * add_cost[mode]);
 234       smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
 235                                <= 4 * add_cost[mode]);
 236
 237       wider_mode = GET_MODE_WIDER_MODE (mode);
 238       if (wider_mode != VOIDmode)
 239         {
 240           PUT_MODE (&all.zext, wider_mode);
 241           PUT_MODE (&all.wide_mult, wider_mode);
 242           PUT_MODE (&all.wide_lshr, wider_mode);
 243           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 244
 245           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 246           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 247         }
 248
 249       shift_cost[mode][0] = 0;
 250       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 251
 252       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 253       for (m = 1; m < n; m++)
 254         {
 255           XEXP (&all.shift, 1) = cint[m];
 256           XEXP (&all.shift_mult, 1) = pow2[m];
 257
 258           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 259           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 260           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 261         }
 262     }
 263 }
 264
 265 /* Return an rtx representing minus the value of X.
 266    MODE is the intended mode of the result,
 267    useful if X is a CONST_INT.  */
 268
 269 rtx
 270 negate_rtx (enum machine_mode mode, rtx x)
 271 {
 272   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 273
 274   if (result == 0)
 275     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 276
 277   return result;
 278 }
 279
 280 /* Report on the availability of insv/extv/extzv and the desired mode
 281    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 282    is false; else the mode of the specified operand.  If OPNO is -1,
 283    all the caller cares about is whether the insn is available.  */
 284 enum machine_mode
 285 mode_for_extraction (enum extraction_pattern pattern, int opno)
 286 {
 287   const struct insn_data *data;
 288
 289   switch (pattern)
 290     {
 291     case EP_insv:
 292       if (HAVE_insv)
 293         {
 294           data = &insn_data[CODE_FOR_insv];
 295           break;
 296         }
 297       return MAX_MACHINE_MODE;
 298
 299     case EP_extv:
 300       if (HAVE_extv)
 301         {
 302           data = &insn_data[CODE_FOR_extv];
 303           break;
 304         }
 305       return MAX_MACHINE_MODE;
 306
 307     case EP_extzv:
 308       if (HAVE_extzv)
 309         {
 310           data = &insn_data[CODE_FOR_extzv];
 311           break;
 312         }
 313       return MAX_MACHINE_MODE;
 314
 315     default:
 316       gcc_unreachable ();
 317     }
 318
 319   if (opno == -1)
 320     return VOIDmode;
 321
 322   /* Everyone who uses this function used to follow it with
 323      if (result == VOIDmode) result = word_mode; */
 324   if (data->operand[opno].mode == VOIDmode)
 325     return word_mode;
 326   return data->operand[opno].mode;
 327 }
 328
 329 \f
 330 /* Generate code to store value from rtx VALUE
 331    into a bit-field within structure STR_RTX
 332    containing BITSIZE bits starting at bit BITNUM.
 333    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 334    ALIGN is the alignment that STR_RTX is known to have.
 335    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 336
 337 /* ??? Note that there are two different ideas here for how
 338    to determine the size to count bits within, for a register.
 339    One is BITS_PER_WORD, and the other is the size of operand 3
 340    of the insv pattern.
 341
 342    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 343    else, we use the mode of operand 3.  */
 344
 345 rtx
 346 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 347                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 348                  rtx value)
 349 {
 350   unsigned int unit
 351     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 352   unsigned HOST_WIDE_INT offset, bitpos;
 353   rtx op0 = str_rtx;
 354   int byte_offset;
 355   rtx orig_value;
 356
 357   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 358
 359   while (GET_CODE (op0) == SUBREG)
 360     {
 361       /* The following line once was done only if WORDS_BIG_ENDIAN,
 362          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 363          meaningful at a much higher level; when structures are copied
 364          between memory and regs, the higher-numbered regs
 365          always get higher addresses.  */
 366       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 367       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 368
 369       byte_offset = 0;
 370
 371       /* Paradoxical subregs need special handling on big endian machines.  */
 372       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 373         {
 374           int difference = inner_mode_size - outer_mode_size;
 375
 376           if (WORDS_BIG_ENDIAN)
 377             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 378           if (BYTES_BIG_ENDIAN)
 379             byte_offset += difference % UNITS_PER_WORD;
 380         }
 381       else
 382         byte_offset = SUBREG_BYTE (op0);
 383
 384       bitnum += byte_offset * BITS_PER_UNIT;
 385       op0 = SUBREG_REG (op0);
 386     }
 387
 388   /* No action is needed if the target is a register and if the field
 389      lies completely outside that register.  This can occur if the source
 390      code contains an out-of-bounds access to a small array.  */
 391   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 392     return value;
 393
 394   /* Use vec_set patterns for inserting parts of vectors whenever
 395      available.  */
 396   if (VECTOR_MODE_P (GET_MODE (op0))
 397       && !MEM_P (op0)
 398       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 399           != CODE_FOR_nothing)
 400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 403     {
 404       enum machine_mode outermode = GET_MODE (op0);
 405       enum machine_mode innermode = GET_MODE_INNER (outermode);
 406       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 407       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 408       rtx rtxpos = GEN_INT (pos);
 409       rtx src = value;
 410       rtx dest = op0;
 411       rtx pat, seq;
 412       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 413       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 414       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 415
 416       start_sequence ();
 417
 418       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 419         src = copy_to_mode_reg (mode1, src);
 420
 421       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 422         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 423
 424       /* We could handle this, but we should always be called with a pseudo
 425          for our targets and all insns should take them as outputs.  */
 426       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 427                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 428                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 429       pat = GEN_FCN (icode) (dest, src, rtxpos);
 430       seq = get_insns ();
 431       end_sequence ();
 432       if (pat)
 433         {
 434           emit_insn (seq);
 435           emit_insn (pat);
 436           return dest;
 437         }
 438     }
 439
 440   /* If the target is a register, overwriting the entire object, or storing
 441      a full-word or multi-word field can be done with just a SUBREG.
 442
 443      If the target is memory, storing any naturally aligned field can be
 444      done with a simple store.  For targets that support fast unaligned
 445      memory, any naturally sized, unit aligned field can be done directly.  */
 446
 447   offset = bitnum / unit;
 448   bitpos = bitnum % unit;
 449   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 450                 + (offset * UNITS_PER_WORD);
 451
 452   if (bitpos == 0
 453       && bitsize == GET_MODE_BITSIZE (fieldmode)
 454       && (!MEM_P (op0)
 455           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 456              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 457              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 458           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 459              || (offset * BITS_PER_UNIT % bitsize == 0
 460                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 461     {
 462       if (MEM_P (op0))
 463         op0 = adjust_address (op0, fieldmode, offset);
 464       else if (GET_MODE (op0) != fieldmode)
 465         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 466                                    byte_offset);
 467       emit_move_insn (op0, value);
 468       return value;
 469     }
 470
 471   /* Make sure we are playing with integral modes.  Pun with subregs
 472      if we aren't.  This must come after the entire register case above,
 473      since that case is valid for any mode.  The following cases are only
 474      valid for integral modes.  */
 475   {
 476     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 477     if (imode != GET_MODE (op0))
 478       {
 479         if (MEM_P (op0))
 480           op0 = adjust_address (op0, imode, 0);
 481         else
 482           {
 483             gcc_assert (imode != BLKmode);
 484             op0 = gen_lowpart (imode, op0);
 485           }
 486       }
 487   }
 488
 489   /* We may be accessing data outside the field, which means
 490      we can alias adjacent data.  */
 491   if (MEM_P (op0))
 492     {
 493       op0 = shallow_copy_rtx (op0);
 494       set_mem_alias_set (op0, 0);
 495       set_mem_expr (op0, 0);
 496     }
 497
 498   /* If OP0 is a register, BITPOS must count within a word.
 499      But as we have it, it counts within whatever size OP0 now has.
 500      On a bigendian machine, these are not the same, so convert.  */
 501   if (BYTES_BIG_ENDIAN
 502       && !MEM_P (op0)
 503       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 504     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 505
 506   /* Storing an lsb-aligned field in a register
 507      can be done with a movestrict instruction.  */
 508
 509   if (!MEM_P (op0)
 510       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 511       && bitsize == GET_MODE_BITSIZE (fieldmode)
 512       && (movstrict_optab->handlers[fieldmode].insn_code
 513           != CODE_FOR_nothing))
 514     {
 515       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 516
 517       /* Get appropriate low part of the value being stored.  */
 518       if (GET_CODE (value) == CONST_INT || REG_P (value))
 519         value = gen_lowpart (fieldmode, value);
 520       else if (!(GET_CODE (value) == SYMBOL_REF
 521                  || GET_CODE (value) == LABEL_REF
 522                  || GET_CODE (value) == CONST))
 523         value = convert_to_mode (fieldmode, value, 0);
 524
 525       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 526         value = copy_to_mode_reg (fieldmode, value);
 527
 528       if (GET_CODE (op0) == SUBREG)
 529         {
 530           /* Else we've got some float mode source being extracted into
 531              a different float mode destination -- this combination of
 532              subregs results in Severe Tire Damage.  */
 533           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 534                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 535                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 536           op0 = SUBREG_REG (op0);
 537         }
 538
 539       emit_insn (GEN_FCN (icode)
 540                  (gen_rtx_SUBREG (fieldmode, op0,
 541                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 542                                   + (offset * UNITS_PER_WORD)),
 543                                   value));
 544
 545       return value;
 546     }
 547
 548   /* Handle fields bigger than a word.  */
 549
 550   if (bitsize > BITS_PER_WORD)
 551     {
 552       /* Here we transfer the words of the field
 553          in the order least significant first.
 554          This is because the most significant word is the one which may
 555          be less than full.
 556          However, only do that if the value is not BLKmode.  */
 557
 558       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 559       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 560       unsigned int i;
 561
 562       /* This is the mode we must force value to, so that there will be enough
 563          subwords to extract.  Note that fieldmode will often (always?) be
 564          VOIDmode, because that is what store_field uses to indicate that this
 565          is a bit field, but passing VOIDmode to operand_subword_force
 566          is not allowed.  */
 567       fieldmode = GET_MODE (value);
 568       if (fieldmode == VOIDmode)
 569         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 570
 571       for (i = 0; i < nwords; i++)
 572         {
 573           /* If I is 0, use the low-order word in both field and target;
 574              if I is 1, use the next to lowest word; and so on.  */
 575           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 576           unsigned int bit_offset = (backwards
 577                                      ? MAX ((int) bitsize - ((int) i + 1)
 578                                             * BITS_PER_WORD,
 579                                             0)
 580                                      : (int) i * BITS_PER_WORD);
 581
 582           store_bit_field (op0, MIN (BITS_PER_WORD,
 583                                      bitsize - i * BITS_PER_WORD),
 584                            bitnum + bit_offset, word_mode,
 585                            operand_subword_force (value, wordnum, fieldmode));
 586         }
 587       return value;
 588     }
 589
 590   /* From here on we can assume that the field to be stored in is
 591      a full-word (whatever type that is), since it is shorter than a word.  */
 592
 593   /* OFFSET is the number of words or bytes (UNIT says which)
 594      from STR_RTX to the first word or byte containing part of the field.  */
 595
 596   if (!MEM_P (op0))
 597     {
 598       if (offset != 0
 599           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 600         {
 601           if (!REG_P (op0))
 602             {
 603               /* Since this is a destination (lvalue), we can't copy
 604                  it to a pseudo.  We can remove a SUBREG that does not
 605                  change the size of the operand.  Such a SUBREG may
 606                  have been added above.  */
 607               gcc_assert (GET_CODE (op0) == SUBREG
 608                           && (GET_MODE_SIZE (GET_MODE (op0))
 609                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 610               op0 = SUBREG_REG (op0);
 611             }
 612           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 613                                 op0, (offset * UNITS_PER_WORD));
 614         }
 615       offset = 0;
 616     }
 617
 618   /* If VALUE has a floating-point or complex mode, access it as an
 619      integer of the corresponding size.  This can occur on a machine
 620      with 64 bit registers that uses SFmode for float.  It can also
 621      occur for unaligned float or complex fields.  */
 622   orig_value = value;
 623   if (GET_MODE (value) != VOIDmode
 624       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 625       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 626     {
 627       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 628       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 629     }
 630
 631   /* Now OFFSET is nonzero only if OP0 is memory
 632      and is therefore always measured in bytes.  */
 633
 634   if (HAVE_insv
 635       && GET_MODE (value) != BLKmode
 636       && bitsize > 0
 637       && GET_MODE_BITSIZE (op_mode) >= bitsize
 638       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 639             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 640     {
 641       int xbitpos = bitpos;
 642       rtx value1;
 643       rtx xop0 = op0;
 644       rtx last = get_last_insn ();
 645       rtx pat;
 646       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 647       int save_volatile_ok = volatile_ok;
 648
 649       volatile_ok = 1;
 650
 651       /* If this machine's insv can only insert into a register, copy OP0
 652          into a register and save it back later.  */
 653       if (MEM_P (op0)
 654           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 655                 (op0, VOIDmode)))
 656         {
 657           rtx tempreg;
 658           enum machine_mode bestmode;
 659
 660           /* Get the mode to use for inserting into this field.  If OP0 is
 661              BLKmode, get the smallest mode consistent with the alignment. If
 662              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 663              mode. Otherwise, use the smallest mode containing the field.  */
 664
 665           if (GET_MODE (op0) == BLKmode
 666               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 667             bestmode
 668               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 669                                MEM_VOLATILE_P (op0));
 670           else
 671             bestmode = GET_MODE (op0);
 672
 673           if (bestmode == VOIDmode
 674               || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
 675               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 676                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 677             goto insv_loses;
 678
 679           /* Adjust address to point to the containing unit of that mode.
 680              Compute offset as multiple of this unit, counting in bytes.  */
 681           unit = GET_MODE_BITSIZE (bestmode);
 682           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 683           bitpos = bitnum % unit;
 684           op0 = adjust_address (op0, bestmode,  offset);
 685
 686           /* Fetch that unit, store the bitfield in it, then store
 687              the unit.  */
 688           tempreg = copy_to_reg (op0);
 689           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 690           emit_move_insn (op0, tempreg);
 691           return value;
 692         }
 693       volatile_ok = save_volatile_ok;
 694
 695       /* Add OFFSET into OP0's address.  */
 696       if (MEM_P (xop0))
 697         xop0 = adjust_address (xop0, byte_mode, offset);
 698
 699       /* If xop0 is a register, we need it in MAXMODE
 700          to make it acceptable to the format of insv.  */
 701       if (GET_CODE (xop0) == SUBREG)
 702         /* We can't just change the mode, because this might clobber op0,
 703            and we will need the original value of op0 if insv fails.  */
 704         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 705       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 706         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 707
 708       /* On big-endian machines, we count bits from the most significant.
 709          If the bit field insn does not, we must invert.  */
 710
 711       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 712         xbitpos = unit - bitsize - xbitpos;
 713
 714       /* We have been counting XBITPOS within UNIT.
 715          Count instead within the size of the register.  */
 716       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 717         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 718
 719       unit = GET_MODE_BITSIZE (maxmode);
 720
 721       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 722       value1 = value;
 723       if (GET_MODE (value) != maxmode)
 724         {
 725           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 726             {
 727               /* Optimization: Don't bother really extending VALUE
 728                  if it has all the bits we will actually use.  However,
 729                  if we must narrow it, be sure we do it correctly.  */
 730
 731               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 732                 {
 733                   rtx tmp;
 734
 735                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 736                   if (! tmp)
 737                     tmp = simplify_gen_subreg (maxmode,
 738                                                force_reg (GET_MODE (value),
 739                                                           value1),
 740                                                GET_MODE (value), 0);
 741                   value1 = tmp;
 742                 }
 743               else
 744                 value1 = gen_lowpart (maxmode, value1);
 745             }
 746           else if (GET_CODE (value) == CONST_INT)
 747             value1 = gen_int_mode (INTVAL (value), maxmode);
 748           else
 749             /* Parse phase is supposed to make VALUE's data type
 750                match that of the component reference, which is a type
 751                at least as wide as the field; so VALUE should have
 752                a mode that corresponds to that type.  */
 753             gcc_assert (CONSTANT_P (value));
 754         }
 755
 756       /* If this machine's insv insists on a register,
 757          get VALUE1 into a register.  */
 758       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 759              (value1, maxmode)))
 760         value1 = force_reg (maxmode, value1);
 761
 762       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 763       if (pat)
 764         emit_insn (pat);
 765       else
 766         {
 767           delete_insns_since (last);
 768           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 769         }
 770     }
 771   else
 772     insv_loses:
 773     /* Insv is not available; store using shifts and boolean ops.  */
 774     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 775   return value;
 776 }
 777 \f
 778 /* Use shifts and boolean operations to store VALUE
 779    into a bit field of width BITSIZE
 780    in a memory location specified by OP0 except offset by OFFSET bytes.
 781      (OFFSET must be 0 if OP0 is a register.)
 782    The field starts at position BITPOS within the byte.
 783     (If OP0 is a register, it may be a full word or a narrower mode,
 784      but BITPOS still counts within a full word,
 785      which is significant on bigendian machines.)  */
 786
 787 static void
 788 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 789                        unsigned HOST_WIDE_INT bitsize,
 790                        unsigned HOST_WIDE_INT bitpos, rtx value)
 791 {
 792   enum machine_mode mode;
 793   unsigned int total_bits = BITS_PER_WORD;
 794   rtx subtarget, temp;
 795   int all_zero = 0;
 796   int all_one = 0;
 797
 798   /* There is a case not handled here:
 799      a structure with a known alignment of just a halfword
 800      and a field split across two aligned halfwords within the structure.
 801      Or likewise a structure with a known alignment of just a byte
 802      and a field split across two bytes.
 803      Such cases are not supposed to be able to occur.  */
 804
 805   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 806     {
 807       gcc_assert (!offset);
 808       /* Special treatment for a bit field split across two registers.  */
 809       if (bitsize + bitpos > BITS_PER_WORD)
 810         {
 811           store_split_bit_field (op0, bitsize, bitpos, value);
 812           return;
 813         }
 814     }
 815   else
 816     {
 817       /* Get the proper mode to use for this field.  We want a mode that
 818          includes the entire field.  If such a mode would be larger than
 819          a word, we won't be doing the extraction the normal way.
 820          We don't want a mode bigger than the destination.  */
 821
 822       mode = GET_MODE (op0);
 823       if (GET_MODE_BITSIZE (mode) == 0
 824           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 825         mode = word_mode;
 826       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 827                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 828
 829       if (mode == VOIDmode)
 830         {
 831           /* The only way this should occur is if the field spans word
 832              boundaries.  */
 833           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 834                                  value);
 835           return;
 836         }
 837
 838       total_bits = GET_MODE_BITSIZE (mode);
 839
 840       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 841          be in the range 0 to total_bits-1, and put any excess bytes in
 842          OFFSET.  */
 843       if (bitpos >= total_bits)
 844         {
 845           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 846           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 847                      * BITS_PER_UNIT);
 848         }
 849
 850       /* Get ref to an aligned byte, halfword, or word containing the field.
 851          Adjust BITPOS to be position within a word,
 852          and OFFSET to be the offset of that word.
 853          Then alter OP0 to refer to that word.  */
 854       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 855       offset -= (offset % (total_bits / BITS_PER_UNIT));
 856       op0 = adjust_address (op0, mode, offset);
 857     }
 858
 859   mode = GET_MODE (op0);
 860
 861   /* Now MODE is either some integral mode for a MEM as OP0,
 862      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 863      The bit field is contained entirely within OP0.
 864      BITPOS is the starting bit number within OP0.
 865      (OP0's mode may actually be narrower than MODE.)  */
 866
 867   if (BYTES_BIG_ENDIAN)
 868       /* BITPOS is the distance between our msb
 869          and that of the containing datum.
 870          Convert it to the distance from the lsb.  */
 871       bitpos = total_bits - bitsize - bitpos;
 872
 873   /* Now BITPOS is always the distance between our lsb
 874      and that of OP0.  */
 875
 876   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 877      we must first convert its mode to MODE.  */
 878
 879   if (GET_CODE (value) == CONST_INT)
 880     {
 881       HOST_WIDE_INT v = INTVAL (value);
 882
 883       if (bitsize < HOST_BITS_PER_WIDE_INT)
 884         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 885
 886       if (v == 0)
 887         all_zero = 1;
 888       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 889                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 890                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 891         all_one = 1;
 892
 893       value = lshift_value (mode, value, bitpos, bitsize);
 894     }
 895   else
 896     {
 897       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 898                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 899
 900       if (GET_MODE (value) != mode)
 901         {
 902           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 903               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 904             value = gen_lowpart (mode, value);
 905           else
 906             value = convert_to_mode (mode, value, 1);
 907         }
 908
 909       if (must_and)
 910         value = expand_binop (mode, and_optab, value,
 911                               mask_rtx (mode, 0, bitsize, 0),
 912                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 913       if (bitpos > 0)
 914         value = expand_shift (LSHIFT_EXPR, mode, value,
 915                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 916     }
 917
 918   /* Now clear the chosen bits in OP0,
 919      except that if VALUE is -1 we need not bother.  */
 920
 921   subtarget = op0;
 922
 923   if (! all_one)
 924     {
 925       temp = expand_binop (mode, and_optab, op0,
 926                            mask_rtx (mode, bitpos, bitsize, 1),
 927                            subtarget, 1, OPTAB_LIB_WIDEN);
 928       subtarget = temp;
 929     }
 930   else
 931     temp = op0;
 932
 933   /* Now logical-or VALUE into OP0, unless it is zero.  */
 934
 935   if (! all_zero)
 936     temp = expand_binop (mode, ior_optab, temp, value,
 937                          subtarget, 1, OPTAB_LIB_WIDEN);
 938   if (op0 != temp)
 939     emit_move_insn (op0, temp);
 940 }
 941 \f
 942 /* Store a bit field that is split across multiple accessible memory objects.
 943
 944    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 945    BITSIZE is the field width; BITPOS the position of its first bit
 946    (within the word).
 947    VALUE is the value to store.
 948
 949    This does not yet handle fields wider than BITS_PER_WORD.  */
 950
 951 static void
 952 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 953                        unsigned HOST_WIDE_INT bitpos, rtx value)
 954 {
 955   unsigned int unit;
 956   unsigned int bitsdone = 0;
 957
 958   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 959      much at a time.  */
 960   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 961     unit = BITS_PER_WORD;
 962   else
 963     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 964
 965   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 966      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 967      that VALUE might be a floating-point constant.  */
 968   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 969     {
 970       rtx word = gen_lowpart_common (word_mode, value);
 971
 972       if (word && (value != word))
 973         value = word;
 974       else
 975         value = gen_lowpart_common (word_mode,
 976                                     force_reg (GET_MODE (value) != VOIDmode
 977                                                ? GET_MODE (value)
 978                                                : word_mode, value));
 979     }
 980
 981   while (bitsdone < bitsize)
 982     {
 983       unsigned HOST_WIDE_INT thissize;
 984       rtx part, word;
 985       unsigned HOST_WIDE_INT thispos;
 986       unsigned HOST_WIDE_INT offset;
 987
 988       offset = (bitpos + bitsdone) / unit;
 989       thispos = (bitpos + bitsdone) % unit;
 990
 991       /* THISSIZE must not overrun a word boundary.  Otherwise,
 992          store_fixed_bit_field will call us again, and we will mutually
 993          recurse forever.  */
 994       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 995       thissize = MIN (thissize, unit - thispos);
 996
 997       if (BYTES_BIG_ENDIAN)
 998         {
 999           int total_bits;
1000
1001           /* We must do an endian conversion exactly the same way as it is
1002              done in extract_bit_field, so that the two calls to
1003              extract_fixed_bit_field will have comparable arguments.  */
1004           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1005             total_bits = BITS_PER_WORD;
1006           else
1007             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1008
1009           /* Fetch successively less significant portions.  */
1010           if (GET_CODE (value) == CONST_INT)
1011             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1012                              >> (bitsize - bitsdone - thissize))
1013                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1014           else
1015             /* The args are chosen so that the last part includes the
1016                lsb.  Give extract_bit_field the value it needs (with
1017                endianness compensation) to fetch the piece we want.  */
1018             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1019                                             total_bits - bitsize + bitsdone,
1020                                             NULL_RTX, 1);
1021         }
1022       else
1023         {
1024           /* Fetch successively more significant portions.  */
1025           if (GET_CODE (value) == CONST_INT)
1026             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1027                              >> bitsdone)
1028                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1029           else
1030             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1031                                             bitsdone, NULL_RTX, 1);
1032         }
1033
1034       /* If OP0 is a register, then handle OFFSET here.
1035
1036          When handling multiword bitfields, extract_bit_field may pass
1037          down a word_mode SUBREG of a larger REG for a bitfield that actually
1038          crosses a word boundary.  Thus, for a SUBREG, we must find
1039          the current word starting from the base register.  */
1040       if (GET_CODE (op0) == SUBREG)
1041         {
1042           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1043           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1044                                         GET_MODE (SUBREG_REG (op0)));
1045           offset = 0;
1046         }
1047       else if (REG_P (op0))
1048         {
1049           word = operand_subword_force (op0, offset, GET_MODE (op0));
1050           offset = 0;
1051         }
1052       else
1053         word = op0;
1054
1055       /* OFFSET is in UNITs, and UNIT is in bits.
1056          store_fixed_bit_field wants offset in bytes.  */
1057       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1058                              thispos, part);
1059       bitsdone += thissize;
1060     }
1061 }
1062 \f
1063 /* Generate code to extract a byte-field from STR_RTX
1064    containing BITSIZE bits, starting at BITNUM,
1065    and put it in TARGET if possible (if TARGET is nonzero).
1066    Regardless of TARGET, we return the rtx for where the value is placed.
1067
1068    STR_RTX is the structure containing the byte (a REG or MEM).
1069    UNSIGNEDP is nonzero if this is an unsigned bit field.
1070    MODE is the natural mode of the field value once extracted.
1071    TMODE is the mode the caller would like the value to have;
1072    but the value may be returned with type MODE instead.
1073
1074    TOTAL_SIZE is the size in bytes of the containing structure,
1075    or -1 if varying.
1076
1077    If a TARGET is specified and we can store in it at no extra cost,
1078    we do so, and return TARGET.
1079    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1080    if they are equally easy.  */
1081
1082 rtx
1083 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1084                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1085                    enum machine_mode mode, enum machine_mode tmode)
1086 {
1087   unsigned int unit
1088     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1089   unsigned HOST_WIDE_INT offset, bitpos;
1090   rtx op0 = str_rtx;
1091   rtx spec_target = target;
1092   rtx spec_target_subreg = 0;
1093   enum machine_mode int_mode;
1094   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1095   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1096   enum machine_mode mode1;
1097   int byte_offset;
1098
1099   if (tmode == VOIDmode)
1100     tmode = mode;
1101
1102   while (GET_CODE (op0) == SUBREG)
1103     {
1104       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1105       op0 = SUBREG_REG (op0);
1106     }
1107
1108   /* If we have an out-of-bounds access to a register, just return an
1109      uninitialized register of the required mode.  This can occur if the
1110      source code contains an out-of-bounds access to a small array.  */
1111   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1112     return gen_reg_rtx (tmode);
1113
1114   if (REG_P (op0)
1115       && mode == GET_MODE (op0)
1116       && bitnum == 0
1117       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1118     {
1119       /* We're trying to extract a full register from itself.  */
1120       return op0;
1121     }
1122
1123   /* Use vec_extract patterns for extracting parts of vectors whenever
1124      available.  */
1125   if (VECTOR_MODE_P (GET_MODE (op0))
1126       && !MEM_P (op0)
1127       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1128           != CODE_FOR_nothing)
1129       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1130           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1131     {
1132       enum machine_mode outermode = GET_MODE (op0);
1133       enum machine_mode innermode = GET_MODE_INNER (outermode);
1134       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1135       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1136       rtx rtxpos = GEN_INT (pos);
1137       rtx src = op0;
1138       rtx dest = NULL, pat, seq;
1139       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1140       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1141       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1142
1143       if (innermode == tmode || innermode == mode)
1144         dest = target;
1145
1146       if (!dest)
1147         dest = gen_reg_rtx (innermode);
1148
1149       start_sequence ();
1150
1151       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1152         dest = copy_to_mode_reg (mode0, dest);
1153
1154       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1155         src = copy_to_mode_reg (mode1, src);
1156
1157       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1158         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1159
1160       /* We could handle this, but we should always be called with a pseudo
1161          for our targets and all insns should take them as outputs.  */
1162       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1163                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1164                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1165
1166       pat = GEN_FCN (icode) (dest, src, rtxpos);
1167       seq = get_insns ();
1168       end_sequence ();
1169       if (pat)
1170         {
1171           emit_insn (seq);
1172           emit_insn (pat);
1173           return dest;
1174         }
1175     }
1176
1177   /* Make sure we are playing with integral modes.  Pun with subregs
1178      if we aren't.  */
1179   {
1180     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1181     if (imode != GET_MODE (op0))
1182       {
1183         if (MEM_P (op0))
1184           op0 = adjust_address (op0, imode, 0);
1185         else
1186           {
1187             gcc_assert (imode != BLKmode);
1188             op0 = gen_lowpart (imode, op0);
1189
1190             /* If we got a SUBREG, force it into a register since we
1191                aren't going to be able to do another SUBREG on it.  */
1192             if (GET_CODE (op0) == SUBREG)
1193               op0 = force_reg (imode, op0);
1194           }
1195       }
1196   }
1197
1198   /* We may be accessing data outside the field, which means
1199      we can alias adjacent data.  */
1200   if (MEM_P (op0))
1201     {
1202       op0 = shallow_copy_rtx (op0);
1203       set_mem_alias_set (op0, 0);
1204       set_mem_expr (op0, 0);
1205     }
1206
1207   /* Extraction of a full-word or multi-word value from a structure
1208      in a register or aligned memory can be done with just a SUBREG.
1209      A subword value in the least significant part of a register
1210      can also be extracted with a SUBREG.  For this, we need the
1211      byte offset of the value in op0.  */
1212
1213   bitpos = bitnum % unit;
1214   offset = bitnum / unit;
1215   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1216
1217   /* If OP0 is a register, BITPOS must count within a word.
1218      But as we have it, it counts within whatever size OP0 now has.
1219      On a bigendian machine, these are not the same, so convert.  */
1220   if (BYTES_BIG_ENDIAN
1221       && !MEM_P (op0)
1222       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1223     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1224
1225   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1226      If that's wrong, the solution is to test for it and set TARGET to 0
1227      if needed.  */
1228
1229   /* Only scalar integer modes can be converted via subregs.  There is an
1230      additional problem for FP modes here in that they can have a precision
1231      which is different from the size.  mode_for_size uses precision, but
1232      we want a mode based on the size, so we must avoid calling it for FP
1233      modes.  */
1234   mode1  = (SCALAR_INT_MODE_P (tmode)
1235             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1236             : mode);
1237
1238   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1239         && bitpos % BITS_PER_WORD == 0)
1240        || (mode1 != BLKmode
1241            /* ??? The big endian test here is wrong.  This is correct
1242               if the value is in a register, and if mode_for_size is not
1243               the same mode as op0.  This causes us to get unnecessarily
1244               inefficient code from the Thumb port when -mbig-endian.  */
1245            && (BYTES_BIG_ENDIAN
1246                ? bitpos + bitsize == BITS_PER_WORD
1247                : bitpos == 0)))
1248       && ((!MEM_P (op0)
1249            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1250                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1251            && GET_MODE_SIZE (mode1) != 0
1252            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1253           || (MEM_P (op0)
1254               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1255                   || (offset * BITS_PER_UNIT % bitsize == 0
1256                       && MEM_ALIGN (op0) % bitsize == 0)))))
1257     {
1258       if (mode1 != GET_MODE (op0))
1259         {
1260           if (MEM_P (op0))
1261             op0 = adjust_address (op0, mode1, offset);
1262           else
1263             {
1264               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1265                                              byte_offset);
1266               if (sub == NULL)
1267                 goto no_subreg_mode_swap;
1268               op0 = sub;
1269             }
1270         }
1271       if (mode1 != mode)
1272         return convert_to_mode (tmode, op0, unsignedp);
1273       return op0;
1274     }
1275  no_subreg_mode_swap:
1276
1277   /* Handle fields bigger than a word.  */
1278
1279   if (bitsize > BITS_PER_WORD)
1280     {
1281       /* Here we transfer the words of the field
1282          in the order least significant first.
1283          This is because the most significant word is the one which may
1284          be less than full.  */
1285
1286       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1287       unsigned int i;
1288
1289       if (target == 0 || !REG_P (target))
1290         target = gen_reg_rtx (mode);
1291
1292       /* Indicate for flow that the entire target reg is being set.  */
1293       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1294
1295       for (i = 0; i < nwords; i++)
1296         {
1297           /* If I is 0, use the low-order word in both field and target;
1298              if I is 1, use the next to lowest word; and so on.  */
1299           /* Word number in TARGET to use.  */
1300           unsigned int wordnum
1301             = (WORDS_BIG_ENDIAN
1302                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1303                : i);
1304           /* Offset from start of field in OP0.  */
1305           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1306                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1307                                                 * (int) BITS_PER_WORD))
1308                                      : (int) i * BITS_PER_WORD);
1309           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1310           rtx result_part
1311             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1312                                            bitsize - i * BITS_PER_WORD),
1313                                  bitnum + bit_offset, 1, target_part, mode,
1314                                  word_mode);
1315
1316           gcc_assert (target_part);
1317
1318           if (result_part != target_part)
1319             emit_move_insn (target_part, result_part);
1320         }
1321
1322       if (unsignedp)
1323         {
1324           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1325              need to be zero'd out.  */
1326           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1327             {
1328               unsigned int i, total_words;
1329
1330               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1331               for (i = nwords; i < total_words; i++)
1332                 emit_move_insn
1333                   (operand_subword (target,
1334                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1335                                     1, VOIDmode),
1336                    const0_rtx);
1337             }
1338           return target;
1339         }
1340
1341       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1342       target = expand_shift (LSHIFT_EXPR, mode, target,
1343                              build_int_cst (NULL_TREE,
1344                                             GET_MODE_BITSIZE (mode) - bitsize),
1345                              NULL_RTX, 0);
1346       return expand_shift (RSHIFT_EXPR, mode, target,
1347                            build_int_cst (NULL_TREE,
1348                                           GET_MODE_BITSIZE (mode) - bitsize),
1349                            NULL_RTX, 0);
1350     }
1351
1352   /* From here on we know the desired field is smaller than a word.  */
1353
1354   /* Check if there is a correspondingly-sized integer field, so we can
1355      safely extract it as one size of integer, if necessary; then
1356      truncate or extend to the size that is wanted; then use SUBREGs or
1357      convert_to_mode to get one of the modes we really wanted.  */
1358
1359   int_mode = int_mode_for_mode (tmode);
1360   if (int_mode == BLKmode)
1361     int_mode = int_mode_for_mode (mode);
1362   /* Should probably push op0 out to memory and then do a load.  */
1363   gcc_assert (int_mode != BLKmode);
1364
1365   /* OFFSET is the number of words or bytes (UNIT says which)
1366      from STR_RTX to the first word or byte containing part of the field.  */
1367   if (!MEM_P (op0))
1368     {
1369       if (offset != 0
1370           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1371         {
1372           if (!REG_P (op0))
1373             op0 = copy_to_reg (op0);
1374           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1375                                 op0, (offset * UNITS_PER_WORD));
1376         }
1377       offset = 0;
1378     }
1379
1380   /* Now OFFSET is nonzero only for memory operands.  */
1381
1382   if (unsignedp)
1383     {
1384       if (HAVE_extzv
1385           && bitsize > 0
1386           && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1387           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1388                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1389         {
1390           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1391           rtx bitsize_rtx, bitpos_rtx;
1392           rtx last = get_last_insn ();
1393           rtx xop0 = op0;
1394           rtx xtarget = target;
1395           rtx xspec_target = spec_target;
1396           rtx xspec_target_subreg = spec_target_subreg;
1397           rtx pat;
1398           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1399
1400           if (MEM_P (xop0))
1401             {
1402               int save_volatile_ok = volatile_ok;
1403               volatile_ok = 1;
1404
1405               /* Is the memory operand acceptable?  */
1406               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1407                      (xop0, GET_MODE (xop0))))
1408                 {
1409                   /* No, load into a reg and extract from there.  */
1410                   enum machine_mode bestmode;
1411
1412                   /* Get the mode to use for inserting into this field.  If
1413                      OP0 is BLKmode, get the smallest mode consistent with the
1414                      alignment. If OP0 is a non-BLKmode object that is no
1415                      wider than MAXMODE, use its mode. Otherwise, use the
1416                      smallest mode containing the field.  */
1417
1418                   if (GET_MODE (xop0) == BLKmode
1419                       || (GET_MODE_SIZE (GET_MODE (op0))
1420                           > GET_MODE_SIZE (maxmode)))
1421                     bestmode = get_best_mode (bitsize, bitnum,
1422                                               MEM_ALIGN (xop0), maxmode,
1423                                               MEM_VOLATILE_P (xop0));
1424                   else
1425                     bestmode = GET_MODE (xop0);
1426
1427                   if (bestmode == VOIDmode
1428                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1429                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1430                     goto extzv_loses;
1431
1432                   /* Compute offset as multiple of this unit,
1433                      counting in bytes.  */
1434                   unit = GET_MODE_BITSIZE (bestmode);
1435                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1436                   xbitpos = bitnum % unit;
1437                   xop0 = adjust_address (xop0, bestmode, xoffset);
1438
1439                   /* Make sure register is big enough for the whole field. */
1440                   if (xoffset * BITS_PER_UNIT + unit
1441                       < offset * BITS_PER_UNIT + bitsize)
1442                     goto extzv_loses;
1443
1444                   /* Fetch it to a register in that size.  */
1445                   xop0 = force_reg (bestmode, xop0);
1446
1447                   /* XBITPOS counts within UNIT, which is what is expected.  */
1448                 }
1449               else
1450                 /* Get ref to first byte containing part of the field.  */
1451                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1452
1453               volatile_ok = save_volatile_ok;
1454             }
1455
1456           /* If op0 is a register, we need it in MAXMODE (which is usually
1457              SImode). to make it acceptable to the format of extzv.  */
1458           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1459             goto extzv_loses;
1460           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1461             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1462
1463           /* On big-endian machines, we count bits from the most significant.
1464              If the bit field insn does not, we must invert.  */
1465           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1466             xbitpos = unit - bitsize - xbitpos;
1467
1468           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1469           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1470             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1471
1472           unit = GET_MODE_BITSIZE (maxmode);
1473
1474           if (xtarget == 0)
1475             xtarget = xspec_target = gen_reg_rtx (tmode);
1476
1477           if (GET_MODE (xtarget) != maxmode)
1478             {
1479               if (REG_P (xtarget))
1480                 {
1481                   int wider = (GET_MODE_SIZE (maxmode)
1482                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1483                   xtarget = gen_lowpart (maxmode, xtarget);
1484                   if (wider)
1485                     xspec_target_subreg = xtarget;
1486                 }
1487               else
1488                 xtarget = gen_reg_rtx (maxmode);
1489             }
1490
1491           /* If this machine's extzv insists on a register target,
1492              make sure we have one.  */
1493           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1494                  (xtarget, maxmode)))
1495             xtarget = gen_reg_rtx (maxmode);
1496
1497           bitsize_rtx = GEN_INT (bitsize);
1498           bitpos_rtx = GEN_INT (xbitpos);
1499
1500           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1501           if (pat)
1502             {
1503               emit_insn (pat);
1504               target = xtarget;
1505               spec_target = xspec_target;
1506               spec_target_subreg = xspec_target_subreg;
1507             }
1508           else
1509             {
1510               delete_insns_since (last);
1511               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1512                                                 bitpos, target, 1);
1513             }
1514         }
1515       else
1516       extzv_loses:
1517         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1518                                           bitpos, target, 1);
1519     }
1520   else
1521     {
1522       if (HAVE_extv
1523           && bitsize > 0
1524           && GET_MODE_BITSIZE (extv_mode) >= bitsize
1525           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1526                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1527         {
1528           int xbitpos = bitpos, xoffset = offset;
1529           rtx bitsize_rtx, bitpos_rtx;
1530           rtx last = get_last_insn ();
1531           rtx xop0 = op0, xtarget = target;
1532           rtx xspec_target = spec_target;
1533           rtx xspec_target_subreg = spec_target_subreg;
1534           rtx pat;
1535           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1536
1537           if (MEM_P (xop0))
1538             {
1539               /* Is the memory operand acceptable?  */
1540               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1541                      (xop0, GET_MODE (xop0))))
1542                 {
1543                   /* No, load into a reg and extract from there.  */
1544                   enum machine_mode bestmode;
1545
1546                   /* Get the mode to use for inserting into this field.  If
1547                      OP0 is BLKmode, get the smallest mode consistent with the
1548                      alignment. If OP0 is a non-BLKmode object that is no
1549                      wider than MAXMODE, use its mode. Otherwise, use the
1550                      smallest mode containing the field.  */
1551
1552                   if (GET_MODE (xop0) == BLKmode
1553                       || (GET_MODE_SIZE (GET_MODE (op0))
1554                           > GET_MODE_SIZE (maxmode)))
1555                     bestmode = get_best_mode (bitsize, bitnum,
1556                                               MEM_ALIGN (xop0), maxmode,
1557                                               MEM_VOLATILE_P (xop0));
1558                   else
1559                     bestmode = GET_MODE (xop0);
1560
1561                   if (bestmode == VOIDmode
1562                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1563                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1564                     goto extv_loses;
1565
1566                   /* Compute offset as multiple of this unit,
1567                      counting in bytes.  */
1568                   unit = GET_MODE_BITSIZE (bestmode);
1569                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1570                   xbitpos = bitnum % unit;
1571                   xop0 = adjust_address (xop0, bestmode, xoffset);
1572
1573                   /* Make sure register is big enough for the whole field. */
1574                   if (xoffset * BITS_PER_UNIT + unit
1575                       < offset * BITS_PER_UNIT + bitsize)
1576                     goto extv_loses;
1577
1578                   /* Fetch it to a register in that size.  */
1579                   xop0 = force_reg (bestmode, xop0);
1580
1581                   /* XBITPOS counts within UNIT, which is what is expected.  */
1582                 }
1583               else
1584                 /* Get ref to first byte containing part of the field.  */
1585                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1586             }
1587
1588           /* If op0 is a register, we need it in MAXMODE (which is usually
1589              SImode) to make it acceptable to the format of extv.  */
1590           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1591             goto extv_loses;
1592           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1593             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1594
1595           /* On big-endian machines, we count bits from the most significant.
1596              If the bit field insn does not, we must invert.  */
1597           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1598             xbitpos = unit - bitsize - xbitpos;
1599
1600           /* XBITPOS counts within a size of UNIT.
1601              Adjust to count within a size of MAXMODE.  */
1602           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1603             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1604
1605           unit = GET_MODE_BITSIZE (maxmode);
1606
1607           if (xtarget == 0)
1608             xtarget = xspec_target = gen_reg_rtx (tmode);
1609
1610           if (GET_MODE (xtarget) != maxmode)
1611             {
1612               if (REG_P (xtarget))
1613                 {
1614                   int wider = (GET_MODE_SIZE (maxmode)
1615                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1616                   xtarget = gen_lowpart (maxmode, xtarget);
1617                   if (wider)
1618                     xspec_target_subreg = xtarget;
1619                 }
1620               else
1621                 xtarget = gen_reg_rtx (maxmode);
1622             }
1623
1624           /* If this machine's extv insists on a register target,
1625              make sure we have one.  */
1626           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1627                  (xtarget, maxmode)))
1628             xtarget = gen_reg_rtx (maxmode);
1629
1630           bitsize_rtx = GEN_INT (bitsize);
1631           bitpos_rtx = GEN_INT (xbitpos);
1632
1633           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1634           if (pat)
1635             {
1636               emit_insn (pat);
1637               target = xtarget;
1638               spec_target = xspec_target;
1639               spec_target_subreg = xspec_target_subreg;
1640             }
1641           else
1642             {
1643               delete_insns_since (last);
1644               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1645                                                 bitpos, target, 0);
1646             }
1647         }
1648       else
1649       extv_loses:
1650         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1651                                           bitpos, target, 0);
1652     }
1653   if (target == spec_target)
1654     return target;
1655   if (target == spec_target_subreg)
1656     return spec_target;
1657   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1658     {
1659       /* If the target mode is not a scalar integral, first convert to the
1660          integer mode of that size and then access it as a floating-point
1661          value via a SUBREG.  */
1662       if (!SCALAR_INT_MODE_P (tmode))
1663         {
1664           enum machine_mode smode
1665             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1666           target = convert_to_mode (smode, target, unsignedp);
1667           target = force_reg (smode, target);
1668           return gen_lowpart (tmode, target);
1669         }
1670
1671       return convert_to_mode (tmode, target, unsignedp);
1672     }
1673   return target;
1674 }
1675 \f
1676 /* Extract a bit field using shifts and boolean operations
1677    Returns an rtx to represent the value.
1678    OP0 addresses a register (word) or memory (byte).
1679    BITPOS says which bit within the word or byte the bit field starts in.
1680    OFFSET says how many bytes farther the bit field starts;
1681     it is 0 if OP0 is a register.
1682    BITSIZE says how many bits long the bit field is.
1683     (If OP0 is a register, it may be narrower than a full word,
1684      but BITPOS still counts within a full word,
1685      which is significant on bigendian machines.)
1686
1687    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1688    If TARGET is nonzero, attempts to store the value there
1689    and return TARGET, but this is not guaranteed.
1690    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1691
1692 static rtx
1693 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1694                          unsigned HOST_WIDE_INT offset,
1695                          unsigned HOST_WIDE_INT bitsize,
1696                          unsigned HOST_WIDE_INT bitpos, rtx target,
1697                          int unsignedp)
1698 {
1699   unsigned int total_bits = BITS_PER_WORD;
1700   enum machine_mode mode;
1701
1702   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1703     {
1704       /* Special treatment for a bit field split across two registers.  */
1705       if (bitsize + bitpos > BITS_PER_WORD)
1706         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1707     }
1708   else
1709     {
1710       /* Get the proper mode to use for this field.  We want a mode that
1711          includes the entire field.  If such a mode would be larger than
1712          a word, we won't be doing the extraction the normal way.  */
1713
1714       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1715                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1716
1717       if (mode == VOIDmode)
1718         /* The only way this should occur is if the field spans word
1719            boundaries.  */
1720         return extract_split_bit_field (op0, bitsize,
1721                                         bitpos + offset * BITS_PER_UNIT,
1722                                         unsignedp);
1723
1724       total_bits = GET_MODE_BITSIZE (mode);
1725
1726       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1727          be in the range 0 to total_bits-1, and put any excess bytes in
1728          OFFSET.  */
1729       if (bitpos >= total_bits)
1730         {
1731           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1732           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1733                      * BITS_PER_UNIT);
1734         }
1735
1736       /* Get ref to an aligned byte, halfword, or word containing the field.
1737          Adjust BITPOS to be position within a word,
1738          and OFFSET to be the offset of that word.
1739          Then alter OP0 to refer to that word.  */
1740       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1741       offset -= (offset % (total_bits / BITS_PER_UNIT));
1742       op0 = adjust_address (op0, mode, offset);
1743     }
1744
1745   mode = GET_MODE (op0);
1746
1747   if (BYTES_BIG_ENDIAN)
1748     /* BITPOS is the distance between our msb and that of OP0.
1749        Convert it to the distance from the lsb.  */
1750     bitpos = total_bits - bitsize - bitpos;
1751
1752   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1753      We have reduced the big-endian case to the little-endian case.  */
1754
1755   if (unsignedp)
1756     {
1757       if (bitpos)
1758         {
1759           /* If the field does not already start at the lsb,
1760              shift it so it does.  */
1761           tree amount = build_int_cst (NULL_TREE, bitpos);
1762           /* Maybe propagate the target for the shift.  */
1763           /* But not if we will return it--could confuse integrate.c.  */
1764           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1765           if (tmode != mode) subtarget = 0;
1766           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1767         }
1768       /* Convert the value to the desired mode.  */
1769       if (mode != tmode)
1770         op0 = convert_to_mode (tmode, op0, 1);
1771
1772       /* Unless the msb of the field used to be the msb when we shifted,
1773          mask out the upper bits.  */
1774
1775       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1776         return expand_binop (GET_MODE (op0), and_optab, op0,
1777                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1778                              target, 1, OPTAB_LIB_WIDEN);
1779       return op0;
1780     }
1781
1782   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1783      then arithmetic-shift its lsb to the lsb of the word.  */
1784   op0 = force_reg (mode, op0);
1785   if (mode != tmode)
1786     target = 0;
1787
1788   /* Find the narrowest integer mode that contains the field.  */
1789
1790   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1791        mode = GET_MODE_WIDER_MODE (mode))
1792     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1793       {
1794         op0 = convert_to_mode (mode, op0, 0);
1795         break;
1796       }
1797
1798   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1799     {
1800       tree amount
1801         = build_int_cst (NULL_TREE,
1802                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1803       /* Maybe propagate the target for the shift.  */
1804       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1805       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1806     }
1807
1808   return expand_shift (RSHIFT_EXPR, mode, op0,
1809                        build_int_cst (NULL_TREE,
1810                                       GET_MODE_BITSIZE (mode) - bitsize),
1811                        target, 0);
1812 }
1813 \f
1814 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1815    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1816    complement of that if COMPLEMENT.  The mask is truncated if
1817    necessary to the width of mode MODE.  The mask is zero-extended if
1818    BITSIZE+BITPOS is too small for MODE.  */
1819
1820 static rtx
1821 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1822 {
1823   HOST_WIDE_INT masklow, maskhigh;
1824
1825   if (bitsize == 0)
1826     masklow = 0;
1827   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1828     masklow = (HOST_WIDE_INT) -1 << bitpos;
1829   else
1830     masklow = 0;
1831
1832   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1833     masklow &= ((unsigned HOST_WIDE_INT) -1
1834                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1835
1836   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1837     maskhigh = -1;
1838   else
1839     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1840
1841   if (bitsize == 0)
1842     maskhigh = 0;
1843   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1844     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1845                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1846   else
1847     maskhigh = 0;
1848
1849   if (complement)
1850     {
1851       maskhigh = ~maskhigh;
1852       masklow = ~masklow;
1853     }
1854
1855   return immed_double_const (masklow, maskhigh, mode);
1856 }
1857
1858 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1859    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1860
1861 static rtx
1862 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1863 {
1864   unsigned HOST_WIDE_INT v = INTVAL (value);
1865   HOST_WIDE_INT low, high;
1866
1867   if (bitsize < HOST_BITS_PER_WIDE_INT)
1868     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1869
1870   if (bitpos < HOST_BITS_PER_WIDE_INT)
1871     {
1872       low = v << bitpos;
1873       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1874     }
1875   else
1876     {
1877       low = 0;
1878       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1879     }
1880
1881   return immed_double_const (low, high, mode);
1882 }
1883 \f
1884 /* Extract a bit field from a memory by forcing the alignment of the
1885    memory.  This efficient only if the field spans at least 4 boundaries.
1886
1887    OP0 is the MEM.
1888    BITSIZE is the field width; BITPOS is the position of the first bit.
1889    UNSIGNEDP is true if the result should be zero-extended.  */
1890
1891 static rtx
1892 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1893                                    unsigned HOST_WIDE_INT bitpos,
1894                                    int unsignedp)
1895 {
1896   enum machine_mode mode, dmode;
1897   unsigned int m_bitsize, m_size;
1898   unsigned int sign_shift_up, sign_shift_dn;
1899   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1900
1901   /* Choose a mode that will fit BITSIZE.  */
1902   mode = smallest_mode_for_size (bitsize, MODE_INT);
1903   m_size = GET_MODE_SIZE (mode);
1904   m_bitsize = GET_MODE_BITSIZE (mode);
1905
1906   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1907   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1908   if (dmode == BLKmode)
1909     return NULL;
1910
1911   do_pending_stack_adjust ();
1912   start = get_last_insn ();
1913
1914   /* At the end, we'll need an additional shift to deal with sign/zero
1915      extension.  By default this will be a left+right shift of the
1916      appropriate size.  But we may be able to eliminate one of them.  */
1917   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1918
1919   if (STRICT_ALIGNMENT)
1920     {
1921       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1922       bitpos %= BITS_PER_UNIT;
1923
1924       /* We load two values to be concatenate.  There's an edge condition
1925          that bears notice -- an aligned value at the end of a page can
1926          only load one value lest we segfault.  So the two values we load
1927          are at "base & -size" and "(base + size - 1) & -size".  If base
1928          is unaligned, the addresses will be aligned and sequential; if
1929          base is aligned, the addresses will both be equal to base.  */
1930
1931       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1932                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1933                                 NULL, true, OPTAB_LIB_WIDEN);
1934       mark_reg_pointer (a1, m_bitsize);
1935       v1 = gen_rtx_MEM (mode, a1);
1936       set_mem_align (v1, m_bitsize);
1937       v1 = force_reg (mode, validize_mem (v1));
1938
1939       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1940       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1941                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1942                                 NULL, true, OPTAB_LIB_WIDEN);
1943       v2 = gen_rtx_MEM (mode, a2);
1944       set_mem_align (v2, m_bitsize);
1945       v2 = force_reg (mode, validize_mem (v2));
1946
1947       /* Combine these two values into a double-word value.  */
1948       if (m_bitsize == BITS_PER_WORD)
1949         {
1950           comb = gen_reg_rtx (dmode);
1951           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1952           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1953           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1954         }
1955       else
1956         {
1957           if (BYTES_BIG_ENDIAN)
1958             comb = v1, v1 = v2, v2 = comb;
1959           v1 = convert_modes (dmode, mode, v1, true);
1960           if (v1 == NULL)
1961             goto fail;
1962           v2 = convert_modes (dmode, mode, v2, true);
1963           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1964                                     NULL, true, OPTAB_LIB_WIDEN);
1965           if (v2 == NULL)
1966             goto fail;
1967           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1968                                       true, OPTAB_LIB_WIDEN);
1969           if (comb == NULL)
1970             goto fail;
1971         }
1972
1973       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1974                                    NULL, true, OPTAB_LIB_WIDEN);
1975       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1976
1977       if (bitpos != 0)
1978         {
1979           if (sign_shift_up <= bitpos)
1980             bitpos -= sign_shift_up, sign_shift_up = 0;
1981           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1982                                        NULL, true, OPTAB_LIB_WIDEN);
1983         }
1984     }
1985   else
1986     {
1987       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1988       bitpos %= BITS_PER_UNIT;
1989
1990       /* When strict alignment is not required, we can just load directly
1991          from memory without masking.  If the remaining BITPOS offset is
1992          small enough, we may be able to do all operations in MODE as
1993          opposed to DMODE.  */
1994       if (bitpos + bitsize <= m_bitsize)
1995         dmode = mode;
1996       comb = adjust_address (op0, dmode, offset);
1997
1998       if (sign_shift_up <= bitpos)
1999         bitpos -= sign_shift_up, sign_shift_up = 0;
2000       shift = GEN_INT (bitpos);
2001     }
2002
2003   /* Shift down the double-word such that the requested value is at bit 0.  */
2004   if (shift != const0_rtx)
2005     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2006                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2007   if (comb == NULL)
2008     goto fail;
2009
2010   /* If the field exactly matches MODE, then all we need to do is return the
2011      lowpart.  Otherwise, shift to get the sign bits set properly.  */
2012   result = force_reg (mode, gen_lowpart (mode, comb));
2013
2014   if (sign_shift_up)
2015     result = expand_simple_binop (mode, ASHIFT, result,
2016                                   GEN_INT (sign_shift_up),
2017                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
2018   if (sign_shift_dn)
2019     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2020                                   result, GEN_INT (sign_shift_dn),
2021                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
2022
2023   return result;
2024
2025  fail:
2026   delete_insns_since (start);
2027   return NULL;
2028 }
2029
2030 /* Extract a bit field that is split across two words
2031    and return an RTX for the result.
2032
2033    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2034    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2035    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2036
2037 static rtx
2038 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2039                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2040 {
2041   unsigned int unit;
2042   unsigned int bitsdone = 0;
2043   rtx result = NULL_RTX;
2044   int first = 1;
2045
2046   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2047      much at a time.  */
2048   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2049     unit = BITS_PER_WORD;
2050   else
2051     {
2052       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2053       if (0 && bitsize / unit > 2)
2054         {
2055           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2056                                                        unsignedp);
2057           if (tmp)
2058             return tmp;
2059         }
2060     }
2061
2062   while (bitsdone < bitsize)
2063     {
2064       unsigned HOST_WIDE_INT thissize;
2065       rtx part, word;
2066       unsigned HOST_WIDE_INT thispos;
2067       unsigned HOST_WIDE_INT offset;
2068
2069       offset = (bitpos + bitsdone) / unit;
2070       thispos = (bitpos + bitsdone) % unit;
2071
2072       /* THISSIZE must not overrun a word boundary.  Otherwise,
2073          extract_fixed_bit_field will call us again, and we will mutually
2074          recurse forever.  */
2075       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2076       thissize = MIN (thissize, unit - thispos);
2077
2078       /* If OP0 is a register, then handle OFFSET here.
2079
2080          When handling multiword bitfields, extract_bit_field may pass
2081          down a word_mode SUBREG of a larger REG for a bitfield that actually
2082          crosses a word boundary.  Thus, for a SUBREG, we must find
2083          the current word starting from the base register.  */
2084       if (GET_CODE (op0) == SUBREG)
2085         {
2086           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2087           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2088                                         GET_MODE (SUBREG_REG (op0)));
2089           offset = 0;
2090         }
2091       else if (REG_P (op0))
2092         {
2093           word = operand_subword_force (op0, offset, GET_MODE (op0));
2094           offset = 0;
2095         }
2096       else
2097         word = op0;
2098
2099       /* Extract the parts in bit-counting order,
2100          whose meaning is determined by BYTES_PER_UNIT.
2101          OFFSET is in UNITs, and UNIT is in bits.
2102          extract_fixed_bit_field wants offset in bytes.  */
2103       part = extract_fixed_bit_field (word_mode, word,
2104                                       offset * unit / BITS_PER_UNIT,
2105                                       thissize, thispos, 0, 1);
2106       bitsdone += thissize;
2107
2108       /* Shift this part into place for the result.  */
2109       if (BYTES_BIG_ENDIAN)
2110         {
2111           if (bitsize != bitsdone)
2112             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2113                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2114                                  0, 1);
2115         }
2116       else
2117         {
2118           if (bitsdone != thissize)
2119             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2120                                  build_int_cst (NULL_TREE,
2121                                                 bitsdone - thissize), 0, 1);
2122         }
2123
2124       if (first)
2125         result = part;
2126       else
2127         /* Combine the parts with bitwise or.  This works
2128            because we extracted each part as an unsigned bit field.  */
2129         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2130                                OPTAB_LIB_WIDEN);
2131
2132       first = 0;
2133     }
2134
2135   /* Unsigned bit field: we are done.  */
2136   if (unsignedp)
2137     return result;
2138   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2139   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2140                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2141                          NULL_RTX, 0);
2142   return expand_shift (RSHIFT_EXPR, word_mode, result,
2143                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2144                        NULL_RTX, 0);
2145 }
2146 \f
2147 /* Add INC into TARGET.  */
2148
2149 void
2150 expand_inc (rtx target, rtx inc)
2151 {
2152   rtx value = expand_binop (GET_MODE (target), add_optab,
2153                             target, inc,
2154                             target, 0, OPTAB_LIB_WIDEN);
2155   if (value != target)
2156     emit_move_insn (target, value);
2157 }
2158
2159 /* Subtract DEC from TARGET.  */
2160
2161 void
2162 expand_dec (rtx target, rtx dec)
2163 {
2164   rtx value = expand_binop (GET_MODE (target), sub_optab,
2165                             target, dec,
2166                             target, 0, OPTAB_LIB_WIDEN);
2167   if (value != target)
2168     emit_move_insn (target, value);
2169 }
2170 \f
2171 /* Output a shift instruction for expression code CODE,
2172    with SHIFTED being the rtx for the value to shift,
2173    and AMOUNT the tree for the amount to shift by.
2174    Store the result in the rtx TARGET, if that is convenient.
2175    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2176    Return the rtx for where the value is.  */
2177
2178 rtx
2179 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2180               tree amount, rtx target, int unsignedp)
2181 {
2182   rtx op1, temp = 0;
2183   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2184   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2185   int try;
2186
2187   /* Previously detected shift-counts computed by NEGATE_EXPR
2188      and shifted in the other direction; but that does not work
2189      on all machines.  */
2190
2191   op1 = expand_normal (amount);
2192
2193   if (SHIFT_COUNT_TRUNCATED)
2194     {
2195       if (GET_CODE (op1) == CONST_INT
2196           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199                        % GET_MODE_BITSIZE (mode));
2200       else if (GET_CODE (op1) == SUBREG
2201                && subreg_lowpart_p (op1))
2202         op1 = SUBREG_REG (op1);
2203     }
2204
2205   if (op1 == const0_rtx)
2206     return shifted;
2207
2208   /* Check whether its cheaper to implement a left shift by a constant
2209      bit count by a sequence of additions.  */
2210   if (code == LSHIFT_EXPR
2211       && GET_CODE (op1) == CONST_INT
2212       && INTVAL (op1) > 0
2213       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2214       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2215     {
2216       int i;
2217       for (i = 0; i < INTVAL (op1); i++)
2218         {
2219           temp = force_reg (mode, shifted);
2220           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2221                                   unsignedp, OPTAB_LIB_WIDEN);
2222         }
2223       return shifted;
2224     }
2225
2226   for (try = 0; temp == 0 && try < 3; try++)
2227     {
2228       enum optab_methods methods;
2229
2230       if (try == 0)
2231         methods = OPTAB_DIRECT;
2232       else if (try == 1)
2233         methods = OPTAB_WIDEN;
2234       else
2235         methods = OPTAB_LIB_WIDEN;
2236
2237       if (rotate)
2238         {
2239           /* Widening does not work for rotation.  */
2240           if (methods == OPTAB_WIDEN)
2241             continue;
2242           else if (methods == OPTAB_LIB_WIDEN)
2243             {
2244               /* If we have been unable to open-code this by a rotation,
2245                  do it as the IOR of two shifts.  I.e., to rotate A
2246                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2247                  where C is the bitsize of A.
2248
2249                  It is theoretically possible that the target machine might
2250                  not be able to perform either shift and hence we would
2251                  be making two libcalls rather than just the one for the
2252                  shift (similarly if IOR could not be done).  We will allow
2253                  this extremely unlikely lossage to avoid complicating the
2254                  code below.  */
2255
2256               rtx subtarget = target == shifted ? 0 : target;
2257               rtx temp1;
2258               tree type = TREE_TYPE (amount);
2259               tree new_amount = make_tree (type, op1);
2260               tree other_amount
2261                 = fold_build2 (MINUS_EXPR, type,
2262                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2263                                amount);
2264
2265               shifted = force_reg (mode, shifted);
2266
2267               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2268                                    mode, shifted, new_amount, 0, 1);
2269               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2270                                     mode, shifted, other_amount, subtarget, 1);
2271               return expand_binop (mode, ior_optab, temp, temp1, target,
2272                                    unsignedp, methods);
2273             }
2274
2275           temp = expand_binop (mode,
2276                                left ? rotl_optab : rotr_optab,
2277                                shifted, op1, target, unsignedp, methods);
2278         }
2279       else if (unsignedp)
2280         temp = expand_binop (mode,
2281                              left ? ashl_optab : lshr_optab,
2282                              shifted, op1, target, unsignedp, methods);
2283
2284       /* Do arithmetic shifts.
2285          Also, if we are going to widen the operand, we can just as well
2286          use an arithmetic right-shift instead of a logical one.  */
2287       if (temp == 0 && ! rotate
2288           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2289         {
2290           enum optab_methods methods1 = methods;
2291
2292           /* If trying to widen a log shift to an arithmetic shift,
2293              don't accept an arithmetic shift of the same size.  */
2294           if (unsignedp)
2295             methods1 = OPTAB_MUST_WIDEN;
2296
2297           /* Arithmetic shift */
2298
2299           temp = expand_binop (mode,
2300                                left ? ashl_optab : ashr_optab,
2301                                shifted, op1, target, unsignedp, methods1);
2302         }
2303
2304       /* We used to try extzv here for logical right shifts, but that was
2305          only useful for one machine, the VAX, and caused poor code
2306          generation there for lshrdi3, so the code was deleted and a
2307          define_expand for lshrsi3 was added to vax.md.  */
2308     }
2309
2310   gcc_assert (temp);
2311   return temp;
2312 }
2313 \f
2314 enum alg_code {
2315   alg_unknown,
2316   alg_zero,
2317   alg_m, alg_shift,
2318   alg_add_t_m2,
2319   alg_sub_t_m2,
2320   alg_add_factor,
2321   alg_sub_factor,
2322   alg_add_t2_m,
2323   alg_sub_t2_m,
2324   alg_impossible
2325 };
2326
2327 /* This structure holds the "cost" of a multiply sequence.  The
2328    "cost" field holds the total rtx_cost of every operator in the
2329    synthetic multiplication sequence, hence cost(a op b) is defined
2330    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2331    The "latency" field holds the minimum possible latency of the
2332    synthetic multiply, on a hypothetical infinitely parallel CPU.
2333    This is the critical path, or the maximum height, of the expression
2334    tree which is the sum of rtx_costs on the most expensive path from
2335    any leaf to the root.  Hence latency(a op b) is defined as zero for
2336    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2337
2338 struct mult_cost {
2339   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2340   short latency;  /* The latency of the multiplication sequence.  */
2341 };
2342
2343 /* This macro is used to compare a pointer to a mult_cost against an
2344    single integer "rtx_cost" value.  This is equivalent to the macro
2345    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2346 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2347                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2348
2349 /* This macro is used to compare two pointers to mult_costs against
2350    each other.  The macro returns true if X is cheaper than Y.
2351    Currently, the cheaper of two mult_costs is the one with the
2352    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2353 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2354                                  || ((X)->cost == (Y)->cost     \
2355                                      && (X)->latency < (Y)->latency))
2356
2357 /* This structure records a sequence of operations.
2358    `ops' is the number of operations recorded.
2359    `cost' is their total cost.
2360    The operations are stored in `op' and the corresponding
2361    logarithms of the integer coefficients in `log'.
2362
2363    These are the operations:
2364    alg_zero             total := 0;
2365    alg_m                total := multiplicand;
2366    alg_shift            total := total * coeff
2367    alg_add_t_m2         total := total + multiplicand * coeff;
2368    alg_sub_t_m2         total := total - multiplicand * coeff;
2369    alg_add_factor       total := total * coeff + total;
2370    alg_sub_factor       total := total * coeff - total;
2371    alg_add_t2_m         total := total * coeff + multiplicand;
2372    alg_sub_t2_m         total := total * coeff - multiplicand;
2373
2374    The first operand must be either alg_zero or alg_m.  */
2375
2376 struct algorithm
2377 {
2378   struct mult_cost cost;
2379   short ops;
2380   /* The size of the OP and LOG fields are not directly related to the
2381      word size, but the worst-case algorithms will be if we have few
2382      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2383      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2384      in total wordsize operations.  */
2385   enum alg_code op[MAX_BITS_PER_WORD];
2386   char log[MAX_BITS_PER_WORD];
2387 };
2388
2389 /* The entry for our multiplication cache/hash table.  */
2390 struct alg_hash_entry {
2391   /* The number we are multiplying by.  */
2392   unsigned int t;
2393
2394   /* The mode in which we are multiplying something by T.  */
2395   enum machine_mode mode;
2396
2397   /* The best multiplication algorithm for t.  */
2398   enum alg_code alg;
2399
2400   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2401      Otherwise, the cost within which multiplication by T is
2402      impossible.  */
2403   struct mult_cost cost;
2404 };
2405
2406 /* The number of cache/hash entries.  */
2407 #define NUM_ALG_HASH_ENTRIES 307
2408
2409 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2410    actually a hash table.  If we have a collision, that the older
2411    entry is kicked out.  */
2412 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2413
2414 /* Indicates the type of fixup needed after a constant multiplication.
2415    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2416    the result should be negated, and ADD_VARIANT means that the
2417    multiplicand should be added to the result.  */
2418 enum mult_variant {basic_variant, negate_variant, add_variant};
2419
2420 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2421                         const struct mult_cost *, enum machine_mode mode);
2422 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2423                                  struct algorithm *, enum mult_variant *, int);
2424 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2425                               const struct algorithm *, enum mult_variant);
2426 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2427                                                  int, rtx *, int *, int *);
2428 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2429 static rtx extract_high_half (enum machine_mode, rtx);
2430 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2431 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2432                                        int, int);
2433 /* Compute and return the best algorithm for multiplying by T.
2434    The algorithm must cost less than cost_limit
2435    If retval.cost >= COST_LIMIT, no algorithm was found and all
2436    other field of the returned struct are undefined.
2437    MODE is the machine mode of the multiplication.  */
2438
2439 static void
2440 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2441             const struct mult_cost *cost_limit, enum machine_mode mode)
2442 {
2443   int m;
2444   struct algorithm *alg_in, *best_alg;
2445   struct mult_cost best_cost;
2446   struct mult_cost new_limit;
2447   int op_cost, op_latency;
2448   unsigned HOST_WIDE_INT q;
2449   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2450   int hash_index;
2451   bool cache_hit = false;
2452   enum alg_code cache_alg = alg_zero;
2453
2454   /* Indicate that no algorithm is yet found.  If no algorithm
2455      is found, this value will be returned and indicate failure.  */
2456   alg_out->cost.cost = cost_limit->cost + 1;
2457   alg_out->cost.latency = cost_limit->latency + 1;
2458
2459   if (cost_limit->cost < 0
2460       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2461     return;
2462
2463   /* Restrict the bits of "t" to the multiplication's mode.  */
2464   t &= GET_MODE_MASK (mode);
2465
2466   /* t == 1 can be done in zero cost.  */
2467   if (t == 1)
2468     {
2469       alg_out->ops = 1;
2470       alg_out->cost.cost = 0;
2471       alg_out->cost.latency = 0;
2472       alg_out->op[0] = alg_m;
2473       return;
2474     }
2475
2476   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2477      fail now.  */
2478   if (t == 0)
2479     {
2480       if (MULT_COST_LESS (cost_limit, zero_cost))
2481         return;
2482       else
2483         {
2484           alg_out->ops = 1;
2485           alg_out->cost.cost = zero_cost;
2486           alg_out->cost.latency = zero_cost;
2487           alg_out->op[0] = alg_zero;
2488           return;
2489         }
2490     }
2491
2492   /* We'll be needing a couple extra algorithm structures now.  */
2493
2494   alg_in = alloca (sizeof (struct algorithm));
2495   best_alg = alloca (sizeof (struct algorithm));
2496   best_cost = *cost_limit;
2497
2498   /* Compute the hash index.  */
2499   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2500
2501   /* See if we already know what to do for T.  */
2502   if (alg_hash[hash_index].t == t
2503       && alg_hash[hash_index].mode == mode
2504       && alg_hash[hash_index].alg != alg_unknown)
2505     {
2506       cache_alg = alg_hash[hash_index].alg;
2507
2508       if (cache_alg == alg_impossible)
2509         {
2510           /* The cache tells us that it's impossible to synthesize
2511              multiplication by T within alg_hash[hash_index].cost.  */
2512           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2513             /* COST_LIMIT is at least as restrictive as the one
2514                recorded in the hash table, in which case we have no
2515                hope of synthesizing a multiplication.  Just
2516                return.  */
2517             return;
2518
2519           /* If we get here, COST_LIMIT is less restrictive than the
2520              one recorded in the hash table, so we may be able to
2521              synthesize a multiplication.  Proceed as if we didn't
2522              have the cache entry.  */
2523         }
2524       else
2525         {
2526           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2527             /* The cached algorithm shows that this multiplication
2528                requires more cost than COST_LIMIT.  Just return.  This
2529                way, we don't clobber this cache entry with
2530                alg_impossible but retain useful information.  */
2531             return;
2532
2533           cache_hit = true;
2534
2535           switch (cache_alg)
2536             {
2537             case alg_shift:
2538               goto do_alg_shift;
2539
2540             case alg_add_t_m2:
2541             case alg_sub_t_m2:
2542               goto do_alg_addsub_t_m2;
2543
2544             case alg_add_factor:
2545             case alg_sub_factor:
2546               goto do_alg_addsub_factor;
2547
2548             case alg_add_t2_m:
2549               goto do_alg_add_t2_m;
2550
2551             case alg_sub_t2_m:
2552               goto do_alg_sub_t2_m;
2553
2554             default:
2555               gcc_unreachable ();
2556             }
2557         }
2558     }
2559
2560   /* If we have a group of zero bits at the low-order part of T, try
2561      multiplying by the remaining bits and then doing a shift.  */
2562
2563   if ((t & 1) == 0)
2564     {
2565     do_alg_shift:
2566       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2567       if (m < maxm)
2568         {
2569           q = t >> m;
2570           /* The function expand_shift will choose between a shift and
2571              a sequence of additions, so the observed cost is given as
2572              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2573           op_cost = m * add_cost[mode];
2574           if (shift_cost[mode][m] < op_cost)
2575             op_cost = shift_cost[mode][m];
2576           new_limit.cost = best_cost.cost - op_cost;
2577           new_limit.latency = best_cost.latency - op_cost;
2578           synth_mult (alg_in, q, &new_limit, mode);
2579
2580           alg_in->cost.cost += op_cost;
2581           alg_in->cost.latency += op_cost;
2582           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2583             {
2584               struct algorithm *x;
2585               best_cost = alg_in->cost;
2586               x = alg_in, alg_in = best_alg, best_alg = x;
2587               best_alg->log[best_alg->ops] = m;
2588               best_alg->op[best_alg->ops] = alg_shift;
2589             }
2590         }
2591       if (cache_hit)
2592         goto done;
2593     }
2594
2595   /* If we have an odd number, add or subtract one.  */
2596   if ((t & 1) != 0)
2597     {
2598       unsigned HOST_WIDE_INT w;
2599
2600     do_alg_addsub_t_m2:
2601       for (w = 1; (w & t) != 0; w <<= 1)
2602         ;
2603       /* If T was -1, then W will be zero after the loop.  This is another
2604          case where T ends with ...111.  Handling this with (T + 1) and
2605          subtract 1 produces slightly better code and results in algorithm
2606          selection much faster than treating it like the ...0111 case
2607          below.  */
2608       if (w == 0
2609           || (w > 2
2610               /* Reject the case where t is 3.
2611                  Thus we prefer addition in that case.  */
2612               && t != 3))
2613         {
2614           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2615
2616           op_cost = add_cost[mode];
2617           new_limit.cost = best_cost.cost - op_cost;
2618           new_limit.latency = best_cost.latency - op_cost;
2619           synth_mult (alg_in, t + 1, &new_limit, mode);
2620
2621           alg_in->cost.cost += op_cost;
2622           alg_in->cost.latency += op_cost;
2623           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2624             {
2625               struct algorithm *x;
2626               best_cost = alg_in->cost;
2627               x = alg_in, alg_in = best_alg, best_alg = x;
2628               best_alg->log[best_alg->ops] = 0;
2629               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2630             }
2631         }
2632       else
2633         {
2634           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2635
2636           op_cost = add_cost[mode];
2637           new_limit.cost = best_cost.cost - op_cost;
2638           new_limit.latency = best_cost.latency - op_cost;
2639           synth_mult (alg_in, t - 1, &new_limit, mode);
2640
2641           alg_in->cost.cost += op_cost;
2642           alg_in->cost.latency += op_cost;
2643           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644             {
2645               struct algorithm *x;
2646               best_cost = alg_in->cost;
2647               x = alg_in, alg_in = best_alg, best_alg = x;
2648               best_alg->log[best_alg->ops] = 0;
2649               best_alg->op[best_alg->ops] = alg_add_t_m2;
2650             }
2651         }
2652       if (cache_hit)
2653         goto done;
2654     }
2655
2656   /* Look for factors of t of the form
2657      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2658      If we find such a factor, we can multiply by t using an algorithm that
2659      multiplies by q, shift the result by m and add/subtract it to itself.
2660
2661      We search for large factors first and loop down, even if large factors
2662      are less probable than small; if we find a large factor we will find a
2663      good sequence quickly, and therefore be able to prune (by decreasing
2664      COST_LIMIT) the search.  */
2665
2666  do_alg_addsub_factor:
2667   for (m = floor_log2 (t - 1); m >= 2; m--)
2668     {
2669       unsigned HOST_WIDE_INT d;
2670
2671       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2672       if (t % d == 0 && t > d && m < maxm
2673           && (!cache_hit || cache_alg == alg_add_factor))
2674         {
2675           /* If the target has a cheap shift-and-add instruction use
2676              that in preference to a shift insn followed by an add insn.
2677              Assume that the shift-and-add is "atomic" with a latency
2678              equal to its cost, otherwise assume that on superscalar
2679              hardware the shift may be executed concurrently with the
2680              earlier steps in the algorithm.  */
2681           op_cost = add_cost[mode] + shift_cost[mode][m];
2682           if (shiftadd_cost[mode][m] < op_cost)
2683             {
2684               op_cost = shiftadd_cost[mode][m];
2685               op_latency = op_cost;
2686             }
2687           else
2688             op_latency = add_cost[mode];
2689
2690           new_limit.cost = best_cost.cost - op_cost;
2691           new_limit.latency = best_cost.latency - op_latency;
2692           synth_mult (alg_in, t / d, &new_limit, mode);
2693
2694           alg_in->cost.cost += op_cost;
2695           alg_in->cost.latency += op_latency;
2696           if (alg_in->cost.latency < op_cost)
2697             alg_in->cost.latency = op_cost;
2698           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699             {
2700               struct algorithm *x;
2701               best_cost = alg_in->cost;
2702               x = alg_in, alg_in = best_alg, best_alg = x;
2703               best_alg->log[best_alg->ops] = m;
2704               best_alg->op[best_alg->ops] = alg_add_factor;
2705             }
2706           /* Other factors will have been taken care of in the recursion.  */
2707           break;
2708         }
2709
2710       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2711       if (t % d == 0 && t > d && m < maxm
2712           && (!cache_hit || cache_alg == alg_sub_factor))
2713         {
2714           /* If the target has a cheap shift-and-subtract insn use
2715              that in preference to a shift insn followed by a sub insn.
2716              Assume that the shift-and-sub is "atomic" with a latency
2717              equal to it's cost, otherwise assume that on superscalar
2718              hardware the shift may be executed concurrently with the
2719              earlier steps in the algorithm.  */
2720           op_cost = add_cost[mode] + shift_cost[mode][m];
2721           if (shiftsub_cost[mode][m] < op_cost)
2722             {
2723               op_cost = shiftsub_cost[mode][m];
2724               op_latency = op_cost;
2725             }
2726           else
2727             op_latency = add_cost[mode];
2728
2729           new_limit.cost = best_cost.cost - op_cost;
2730           new_limit.latency = best_cost.latency - op_latency;
2731           synth_mult (alg_in, t / d, &new_limit, mode);
2732
2733           alg_in->cost.cost += op_cost;
2734           alg_in->cost.latency += op_latency;
2735           if (alg_in->cost.latency < op_cost)
2736             alg_in->cost.latency = op_cost;
2737           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2738             {
2739               struct algorithm *x;
2740               best_cost = alg_in->cost;
2741               x = alg_in, alg_in = best_alg, best_alg = x;
2742               best_alg->log[best_alg->ops] = m;
2743               best_alg->op[best_alg->ops] = alg_sub_factor;
2744             }
2745           break;
2746         }
2747     }
2748   if (cache_hit)
2749     goto done;
2750
2751   /* Try shift-and-add (load effective address) instructions,
2752      i.e. do a*3, a*5, a*9.  */
2753   if ((t & 1) != 0)
2754     {
2755     do_alg_add_t2_m:
2756       q = t - 1;
2757       q = q & -q;
2758       m = exact_log2 (q);
2759       if (m >= 0 && m < maxm)
2760         {
2761           op_cost = shiftadd_cost[mode][m];
2762           new_limit.cost = best_cost.cost - op_cost;
2763           new_limit.latency = best_cost.latency - op_cost;
2764           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2765
2766           alg_in->cost.cost += op_cost;
2767           alg_in->cost.latency += op_cost;
2768           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769             {
2770               struct algorithm *x;
2771               best_cost = alg_in->cost;
2772               x = alg_in, alg_in = best_alg, best_alg = x;
2773               best_alg->log[best_alg->ops] = m;
2774               best_alg->op[best_alg->ops] = alg_add_t2_m;
2775             }
2776         }
2777       if (cache_hit)
2778         goto done;
2779
2780     do_alg_sub_t2_m:
2781       q = t + 1;
2782       q = q & -q;
2783       m = exact_log2 (q);
2784       if (m >= 0 && m < maxm)
2785         {
2786           op_cost = shiftsub_cost[mode][m];
2787           new_limit.cost = best_cost.cost - op_cost;
2788           new_limit.latency = best_cost.latency - op_cost;
2789           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2790
2791           alg_in->cost.cost += op_cost;
2792           alg_in->cost.latency += op_cost;
2793           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794             {
2795               struct algorithm *x;
2796               best_cost = alg_in->cost;
2797               x = alg_in, alg_in = best_alg, best_alg = x;
2798               best_alg->log[best_alg->ops] = m;
2799               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2800             }
2801         }
2802       if (cache_hit)
2803         goto done;
2804     }
2805
2806  done:
2807   /* If best_cost has not decreased, we have not found any algorithm.  */
2808   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2809     {
2810       /* We failed to find an algorithm.  Record alg_impossible for
2811          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2812          we are asked to find an algorithm for T within the same or
2813          lower COST_LIMIT, we can immediately return to the
2814          caller.  */
2815       alg_hash[hash_index].t = t;
2816       alg_hash[hash_index].mode = mode;
2817       alg_hash[hash_index].alg = alg_impossible;
2818       alg_hash[hash_index].cost = *cost_limit;
2819       return;
2820     }
2821
2822   /* Cache the result.  */
2823   if (!cache_hit)
2824     {
2825       alg_hash[hash_index].t = t;
2826       alg_hash[hash_index].mode = mode;
2827       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2828       alg_hash[hash_index].cost.cost = best_cost.cost;
2829       alg_hash[hash_index].cost.latency = best_cost.latency;
2830     }
2831
2832   /* If we are getting a too long sequence for `struct algorithm'
2833      to record, make this search fail.  */
2834   if (best_alg->ops == MAX_BITS_PER_WORD)
2835     return;
2836
2837   /* Copy the algorithm from temporary space to the space at alg_out.
2838      We avoid using structure assignment because the majority of
2839      best_alg is normally undefined, and this is a critical function.  */
2840   alg_out->ops = best_alg->ops + 1;
2841   alg_out->cost = best_cost;
2842   memcpy (alg_out->op, best_alg->op,
2843           alg_out->ops * sizeof *alg_out->op);
2844   memcpy (alg_out->log, best_alg->log,
2845           alg_out->ops * sizeof *alg_out->log);
2846 }
2847 \f
2848 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2849    Try three variations:
2850
2851        - a shift/add sequence based on VAL itself
2852        - a shift/add sequence based on -VAL, followed by a negation
2853        - a shift/add sequence based on VAL - 1, followed by an addition.
2854
2855    Return true if the cheapest of these cost less than MULT_COST,
2856    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2857
2858 static bool
2859 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2860                      struct algorithm *alg, enum mult_variant *variant,
2861                      int mult_cost)
2862 {
2863   struct algorithm alg2;
2864   struct mult_cost limit;
2865   int op_cost;
2866
2867   /* Fail quickly for impossible bounds.  */
2868   if (mult_cost < 0)
2869     return false;
2870
2871   /* Ensure that mult_cost provides a reasonable upper bound.
2872      Any constant multiplication can be performed with less
2873      than 2 * bits additions.  */
2874   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2875   if (mult_cost > op_cost)
2876     mult_cost = op_cost;
2877
2878   *variant = basic_variant;
2879   limit.cost = mult_cost;
2880   limit.latency = mult_cost;
2881   synth_mult (alg, val, &limit, mode);
2882
2883   /* This works only if the inverted value actually fits in an
2884      `unsigned int' */
2885   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2886     {
2887       op_cost = neg_cost[mode];
2888       if (MULT_COST_LESS (&alg->cost, mult_cost))
2889         {
2890           limit.cost = alg->cost.cost - op_cost;
2891           limit.latency = alg->cost.latency - op_cost;
2892         }
2893       else
2894         {
2895           limit.cost = mult_cost - op_cost;
2896           limit.latency = mult_cost - op_cost;
2897         }
2898
2899       synth_mult (&alg2, -val, &limit, mode);
2900       alg2.cost.cost += op_cost;
2901       alg2.cost.latency += op_cost;
2902       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2903         *alg = alg2, *variant = negate_variant;
2904     }
2905
2906   /* This proves very useful for division-by-constant.  */
2907   op_cost = add_cost[mode];
2908   if (MULT_COST_LESS (&alg->cost, mult_cost))
2909     {
2910       limit.cost = alg->cost.cost - op_cost;
2911       limit.latency = alg->cost.latency - op_cost;
2912     }
2913   else
2914     {
2915       limit.cost = mult_cost - op_cost;
2916       limit.latency = mult_cost - op_cost;
2917     }
2918
2919   synth_mult (&alg2, val - 1, &limit, mode);
2920   alg2.cost.cost += op_cost;
2921   alg2.cost.latency += op_cost;
2922   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2923     *alg = alg2, *variant = add_variant;
2924
2925   return MULT_COST_LESS (&alg->cost, mult_cost);
2926 }
2927
2928 /* A subroutine of expand_mult, used for constant multiplications.
2929    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2930    convenient.  Use the shift/add sequence described by ALG and apply
2931    the final fixup specified by VARIANT.  */
2932
2933 static rtx
2934 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2935                    rtx target, const struct algorithm *alg,
2936                    enum mult_variant variant)
2937 {
2938   HOST_WIDE_INT val_so_far;
2939   rtx insn, accum, tem;
2940   int opno;
2941   enum machine_mode nmode;
2942
2943   /* Avoid referencing memory over and over.
2944      For speed, but also for correctness when mem is volatile.  */
2945   if (MEM_P (op0))
2946     op0 = force_reg (mode, op0);
2947
2948   /* ACCUM starts out either as OP0 or as a zero, depending on
2949      the first operation.  */
2950
2951   if (alg->op[0] == alg_zero)
2952     {
2953       accum = copy_to_mode_reg (mode, const0_rtx);
2954       val_so_far = 0;
2955     }
2956   else if (alg->op[0] == alg_m)
2957     {
2958       accum = copy_to_mode_reg (mode, op0);
2959       val_so_far = 1;
2960     }
2961   else
2962     gcc_unreachable ();
2963
2964   for (opno = 1; opno < alg->ops; opno++)
2965     {
2966       int log = alg->log[opno];
2967       rtx shift_subtarget = optimize ? 0 : accum;
2968       rtx add_target
2969         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2970            && !optimize)
2971           ? target : 0;
2972       rtx accum_target = optimize ? 0 : accum;
2973
2974       switch (alg->op[opno])
2975         {
2976         case alg_shift:
2977           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2978                                 build_int_cst (NULL_TREE, log),
2979                                 NULL_RTX, 0);
2980           val_so_far <<= log;
2981           break;
2982
2983         case alg_add_t_m2:
2984           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2985                               build_int_cst (NULL_TREE, log),
2986                               NULL_RTX, 0);
2987           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2988                                  add_target ? add_target : accum_target);
2989           val_so_far += (HOST_WIDE_INT) 1 << log;
2990           break;
2991
2992         case alg_sub_t_m2:
2993           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2994                               build_int_cst (NULL_TREE, log),
2995                               NULL_RTX, 0);
2996           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2997                                  add_target ? add_target : accum_target);
2998           val_so_far -= (HOST_WIDE_INT) 1 << log;
2999           break;
3000
3001         case alg_add_t2_m:
3002           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3003                                 build_int_cst (NULL_TREE, log),
3004                                 shift_subtarget,
3005                                 0);
3006           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3007                                  add_target ? add_target : accum_target);
3008           val_so_far = (val_so_far << log) + 1;
3009           break;
3010
3011         case alg_sub_t2_m:
3012           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3013                                 build_int_cst (NULL_TREE, log),
3014                                 shift_subtarget, 0);
3015           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3016                                  add_target ? add_target : accum_target);
3017           val_so_far = (val_so_far << log) - 1;
3018           break;
3019
3020         case alg_add_factor:
3021           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3022                               build_int_cst (NULL_TREE, log),
3023                               NULL_RTX, 0);
3024           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3025                                  add_target ? add_target : accum_target);
3026           val_so_far += val_so_far << log;
3027           break;
3028
3029         case alg_sub_factor:
3030           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3031                               build_int_cst (NULL_TREE, log),
3032                               NULL_RTX, 0);
3033           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3034                                  (add_target
3035                                   ? add_target : (optimize ? 0 : tem)));
3036           val_so_far = (val_so_far << log) - val_so_far;
3037           break;
3038
3039         default:
3040           gcc_unreachable ();
3041         }
3042
3043       /* Write a REG_EQUAL note on the last insn so that we can cse
3044          multiplication sequences.  Note that if ACCUM is a SUBREG,
3045          we've set the inner register and must properly indicate
3046          that.  */
3047
3048       tem = op0, nmode = mode;
3049       if (GET_CODE (accum) == SUBREG)
3050         {
3051           nmode = GET_MODE (SUBREG_REG (accum));
3052           tem = gen_lowpart (nmode, op0);
3053         }
3054
3055       insn = get_last_insn ();
3056       set_unique_reg_note (insn, REG_EQUAL,
3057                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3058     }
3059
3060   if (variant == negate_variant)
3061     {
3062       val_so_far = -val_so_far;
3063       accum = expand_unop (mode, neg_optab, accum, target, 0);
3064     }
3065   else if (variant == add_variant)
3066     {
3067       val_so_far = val_so_far + 1;
3068       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3069     }
3070
3071   /* Compare only the bits of val and val_so_far that are significant
3072      in the result mode, to avoid sign-/zero-extension confusion.  */
3073   val &= GET_MODE_MASK (mode);
3074   val_so_far &= GET_MODE_MASK (mode);
3075   gcc_assert (val == val_so_far);
3076
3077   return accum;
3078 }
3079
3080 /* Perform a multiplication and return an rtx for the result.
3081    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3082    TARGET is a suggestion for where to store the result (an rtx).
3083
3084    We check specially for a constant integer as OP1.
3085    If you want this check for OP0 as well, then before calling
3086    you should swap the two operands if OP0 would be constant.  */
3087
3088 rtx
3089 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3090              int unsignedp)
3091 {
3092   enum mult_variant variant;
3093   struct algorithm algorithm;
3094   int max_cost;
3095
3096   /* Handling const0_rtx here allows us to use zero as a rogue value for
3097      coeff below.  */
3098   if (op1 == const0_rtx)
3099     return const0_rtx;
3100   if (op1 == const1_rtx)
3101     return op0;
3102   if (op1 == constm1_rtx)
3103     return expand_unop (mode,
3104                         GET_MODE_CLASS (mode) == MODE_INT
3105                         && !unsignedp && flag_trapv
3106                         ? negv_optab : neg_optab,
3107                         op0, target, 0);
3108
3109   /* These are the operations that are potentially turned into a sequence
3110      of shifts and additions.  */
3111   if (SCALAR_INT_MODE_P (mode)
3112       && (unsignedp || !flag_trapv))
3113     {
3114       HOST_WIDE_INT coeff = 0;
3115       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3116
3117       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3118          less than or equal in size to `unsigned int' this doesn't matter.
3119          If the mode is larger than `unsigned int', then synth_mult works
3120          only if the constant value exactly fits in an `unsigned int' without
3121          any truncation.  This means that multiplying by negative values does
3122          not work; results are off by 2^32 on a 32 bit machine.  */
3123
3124       if (GET_CODE (op1) == CONST_INT)
3125         {
3126           /* Attempt to handle multiplication of DImode values by negative
3127              coefficients, by performing the multiplication by a positive
3128              multiplier and then inverting the result.  */
3129           if (INTVAL (op1) < 0
3130               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3131             {
3132               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3133                  result is interpreted as an unsigned coefficient.
3134                  Exclude cost of op0 from max_cost to match the cost
3135                  calculation of the synth_mult.  */
3136               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3137                          - neg_cost[mode];
3138               if (max_cost > 0
3139                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3140                                           &variant, max_cost))
3141                 {
3142                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3143                                                 NULL_RTX, &algorithm,
3144                                                 variant);
3145                   return expand_unop (mode, neg_optab, temp, target, 0);
3146                 }
3147             }
3148           else coeff = INTVAL (op1);
3149         }
3150       else if (GET_CODE (op1) == CONST_DOUBLE)
3151         {
3152           /* If we are multiplying in DImode, it may still be a win
3153              to try to work with shifts and adds.  */
3154           if (CONST_DOUBLE_HIGH (op1) == 0)
3155             coeff = CONST_DOUBLE_LOW (op1);
3156           else if (CONST_DOUBLE_LOW (op1) == 0
3157                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3158             {
3159               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3160                           + HOST_BITS_PER_WIDE_INT;
3161               return expand_shift (LSHIFT_EXPR, mode, op0,
3162                                    build_int_cst (NULL_TREE, shift),
3163                                    target, unsignedp);
3164             }
3165         }
3166
3167       /* We used to test optimize here, on the grounds that it's better to
3168          produce a smaller program when -O is not used.  But this causes
3169          such a terrible slowdown sometimes that it seems better to always
3170          use synth_mult.  */
3171       if (coeff != 0)
3172         {
3173           /* Special case powers of two.  */
3174           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3175             return expand_shift (LSHIFT_EXPR, mode, op0,
3176                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3177                                  target, unsignedp);
3178
3179           /* Exclude cost of op0 from max_cost to match the cost
3180              calculation of the synth_mult.  */
3181           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3182           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3183                                    max_cost))
3184             return expand_mult_const (mode, op0, coeff, target,
3185                                       &algorithm, variant);
3186         }
3187     }
3188
3189   if (GET_CODE (op0) == CONST_DOUBLE)
3190     {
3191       rtx temp = op0;
3192       op0 = op1;
3193       op1 = temp;
3194     }
3195
3196   /* Expand x*2.0 as x+x.  */
3197   if (GET_CODE (op1) == CONST_DOUBLE
3198       && SCALAR_FLOAT_MODE_P (mode))
3199     {
3200       REAL_VALUE_TYPE d;
3201       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3202
3203       if (REAL_VALUES_EQUAL (d, dconst2))
3204         {
3205           op0 = force_reg (GET_MODE (op0), op0);
3206           return expand_binop (mode, add_optab, op0, op0,
3207                                target, unsignedp, OPTAB_LIB_WIDEN);
3208         }
3209     }
3210
3211   /* This used to use umul_optab if unsigned, but for non-widening multiply
3212      there is no difference between signed and unsigned.  */
3213   op0 = expand_binop (mode,
3214                       ! unsignedp
3215                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3216                       ? smulv_optab : smul_optab,
3217                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3218   gcc_assert (op0);
3219   return op0;
3220 }
3221 \f
3222 /* Return the smallest n such that 2**n >= X.  */
3223
3224 int
3225 ceil_log2 (unsigned HOST_WIDE_INT x)
3226 {
3227   return floor_log2 (x - 1) + 1;
3228 }
3229
3230 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3231    replace division by D, and put the least significant N bits of the result
3232    in *MULTIPLIER_PTR and return the most significant bit.
3233
3234    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3235    needed precision is in PRECISION (should be <= N).
3236
3237    PRECISION should be as small as possible so this function can choose
3238    multiplier more freely.
3239
3240    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3241    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3242
3243    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3244    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3245
3246 static
3247 unsigned HOST_WIDE_INT
3248 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3249                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3250 {
3251   HOST_WIDE_INT mhigh_hi, mlow_hi;
3252   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3253   int lgup, post_shift;
3254   int pow, pow2;
3255   unsigned HOST_WIDE_INT nl, dummy1;
3256   HOST_WIDE_INT nh, dummy2;
3257
3258   /* lgup = ceil(log2(divisor)); */
3259   lgup = ceil_log2 (d);
3260
3261   gcc_assert (lgup <= n);
3262
3263   pow = n + lgup;
3264   pow2 = n + lgup - precision;
3265
3266   /* We could handle this with some effort, but this case is much
3267      better handled directly with a scc insn, so rely on caller using
3268      that.  */
3269   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3270
3271   /* mlow = 2^(N + lgup)/d */
3272  if (pow >= HOST_BITS_PER_WIDE_INT)
3273     {
3274       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3275       nl = 0;
3276     }
3277   else
3278     {
3279       nh = 0;
3280       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3281     }
3282   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3283                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3284
3285   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3286   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3287     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3288   else
3289     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3290   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3291                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3292
3293   gcc_assert (!mhigh_hi || nh - d < d);
3294   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3295   /* Assert that mlow < mhigh.  */
3296   gcc_assert (mlow_hi < mhigh_hi
3297               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3298
3299   /* If precision == N, then mlow, mhigh exceed 2^N
3300      (but they do not exceed 2^(N+1)).  */
3301
3302   /* Reduce to lowest terms.  */
3303   for (post_shift = lgup; post_shift > 0; post_shift--)
3304     {
3305       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3306       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3307       if (ml_lo >= mh_lo)
3308         break;
3309
3310       mlow_hi = 0;
3311       mlow_lo = ml_lo;
3312       mhigh_hi = 0;
3313       mhigh_lo = mh_lo;
3314     }
3315
3316   *post_shift_ptr = post_shift;
3317   *lgup_ptr = lgup;
3318   if (n < HOST_BITS_PER_WIDE_INT)
3319     {
3320       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3321       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3322       return mhigh_lo >= mask;
3323     }
3324   else
3325     {
3326       *multiplier_ptr = GEN_INT (mhigh_lo);
3327       return mhigh_hi;
3328     }
3329 }
3330
3331 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3332    congruent to 1 (mod 2**N).  */
3333
3334 static unsigned HOST_WIDE_INT
3335 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3336 {
3337   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3338
3339   /* The algorithm notes that the choice y = x satisfies
3340      x*y == 1 mod 2^3, since x is assumed odd.
3341      Each iteration doubles the number of bits of significance in y.  */
3342
3343   unsigned HOST_WIDE_INT mask;
3344   unsigned HOST_WIDE_INT y = x;
3345   int nbit = 3;
3346
3347   mask = (n == HOST_BITS_PER_WIDE_INT
3348           ? ~(unsigned HOST_WIDE_INT) 0
3349           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3350
3351   while (nbit < n)
3352     {
3353       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3354       nbit *= 2;
3355     }
3356   return y;
3357 }
3358
3359 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3360    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3361    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3362    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3363    become signed.
3364
3365    The result is put in TARGET if that is convenient.
3366
3367    MODE is the mode of operation.  */
3368
3369 rtx
3370 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3371                              rtx op1, rtx target, int unsignedp)
3372 {
3373   rtx tem;
3374   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3375
3376   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3377                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3378                       NULL_RTX, 0);
3379   tem = expand_and (mode, tem, op1, NULL_RTX);
3380   adj_operand
3381     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3382                      adj_operand);
3383
3384   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3385                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3386                       NULL_RTX, 0);
3387   tem = expand_and (mode, tem, op0, NULL_RTX);
3388   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3389                           target);
3390
3391   return target;
3392 }
3393
3394 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3395
3396 static rtx
3397 extract_high_half (enum machine_mode mode, rtx op)
3398 {
3399   enum machine_mode wider_mode;
3400
3401   if (mode == word_mode)
3402     return gen_highpart (mode, op);
3403
3404   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3405
3406   wider_mode = GET_MODE_WIDER_MODE (mode);
3407   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3408                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3409   return convert_modes (mode, wider_mode, op, 0);
3410 }
3411
3412 /* Like expand_mult_highpart, but only consider using a multiplication
3413    optab.  OP1 is an rtx for the constant operand.  */
3414
3415 static rtx
3416 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3417                             rtx target, int unsignedp, int max_cost)
3418 {
3419   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3420   enum machine_mode wider_mode;
3421   optab moptab;
3422   rtx tem;
3423   int size;
3424
3425   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3426
3427   wider_mode = GET_MODE_WIDER_MODE (mode);
3428   size = GET_MODE_BITSIZE (mode);
3429
3430   /* Firstly, try using a multiplication insn that only generates the needed
3431      high part of the product, and in the sign flavor of unsignedp.  */
3432   if (mul_highpart_cost[mode] < max_cost)
3433     {
3434       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3435       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3436                           unsignedp, OPTAB_DIRECT);
3437       if (tem)
3438         return tem;
3439     }
3440
3441   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3442      Need to adjust the result after the multiplication.  */
3443   if (size - 1 < BITS_PER_WORD
3444       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3445           + 4 * add_cost[mode] < max_cost))
3446     {
3447       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3448       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3449                           unsignedp, OPTAB_DIRECT);
3450       if (tem)
3451         /* We used the wrong signedness.  Adjust the result.  */
3452         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3453                                             tem, unsignedp);
3454     }
3455
3456   /* Try widening multiplication.  */
3457   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3458   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3459       && mul_widen_cost[wider_mode] < max_cost)
3460     {
3461       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3462                           unsignedp, OPTAB_WIDEN);
3463       if (tem)
3464         return extract_high_half (mode, tem);
3465     }
3466
3467   /* Try widening the mode and perform a non-widening multiplication.  */
3468   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3469       && size - 1 < BITS_PER_WORD
3470       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3471     {
3472       rtx insns, wop0, wop1;
3473
3474       /* We need to widen the operands, for example to ensure the
3475          constant multiplier is correctly sign or zero extended.
3476          Use a sequence to clean-up any instructions emitted by
3477          the conversions if things don't work out.  */
3478       start_sequence ();
3479       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3480       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3481       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3482                           unsignedp, OPTAB_WIDEN);
3483       insns = get_insns ();
3484       end_sequence ();
3485
3486       if (tem)
3487         {
3488           emit_insn (insns);
3489           return extract_high_half (mode, tem);
3490         }
3491     }
3492
3493   /* Try widening multiplication of opposite signedness, and adjust.  */
3494   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3495   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3496       && size - 1 < BITS_PER_WORD
3497       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3498           + 4 * add_cost[mode] < max_cost))
3499     {
3500       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3501                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3502       if (tem != 0)
3503         {
3504           tem = extract_high_half (mode, tem);
3505           /* We used the wrong signedness.  Adjust the result.  */
3506           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3507                                               target, unsignedp);
3508         }
3509     }
3510
3511   return 0;
3512 }
3513
3514 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3515    putting the high half of the result in TARGET if that is convenient,
3516    and return where the result is.  If the operation can not be performed,
3517    0 is returned.
3518
3519    MODE is the mode of operation and result.
3520
3521    UNSIGNEDP nonzero means unsigned multiply.
3522
3523    MAX_COST is the total allowed cost for the expanded RTL.  */
3524
3525 static rtx
3526 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3527                       rtx target, int unsignedp, int max_cost)
3528 {
3529   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3530   unsigned HOST_WIDE_INT cnst1;
3531   int extra_cost;
3532   bool sign_adjust = false;
3533   enum mult_variant variant;
3534   struct algorithm alg;
3535   rtx tem;
3536
3537   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3538   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3539   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3540
3541   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3542
3543   /* We can't optimize modes wider than BITS_PER_WORD.
3544      ??? We might be able to perform double-word arithmetic if
3545      mode == word_mode, however all the cost calculations in
3546      synth_mult etc. assume single-word operations.  */
3547   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3548     return expand_mult_highpart_optab (mode, op0, op1, target,
3549                                        unsignedp, max_cost);
3550
3551   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3552
3553   /* Check whether we try to multiply by a negative constant.  */
3554   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3555     {
3556       sign_adjust = true;
3557       extra_cost += add_cost[mode];
3558     }
3559
3560   /* See whether shift/add multiplication is cheap enough.  */
3561   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3562                            max_cost - extra_cost))
3563     {
3564       /* See whether the specialized multiplication optabs are
3565          cheaper than the shift/add version.  */
3566       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3567                                         alg.cost.cost + extra_cost);
3568       if (tem)
3569         return tem;
3570
3571       tem = convert_to_mode (wider_mode, op0, unsignedp);
3572       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3573       tem = extract_high_half (mode, tem);
3574
3575       /* Adjust result for signedness.  */
3576       if (sign_adjust)
3577         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3578
3579       return tem;
3580     }
3581   return expand_mult_highpart_optab (mode, op0, op1, target,
3582                                      unsignedp, max_cost);
3583 }
3584
3585
3586 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3587
3588 static rtx
3589 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3590 {
3591   unsigned HOST_WIDE_INT masklow, maskhigh;
3592   rtx result, temp, shift, label;
3593   int logd;
3594
3595   logd = floor_log2 (d);
3596   result = gen_reg_rtx (mode);
3597
3598   /* Avoid conditional branches when they're expensive.  */
3599   if (BRANCH_COST >= 2
3600       && !optimize_size)
3601     {
3602       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3603                                       mode, 0, -1);
3604       if (signmask)
3605         {
3606           signmask = force_reg (mode, signmask);
3607           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3608           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3609
3610           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3611              which instruction sequence to use.  If logical right shifts
3612              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3613              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3614
3615           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3616           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3617               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3618             {
3619               temp = expand_binop (mode, xor_optab, op0, signmask,
3620                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3621               temp = expand_binop (mode, sub_optab, temp, signmask,
3622                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3623               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3624                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3625               temp = expand_binop (mode, xor_optab, temp, signmask,
3626                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3627               temp = expand_binop (mode, sub_optab, temp, signmask,
3628                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3629             }
3630           else
3631             {
3632               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3633                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634               signmask = force_reg (mode, signmask);
3635
3636               temp = expand_binop (mode, add_optab, op0, signmask,
3637                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3638               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3639                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640               temp = expand_binop (mode, sub_optab, temp, signmask,
3641                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3642             }
3643           return temp;
3644         }
3645     }
3646
3647   /* Mask contains the mode's signbit and the significant bits of the
3648      modulus.  By including the signbit in the operation, many targets
3649      can avoid an explicit compare operation in the following comparison
3650      against zero.  */
3651
3652   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3653   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3654     {
3655       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3656       maskhigh = -1;
3657     }
3658   else
3659     maskhigh = (HOST_WIDE_INT) -1
3660                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3661
3662   temp = expand_binop (mode, and_optab, op0,
3663                        immed_double_const (masklow, maskhigh, mode),
3664                        result, 1, OPTAB_LIB_WIDEN);
3665   if (temp != result)
3666     emit_move_insn (result, temp);
3667
3668   label = gen_label_rtx ();
3669   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3670
3671   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3672                        0, OPTAB_LIB_WIDEN);
3673   masklow = (HOST_WIDE_INT) -1 << logd;
3674   maskhigh = -1;
3675   temp = expand_binop (mode, ior_optab, temp,
3676                        immed_double_const (masklow, maskhigh, mode),
3677                        result, 1, OPTAB_LIB_WIDEN);
3678   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3679                        0, OPTAB_LIB_WIDEN);
3680   if (temp != result)
3681     emit_move_insn (result, temp);
3682   emit_label (label);
3683   return result;
3684 }
3685
3686 /* Expand signed division of OP0 by a power of two D in mode MODE.
3687    This routine is only called for positive values of D.  */
3688
3689 static rtx
3690 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3691 {
3692   rtx temp, label;
3693   tree shift;
3694   int logd;
3695
3696   logd = floor_log2 (d);
3697   shift = build_int_cst (NULL_TREE, logd);
3698
3699   if (d == 2 && BRANCH_COST >= 1)
3700     {
3701       temp = gen_reg_rtx (mode);
3702       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3703       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3704                            0, OPTAB_LIB_WIDEN);
3705       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3706     }
3707
3708 #ifdef HAVE_conditional_move
3709   if (BRANCH_COST >= 2)
3710     {
3711       rtx temp2;
3712
3713       /* ??? emit_conditional_move forces a stack adjustment via
3714          compare_from_rtx so, if the sequence is discarded, it will
3715          be lost.  Do it now instead.  */
3716       do_pending_stack_adjust ();
3717
3718       start_sequence ();
3719       temp2 = copy_to_mode_reg (mode, op0);
3720       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3721                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3722       temp = force_reg (mode, temp);
3723
3724       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3725       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3726                                      mode, temp, temp2, mode, 0);
3727       if (temp2)
3728         {
3729           rtx seq = get_insns ();
3730           end_sequence ();
3731           emit_insn (seq);
3732           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3733         }
3734       end_sequence ();
3735     }
3736 #endif
3737
3738   if (BRANCH_COST >= 2)
3739     {
3740       int ushift = GET_MODE_BITSIZE (mode) - logd;
3741
3742       temp = gen_reg_rtx (mode);
3743       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3744       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3745         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3746                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3747       else
3748         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3749                              build_int_cst (NULL_TREE, ushift),
3750                              NULL_RTX, 1);
3751       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3752                            0, OPTAB_LIB_WIDEN);
3753       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3754     }
3755
3756   label = gen_label_rtx ();
3757   temp = copy_to_mode_reg (mode, op0);
3758   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3759   expand_inc (temp, GEN_INT (d - 1));
3760   emit_label (label);
3761   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3762 }
3763 \f
3764 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3765    if that is convenient, and returning where the result is.
3766    You may request either the quotient or the remainder as the result;
3767    specify REM_FLAG nonzero to get the remainder.
3768
3769    CODE is the expression code for which kind of division this is;
3770    it controls how rounding is done.  MODE is the machine mode to use.
3771    UNSIGNEDP nonzero means do unsigned division.  */
3772
3773 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3774    and then correct it by or'ing in missing high bits
3775    if result of ANDI is nonzero.
3776    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3777    This could optimize to a bfexts instruction.
3778    But C doesn't use these operations, so their optimizations are
3779    left for later.  */
3780 /* ??? For modulo, we don't actually need the highpart of the first product,
3781    the low part will do nicely.  And for small divisors, the second multiply
3782    can also be a low-part only multiply or even be completely left out.
3783    E.g. to calculate the remainder of a division by 3 with a 32 bit
3784    multiply, multiply with 0x55555556 and extract the upper two bits;
3785    the result is exact for inputs up to 0x1fffffff.
3786    The input range can be reduced by using cross-sum rules.
3787    For odd divisors >= 3, the following table gives right shift counts
3788    so that if a number is shifted by an integer multiple of the given
3789    amount, the remainder stays the same:
3790    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3791    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3792    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3793    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3794    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3795
3796    Cross-sum rules for even numbers can be derived by leaving as many bits
3797    to the right alone as the divisor has zeros to the right.
3798    E.g. if x is an unsigned 32 bit number:
3799    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3800    */
3801
3802 rtx
3803 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3804                rtx op0, rtx op1, rtx target, int unsignedp)
3805 {
3806   enum machine_mode compute_mode;
3807   rtx tquotient;
3808   rtx quotient = 0, remainder = 0;
3809   rtx last;
3810   int size;
3811   rtx insn, set;
3812   optab optab1, optab2;
3813   int op1_is_constant, op1_is_pow2 = 0;
3814   int max_cost, extra_cost;
3815   static HOST_WIDE_INT last_div_const = 0;
3816   static HOST_WIDE_INT ext_op1;
3817
3818   op1_is_constant = GET_CODE (op1) == CONST_INT;
3819   if (op1_is_constant)
3820     {
3821       ext_op1 = INTVAL (op1);
3822       if (unsignedp)
3823         ext_op1 &= GET_MODE_MASK (mode);
3824       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3825                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3826     }
3827
3828   /*
3829      This is the structure of expand_divmod:
3830
3831      First comes code to fix up the operands so we can perform the operations
3832      correctly and efficiently.
3833
3834      Second comes a switch statement with code specific for each rounding mode.
3835      For some special operands this code emits all RTL for the desired
3836      operation, for other cases, it generates only a quotient and stores it in
3837      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3838      to indicate that it has not done anything.
3839
3840      Last comes code that finishes the operation.  If QUOTIENT is set and
3841      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3842      QUOTIENT is not set, it is computed using trunc rounding.
3843
3844      We try to generate special code for division and remainder when OP1 is a
3845      constant.  If |OP1| = 2**n we can use shifts and some other fast
3846      operations.  For other values of OP1, we compute a carefully selected
3847      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3848      by m.
3849
3850      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3851      half of the product.  Different strategies for generating the product are
3852      implemented in expand_mult_highpart.
3853
3854      If what we actually want is the remainder, we generate that by another
3855      by-constant multiplication and a subtraction.  */
3856
3857   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3858      code below will malfunction if we are, so check here and handle
3859      the special case if so.  */
3860   if (op1 == const1_rtx)
3861     return rem_flag ? const0_rtx : op0;
3862
3863     /* When dividing by -1, we could get an overflow.
3864      negv_optab can handle overflows.  */
3865   if (! unsignedp && op1 == constm1_rtx)
3866     {
3867       if (rem_flag)
3868         return const0_rtx;
3869       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3870                           ? negv_optab : neg_optab, op0, target, 0);
3871     }
3872
3873   if (target
3874       /* Don't use the function value register as a target
3875          since we have to read it as well as write it,
3876          and function-inlining gets confused by this.  */
3877       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3878           /* Don't clobber an operand while doing a multi-step calculation.  */
3879           || ((rem_flag || op1_is_constant)
3880               && (reg_mentioned_p (target, op0)
3881                   || (MEM_P (op0) && MEM_P (target))))
3882           || reg_mentioned_p (target, op1)
3883           || (MEM_P (op1) && MEM_P (target))))
3884     target = 0;
3885
3886   /* Get the mode in which to perform this computation.  Normally it will
3887      be MODE, but sometimes we can't do the desired operation in MODE.
3888      If so, pick a wider mode in which we can do the operation.  Convert
3889      to that mode at the start to avoid repeated conversions.
3890
3891      First see what operations we need.  These depend on the expression
3892      we are evaluating.  (We assume that divxx3 insns exist under the
3893      same conditions that modxx3 insns and that these insns don't normally
3894      fail.  If these assumptions are not correct, we may generate less
3895      efficient code in some cases.)
3896
3897      Then see if we find a mode in which we can open-code that operation
3898      (either a division, modulus, or shift).  Finally, check for the smallest
3899      mode for which we can do the operation with a library call.  */
3900
3901   /* We might want to refine this now that we have division-by-constant
3902      optimization.  Since expand_mult_highpart tries so many variants, it is
3903      not straightforward to generalize this.  Maybe we should make an array
3904      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3905
3906   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3907             ? (unsignedp ? lshr_optab : ashr_optab)
3908             : (unsignedp ? udiv_optab : sdiv_optab));
3909   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3910             ? optab1
3911             : (unsignedp ? udivmod_optab : sdivmod_optab));
3912
3913   for (compute_mode = mode; compute_mode != VOIDmode;
3914        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3915     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3916         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3917       break;
3918
3919   if (compute_mode == VOIDmode)
3920     for (compute_mode = mode; compute_mode != VOIDmode;
3921          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3922       if (optab1->handlers[compute_mode].libfunc
3923           || optab2->handlers[compute_mode].libfunc)
3924         break;
3925
3926   /* If we still couldn't find a mode, use MODE, but expand_binop will
3927      probably die.  */
3928   if (compute_mode == VOIDmode)
3929     compute_mode = mode;
3930
3931   if (target && GET_MODE (target) == compute_mode)
3932     tquotient = target;
3933   else
3934     tquotient = gen_reg_rtx (compute_mode);
3935
3936   size = GET_MODE_BITSIZE (compute_mode);
3937 #if 0
3938   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3939      (mode), and thereby get better code when OP1 is a constant.  Do that
3940      later.  It will require going over all usages of SIZE below.  */
3941   size = GET_MODE_BITSIZE (mode);
3942 #endif
3943
3944   /* Only deduct something for a REM if the last divide done was
3945      for a different constant.   Then set the constant of the last
3946      divide.  */
3947   max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3948   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3949                      && INTVAL (op1) == last_div_const))
3950     max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3951
3952   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3953
3954   /* Now convert to the best mode to use.  */
3955   if (compute_mode != mode)
3956     {
3957       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3958       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3959
3960       /* convert_modes may have placed op1 into a register, so we
3961          must recompute the following.  */
3962       op1_is_constant = GET_CODE (op1) == CONST_INT;
3963       op1_is_pow2 = (op1_is_constant
3964                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3965                           || (! unsignedp
3966                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3967     }
3968
3969   /* If one of the operands is a volatile MEM, copy it into a register.  */
3970
3971   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3972     op0 = force_reg (compute_mode, op0);
3973   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3974     op1 = force_reg (compute_mode, op1);
3975
3976   /* If we need the remainder or if OP1 is constant, we need to
3977      put OP0 in a register in case it has any queued subexpressions.  */
3978   if (rem_flag || op1_is_constant)
3979     op0 = force_reg (compute_mode, op0);
3980
3981   last = get_last_insn ();
3982
3983   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3984   if (unsignedp)
3985     {
3986       if (code == FLOOR_DIV_EXPR)
3987         code = TRUNC_DIV_EXPR;
3988       if (code == FLOOR_MOD_EXPR)
3989         code = TRUNC_MOD_EXPR;
3990       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3991         code = TRUNC_DIV_EXPR;
3992     }
3993
3994   if (op1 != const0_rtx)
3995     switch (code)
3996       {
3997       case TRUNC_MOD_EXPR:
3998       case TRUNC_DIV_EXPR:
3999         if (op1_is_constant)
4000           {
4001             if (unsignedp)
4002               {
4003                 unsigned HOST_WIDE_INT mh;
4004                 int pre_shift, post_shift;
4005                 int dummy;
4006                 rtx ml;
4007                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4008                                             & GET_MODE_MASK (compute_mode));
4009
4010                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4011                   {
4012                     pre_shift = floor_log2 (d);
4013                     if (rem_flag)
4014                       {
4015                         remainder
4016                           = expand_binop (compute_mode, and_optab, op0,
4017                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4018                                           remainder, 1,
4019                                           OPTAB_LIB_WIDEN);
4020                         if (remainder)
4021                           return gen_lowpart (mode, remainder);
4022                       }
4023                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4024                                              build_int_cst (NULL_TREE,
4025                                                             pre_shift),
4026                                              tquotient, 1);
4027                   }
4028                 else if (size <= HOST_BITS_PER_WIDE_INT)
4029                   {
4030                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4031                       {
4032                         /* Most significant bit of divisor is set; emit an scc
4033                            insn.  */
4034                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
4035                                                     compute_mode, 1, 1);
4036                         if (quotient == 0)
4037                           goto fail1;
4038                       }
4039                     else
4040                       {
4041                         /* Find a suitable multiplier and right shift count
4042                            instead of multiplying with D.  */
4043
4044                         mh = choose_multiplier (d, size, size,
4045                                                 &ml, &post_shift, &dummy);
4046
4047                         /* If the suggested multiplier is more than SIZE bits,
4048                            we can do better for even divisors, using an
4049                            initial right shift.  */
4050                         if (mh != 0 && (d & 1) == 0)
4051                           {
4052                             pre_shift = floor_log2 (d & -d);
4053                             mh = choose_multiplier (d >> pre_shift, size,
4054                                                     size - pre_shift,
4055                                                     &ml, &post_shift, &dummy);
4056                             gcc_assert (!mh);
4057                           }
4058                         else
4059                           pre_shift = 0;
4060
4061                         if (mh != 0)
4062                           {
4063                             rtx t1, t2, t3, t4;
4064
4065                             if (post_shift - 1 >= BITS_PER_WORD)
4066                               goto fail1;
4067
4068                             extra_cost
4069                               = (shift_cost[compute_mode][post_shift - 1]
4070                                  + shift_cost[compute_mode][1]
4071                                  + 2 * add_cost[compute_mode]);
4072                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4073                                                        NULL_RTX, 1,
4074                                                        max_cost - extra_cost);
4075                             if (t1 == 0)
4076                               goto fail1;
4077                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4078                                                                op0, t1),
4079                                                 NULL_RTX);
4080                             t3 = expand_shift
4081                               (RSHIFT_EXPR, compute_mode, t2,
4082                                build_int_cst (NULL_TREE, 1),
4083                                NULL_RTX,1);
4084                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4085                                                               t1, t3),
4086                                                 NULL_RTX);
4087                             quotient = expand_shift
4088                               (RSHIFT_EXPR, compute_mode, t4,
4089                                build_int_cst (NULL_TREE, post_shift - 1),
4090                                tquotient, 1);
4091                           }
4092                         else
4093                           {
4094                             rtx t1, t2;
4095
4096                             if (pre_shift >= BITS_PER_WORD
4097                                 || post_shift >= BITS_PER_WORD)
4098                               goto fail1;
4099
4100                             t1 = expand_shift
4101                               (RSHIFT_EXPR, compute_mode, op0,
4102                                build_int_cst (NULL_TREE, pre_shift),
4103                                NULL_RTX, 1);
4104                             extra_cost
4105                               = (shift_cost[compute_mode][pre_shift]
4106                                  + shift_cost[compute_mode][post_shift]);
4107                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4108                                                        NULL_RTX, 1,
4109                                                        max_cost - extra_cost);
4110                             if (t2 == 0)
4111                               goto fail1;
4112                             quotient = expand_shift
4113                               (RSHIFT_EXPR, compute_mode, t2,
4114                                build_int_cst (NULL_TREE, post_shift),
4115                                tquotient, 1);
4116                           }
4117                       }
4118                   }
4119                 else            /* Too wide mode to use tricky code */
4120                   break;
4121
4122                 insn = get_last_insn ();
4123                 if (insn != last
4124                     && (set = single_set (insn)) != 0
4125                     && SET_DEST (set) == quotient)
4126                   set_unique_reg_note (insn,
4127                                        REG_EQUAL,
4128                                        gen_rtx_UDIV (compute_mode, op0, op1));
4129               }
4130             else                /* TRUNC_DIV, signed */
4131               {
4132                 unsigned HOST_WIDE_INT ml;
4133                 int lgup, post_shift;
4134                 rtx mlr;
4135                 HOST_WIDE_INT d = INTVAL (op1);
4136                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4137
4138                 /* n rem d = n rem -d */
4139                 if (rem_flag && d < 0)
4140                   {
4141                     d = abs_d;
4142                     op1 = gen_int_mode (abs_d, compute_mode);
4143                   }
4144
4145                 if (d == 1)
4146                   quotient = op0;
4147                 else if (d == -1)
4148                   quotient = expand_unop (compute_mode, neg_optab, op0,
4149                                           tquotient, 0);
4150                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4151                   {
4152                     /* This case is not handled correctly below.  */
4153                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4154                                                 compute_mode, 1, 1);
4155                     if (quotient == 0)
4156                       goto fail1;
4157                   }
4158                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4159                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4160                                       : sdiv_pow2_cheap[compute_mode])
4161                          /* We assume that cheap metric is true if the
4162                             optab has an expander for this mode.  */
4163                          && (((rem_flag ? smod_optab : sdiv_optab)
4164                               ->handlers[compute_mode].insn_code
4165                               != CODE_FOR_nothing)
4166                              || (sdivmod_optab->handlers[compute_mode]
4167                                  .insn_code != CODE_FOR_nothing)))
4168                   ;
4169                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4170                   {
4171                     if (rem_flag)
4172                       {
4173                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4174                         if (remainder)
4175                           return gen_lowpart (mode, remainder);
4176                       }
4177
4178                     if (sdiv_pow2_cheap[compute_mode]
4179                         && ((sdiv_optab->handlers[compute_mode].insn_code
4180                              != CODE_FOR_nothing)
4181                             || (sdivmod_optab->handlers[compute_mode].insn_code
4182                                 != CODE_FOR_nothing)))
4183                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4184                                                 compute_mode, op0,
4185                                                 gen_int_mode (abs_d,
4186                                                               compute_mode),
4187                                                 NULL_RTX, 0);
4188                     else
4189                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4190
4191                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4192                        negate the quotient.  */
4193                     if (d < 0)
4194                       {
4195                         insn = get_last_insn ();
4196                         if (insn != last
4197                             && (set = single_set (insn)) != 0
4198                             && SET_DEST (set) == quotient
4199                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4200                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4201                           set_unique_reg_note (insn,
4202                                                REG_EQUAL,
4203                                                gen_rtx_DIV (compute_mode,
4204                                                             op0,
4205                                                             GEN_INT
4206                                                             (trunc_int_for_mode
4207                                                              (abs_d,
4208                                                               compute_mode))));
4209
4210                         quotient = expand_unop (compute_mode, neg_optab,
4211                                                 quotient, quotient, 0);
4212                       }
4213                   }
4214                 else if (size <= HOST_BITS_PER_WIDE_INT)
4215                   {
4216                     choose_multiplier (abs_d, size, size - 1,
4217                                        &mlr, &post_shift, &lgup);
4218                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4219                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4220                       {
4221                         rtx t1, t2, t3;
4222
4223                         if (post_shift >= BITS_PER_WORD
4224                             || size - 1 >= BITS_PER_WORD)
4225                           goto fail1;
4226
4227                         extra_cost = (shift_cost[compute_mode][post_shift]
4228                                       + shift_cost[compute_mode][size - 1]
4229                                       + add_cost[compute_mode]);
4230                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4231                                                    NULL_RTX, 0,
4232                                                    max_cost - extra_cost);
4233                         if (t1 == 0)
4234                           goto fail1;
4235                         t2 = expand_shift
4236                           (RSHIFT_EXPR, compute_mode, t1,
4237                            build_int_cst (NULL_TREE, post_shift),
4238                            NULL_RTX, 0);
4239                         t3 = expand_shift
4240                           (RSHIFT_EXPR, compute_mode, op0,
4241                            build_int_cst (NULL_TREE, size - 1),
4242                            NULL_RTX, 0);
4243                         if (d < 0)
4244                           quotient
4245                             = force_operand (gen_rtx_MINUS (compute_mode,
4246                                                             t3, t2),
4247                                              tquotient);
4248                         else
4249                           quotient
4250                             = force_operand (gen_rtx_MINUS (compute_mode,
4251                                                             t2, t3),
4252                                              tquotient);
4253                       }
4254                     else
4255                       {
4256                         rtx t1, t2, t3, t4;
4257
4258                         if (post_shift >= BITS_PER_WORD
4259                             || size - 1 >= BITS_PER_WORD)
4260                           goto fail1;
4261
4262                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4263                         mlr = gen_int_mode (ml, compute_mode);
4264                         extra_cost = (shift_cost[compute_mode][post_shift]
4265                                       + shift_cost[compute_mode][size - 1]
4266                                       + 2 * add_cost[compute_mode]);
4267                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4268                                                    NULL_RTX, 0,
4269                                                    max_cost - extra_cost);
4270                         if (t1 == 0)
4271                           goto fail1;
4272                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4273                                                           t1, op0),
4274                                             NULL_RTX);
4275                         t3 = expand_shift
4276                           (RSHIFT_EXPR, compute_mode, t2,
4277                            build_int_cst (NULL_TREE, post_shift),
4278                            NULL_RTX, 0);
4279                         t4 = expand_shift
4280                           (RSHIFT_EXPR, compute_mode, op0,
4281                            build_int_cst (NULL_TREE, size - 1),
4282                            NULL_RTX, 0);
4283                         if (d < 0)
4284                           quotient
4285                             = force_operand (gen_rtx_MINUS (compute_mode,
4286                                                             t4, t3),
4287                                              tquotient);
4288                         else
4289                           quotient
4290                             = force_operand (gen_rtx_MINUS (compute_mode,
4291                                                             t3, t4),
4292                                              tquotient);
4293                       }
4294                   }
4295                 else            /* Too wide mode to use tricky code */
4296                   break;
4297
4298                 insn = get_last_insn ();
4299                 if (insn != last
4300                     && (set = single_set (insn)) != 0
4301                     && SET_DEST (set) == quotient)
4302                   set_unique_reg_note (insn,
4303                                        REG_EQUAL,
4304                                        gen_rtx_DIV (compute_mode, op0, op1));
4305               }
4306             break;
4307           }
4308       fail1:
4309         delete_insns_since (last);
4310         break;
4311
4312       case FLOOR_DIV_EXPR:
4313       case FLOOR_MOD_EXPR:
4314       /* We will come here only for signed operations.  */
4315         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4316           {
4317             unsigned HOST_WIDE_INT mh;
4318             int pre_shift, lgup, post_shift;
4319             HOST_WIDE_INT d = INTVAL (op1);
4320             rtx ml;
4321
4322             if (d > 0)
4323               {
4324                 /* We could just as easily deal with negative constants here,
4325                    but it does not seem worth the trouble for GCC 2.6.  */
4326                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4327                   {
4328                     pre_shift = floor_log2 (d);
4329                     if (rem_flag)
4330                       {
4331                         remainder = expand_binop (compute_mode, and_optab, op0,
4332                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4333                                                   remainder, 0, OPTAB_LIB_WIDEN);
4334                         if (remainder)
4335                           return gen_lowpart (mode, remainder);
4336                       }
4337                     quotient = expand_shift
4338                       (RSHIFT_EXPR, compute_mode, op0,
4339                        build_int_cst (NULL_TREE, pre_shift),
4340                        tquotient, 0);
4341                   }
4342                 else
4343                   {
4344                     rtx t1, t2, t3, t4;
4345
4346                     mh = choose_multiplier (d, size, size - 1,
4347                                             &ml, &post_shift, &lgup);
4348                     gcc_assert (!mh);
4349
4350                     if (post_shift < BITS_PER_WORD
4351                         && size - 1 < BITS_PER_WORD)
4352                       {
4353                         t1 = expand_shift
4354                           (RSHIFT_EXPR, compute_mode, op0,
4355                            build_int_cst (NULL_TREE, size - 1),
4356                            NULL_RTX, 0);
4357                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4358                                            NULL_RTX, 0, OPTAB_WIDEN);
4359                         extra_cost = (shift_cost[compute_mode][post_shift]
4360                                       + shift_cost[compute_mode][size - 1]
4361                                       + 2 * add_cost[compute_mode]);
4362                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4363                                                    NULL_RTX, 1,
4364                                                    max_cost - extra_cost);
4365                         if (t3 != 0)
4366                           {
4367                             t4 = expand_shift
4368                               (RSHIFT_EXPR, compute_mode, t3,
4369                                build_int_cst (NULL_TREE, post_shift),
4370                                NULL_RTX, 1);
4371                             quotient = expand_binop (compute_mode, xor_optab,
4372                                                      t4, t1, tquotient, 0,
4373                                                      OPTAB_WIDEN);
4374                           }
4375                       }
4376                   }
4377               }
4378             else
4379               {
4380                 rtx nsign, t1, t2, t3, t4;
4381                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4382                                                   op0, constm1_rtx), NULL_RTX);
4383                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4384                                    0, OPTAB_WIDEN);
4385                 nsign = expand_shift
4386                   (RSHIFT_EXPR, compute_mode, t2,
4387                    build_int_cst (NULL_TREE, size - 1),
4388                    NULL_RTX, 0);
4389                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4390                                     NULL_RTX);
4391                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4392                                     NULL_RTX, 0);
4393                 if (t4)
4394                   {
4395                     rtx t5;
4396                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4397                                       NULL_RTX, 0);
4398                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4399                                                             t4, t5),
4400                                               tquotient);
4401                   }
4402               }
4403           }
4404
4405         if (quotient != 0)
4406           break;
4407         delete_insns_since (last);
4408
4409         /* Try using an instruction that produces both the quotient and
4410            remainder, using truncation.  We can easily compensate the quotient
4411            or remainder to get floor rounding, once we have the remainder.
4412            Notice that we compute also the final remainder value here,
4413            and return the result right away.  */
4414         if (target == 0 || GET_MODE (target) != compute_mode)
4415           target = gen_reg_rtx (compute_mode);
4416
4417         if (rem_flag)
4418           {
4419             remainder
4420               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4421             quotient = gen_reg_rtx (compute_mode);
4422           }
4423         else
4424           {
4425             quotient
4426               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4427             remainder = gen_reg_rtx (compute_mode);
4428           }
4429
4430         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4431                                  quotient, remainder, 0))
4432           {
4433             /* This could be computed with a branch-less sequence.
4434                Save that for later.  */
4435             rtx tem;
4436             rtx label = gen_label_rtx ();
4437             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4438             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4439                                 NULL_RTX, 0, OPTAB_WIDEN);
4440             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4441             expand_dec (quotient, const1_rtx);
4442             expand_inc (remainder, op1);
4443             emit_label (label);
4444             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4445           }
4446
4447         /* No luck with division elimination or divmod.  Have to do it
4448            by conditionally adjusting op0 *and* the result.  */
4449         {
4450           rtx label1, label2, label3, label4, label5;
4451           rtx adjusted_op0;
4452           rtx tem;
4453
4454           quotient = gen_reg_rtx (compute_mode);
4455           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4456           label1 = gen_label_rtx ();
4457           label2 = gen_label_rtx ();
4458           label3 = gen_label_rtx ();
4459           label4 = gen_label_rtx ();
4460           label5 = gen_label_rtx ();
4461           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4462           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4463           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4464                               quotient, 0, OPTAB_LIB_WIDEN);
4465           if (tem != quotient)
4466             emit_move_insn (quotient, tem);
4467           emit_jump_insn (gen_jump (label5));
4468           emit_barrier ();
4469           emit_label (label1);
4470           expand_inc (adjusted_op0, const1_rtx);
4471           emit_jump_insn (gen_jump (label4));
4472           emit_barrier ();
4473           emit_label (label2);
4474           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4475           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4476                               quotient, 0, OPTAB_LIB_WIDEN);
4477           if (tem != quotient)
4478             emit_move_insn (quotient, tem);
4479           emit_jump_insn (gen_jump (label5));
4480           emit_barrier ();
4481           emit_label (label3);
4482           expand_dec (adjusted_op0, const1_rtx);
4483           emit_label (label4);
4484           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4485                               quotient, 0, OPTAB_LIB_WIDEN);
4486           if (tem != quotient)
4487             emit_move_insn (quotient, tem);
4488           expand_dec (quotient, const1_rtx);
4489           emit_label (label5);
4490         }
4491         break;
4492
4493       case CEIL_DIV_EXPR:
4494       case CEIL_MOD_EXPR:
4495         if (unsignedp)
4496           {
4497             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4498               {
4499                 rtx t1, t2, t3;
4500                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4501                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4502                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4503                                    tquotient, 1);
4504                 t2 = expand_binop (compute_mode, and_optab, op0,
4505                                    GEN_INT (d - 1),
4506                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4507                 t3 = gen_reg_rtx (compute_mode);
4508                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4509                                       compute_mode, 1, 1);
4510                 if (t3 == 0)
4511                   {
4512                     rtx lab;
4513                     lab = gen_label_rtx ();
4514                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4515                     expand_inc (t1, const1_rtx);
4516                     emit_label (lab);
4517                     quotient = t1;
4518                   }
4519                 else
4520                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4521                                                           t1, t3),
4522                                             tquotient);
4523                 break;
4524               }
4525
4526             /* Try using an instruction that produces both the quotient and
4527                remainder, using truncation.  We can easily compensate the
4528                quotient or remainder to get ceiling rounding, once we have the
4529                remainder.  Notice that we compute also the final remainder
4530                value here, and return the result right away.  */
4531             if (target == 0 || GET_MODE (target) != compute_mode)
4532               target = gen_reg_rtx (compute_mode);
4533
4534             if (rem_flag)
4535               {
4536                 remainder = (REG_P (target)
4537                              ? target : gen_reg_rtx (compute_mode));
4538                 quotient = gen_reg_rtx (compute_mode);
4539               }
4540             else
4541               {
4542                 quotient = (REG_P (target)
4543                             ? target : gen_reg_rtx (compute_mode));
4544                 remainder = gen_reg_rtx (compute_mode);
4545               }
4546
4547             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4548                                      remainder, 1))
4549               {
4550                 /* This could be computed with a branch-less sequence.
4551                    Save that for later.  */
4552                 rtx label = gen_label_rtx ();
4553                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4554                                  compute_mode, label);
4555                 expand_inc (quotient, const1_rtx);
4556                 expand_dec (remainder, op1);
4557                 emit_label (label);
4558                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4559               }
4560
4561             /* No luck with division elimination or divmod.  Have to do it
4562                by conditionally adjusting op0 *and* the result.  */
4563             {
4564               rtx label1, label2;
4565               rtx adjusted_op0, tem;
4566
4567               quotient = gen_reg_rtx (compute_mode);
4568               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4569               label1 = gen_label_rtx ();
4570               label2 = gen_label_rtx ();
4571               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4572                                compute_mode, label1);
4573               emit_move_insn  (quotient, const0_rtx);
4574               emit_jump_insn (gen_jump (label2));
4575               emit_barrier ();
4576               emit_label (label1);
4577               expand_dec (adjusted_op0, const1_rtx);
4578               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4579                                   quotient, 1, OPTAB_LIB_WIDEN);
4580               if (tem != quotient)
4581                 emit_move_insn (quotient, tem);
4582               expand_inc (quotient, const1_rtx);
4583               emit_label (label2);
4584             }
4585           }
4586         else /* signed */
4587           {
4588             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4589                 && INTVAL (op1) >= 0)
4590               {
4591                 /* This is extremely similar to the code for the unsigned case
4592                    above.  For 2.7 we should merge these variants, but for
4593                    2.6.1 I don't want to touch the code for unsigned since that
4594                    get used in C.  The signed case will only be used by other
4595                    languages (Ada).  */
4596
4597                 rtx t1, t2, t3;
4598                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4599                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4600                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4601                                    tquotient, 0);
4602                 t2 = expand_binop (compute_mode, and_optab, op0,
4603                                    GEN_INT (d - 1),
4604                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4605                 t3 = gen_reg_rtx (compute_mode);
4606                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4607                                       compute_mode, 1, 1);
4608                 if (t3 == 0)
4609                   {
4610                     rtx lab;
4611                     lab = gen_label_rtx ();
4612                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4613                     expand_inc (t1, const1_rtx);
4614                     emit_label (lab);
4615                     quotient = t1;
4616                   }
4617                 else
4618                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4619                                                           t1, t3),
4620                                             tquotient);
4621                 break;
4622               }
4623
4624             /* Try using an instruction that produces both the quotient and
4625                remainder, using truncation.  We can easily compensate the
4626                quotient or remainder to get ceiling rounding, once we have the
4627                remainder.  Notice that we compute also the final remainder
4628                value here, and return the result right away.  */
4629             if (target == 0 || GET_MODE (target) != compute_mode)
4630               target = gen_reg_rtx (compute_mode);
4631             if (rem_flag)
4632               {
4633                 remainder= (REG_P (target)
4634                             ? target : gen_reg_rtx (compute_mode));
4635                 quotient = gen_reg_rtx (compute_mode);
4636               }
4637             else
4638               {
4639                 quotient = (REG_P (target)
4640                             ? target : gen_reg_rtx (compute_mode));
4641                 remainder = gen_reg_rtx (compute_mode);
4642               }
4643
4644             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4645                                      remainder, 0))
4646               {
4647                 /* This could be computed with a branch-less sequence.
4648                    Save that for later.  */
4649                 rtx tem;
4650                 rtx label = gen_label_rtx ();
4651                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4652                                  compute_mode, label);
4653                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4654                                     NULL_RTX, 0, OPTAB_WIDEN);
4655                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4656                 expand_inc (quotient, const1_rtx);
4657                 expand_dec (remainder, op1);
4658                 emit_label (label);
4659                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4660               }
4661
4662             /* No luck with division elimination or divmod.  Have to do it
4663                by conditionally adjusting op0 *and* the result.  */
4664             {
4665               rtx label1, label2, label3, label4, label5;
4666               rtx adjusted_op0;
4667               rtx tem;
4668
4669               quotient = gen_reg_rtx (compute_mode);
4670               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4671               label1 = gen_label_rtx ();
4672               label2 = gen_label_rtx ();
4673               label3 = gen_label_rtx ();
4674               label4 = gen_label_rtx ();
4675               label5 = gen_label_rtx ();
4676               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4677               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4678                                compute_mode, label1);
4679               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4680                                   quotient, 0, OPTAB_LIB_WIDEN);
4681               if (tem != quotient)
4682                 emit_move_insn (quotient, tem);
4683               emit_jump_insn (gen_jump (label5));
4684               emit_barrier ();
4685               emit_label (label1);
4686               expand_dec (adjusted_op0, const1_rtx);
4687               emit_jump_insn (gen_jump (label4));
4688               emit_barrier ();
4689               emit_label (label2);
4690               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4691                                compute_mode, label3);
4692               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4693                                   quotient, 0, OPTAB_LIB_WIDEN);
4694               if (tem != quotient)
4695                 emit_move_insn (quotient, tem);
4696               emit_jump_insn (gen_jump (label5));
4697               emit_barrier ();
4698               emit_label (label3);
4699               expand_inc (adjusted_op0, const1_rtx);
4700               emit_label (label4);
4701               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4702                                   quotient, 0, OPTAB_LIB_WIDEN);
4703               if (tem != quotient)
4704                 emit_move_insn (quotient, tem);
4705               expand_inc (quotient, const1_rtx);
4706               emit_label (label5);
4707             }
4708           }
4709         break;
4710
4711       case EXACT_DIV_EXPR:
4712         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4713           {
4714             HOST_WIDE_INT d = INTVAL (op1);
4715             unsigned HOST_WIDE_INT ml;
4716             int pre_shift;
4717             rtx t1;
4718
4719             pre_shift = floor_log2 (d & -d);
4720             ml = invert_mod2n (d >> pre_shift, size);
4721             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4722                                build_int_cst (NULL_TREE, pre_shift),
4723                                NULL_RTX, unsignedp);
4724             quotient = expand_mult (compute_mode, t1,
4725                                     gen_int_mode (ml, compute_mode),
4726                                     NULL_RTX, 1);
4727
4728             insn = get_last_insn ();
4729             set_unique_reg_note (insn,
4730                                  REG_EQUAL,
4731                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4732                                                  compute_mode,
4733                                                  op0, op1));
4734           }
4735         break;
4736
4737       case ROUND_DIV_EXPR:
4738       case ROUND_MOD_EXPR:
4739         if (unsignedp)
4740           {
4741             rtx tem;
4742             rtx label;
4743             label = gen_label_rtx ();
4744             quotient = gen_reg_rtx (compute_mode);
4745             remainder = gen_reg_rtx (compute_mode);
4746             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4747               {
4748                 rtx tem;
4749                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4750                                          quotient, 1, OPTAB_LIB_WIDEN);
4751                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4752                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4753                                           remainder, 1, OPTAB_LIB_WIDEN);
4754               }
4755             tem = plus_constant (op1, -1);
4756             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4757                                 build_int_cst (NULL_TREE, 1),
4758                                 NULL_RTX, 1);
4759             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4760             expand_inc (quotient, const1_rtx);
4761             expand_dec (remainder, op1);
4762             emit_label (label);
4763           }
4764         else
4765           {
4766             rtx abs_rem, abs_op1, tem, mask;
4767             rtx label;
4768             label = gen_label_rtx ();
4769             quotient = gen_reg_rtx (compute_mode);
4770             remainder = gen_reg_rtx (compute_mode);
4771             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4772               {
4773                 rtx tem;
4774                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4775                                          quotient, 0, OPTAB_LIB_WIDEN);
4776                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4777                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4778                                           remainder, 0, OPTAB_LIB_WIDEN);
4779               }
4780             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4781             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4782             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4783                                 build_int_cst (NULL_TREE, 1),
4784                                 NULL_RTX, 1);
4785             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4786             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4787                                 NULL_RTX, 0, OPTAB_WIDEN);
4788             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4789                                  build_int_cst (NULL_TREE, size - 1),
4790                                  NULL_RTX, 0);
4791             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4792                                 NULL_RTX, 0, OPTAB_WIDEN);
4793             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4794                                 NULL_RTX, 0, OPTAB_WIDEN);
4795             expand_inc (quotient, tem);
4796             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4797                                 NULL_RTX, 0, OPTAB_WIDEN);
4798             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4799                                 NULL_RTX, 0, OPTAB_WIDEN);
4800             expand_dec (remainder, tem);
4801             emit_label (label);
4802           }
4803         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4804
4805       default:
4806         gcc_unreachable ();
4807       }
4808
4809   if (quotient == 0)
4810     {
4811       if (target && GET_MODE (target) != compute_mode)
4812         target = 0;
4813
4814       if (rem_flag)
4815         {
4816           /* Try to produce the remainder without producing the quotient.
4817              If we seem to have a divmod pattern that does not require widening,
4818              don't try widening here.  We should really have a WIDEN argument
4819              to expand_twoval_binop, since what we'd really like to do here is
4820              1) try a mod insn in compute_mode
4821              2) try a divmod insn in compute_mode
4822              3) try a div insn in compute_mode and multiply-subtract to get
4823                 remainder
4824              4) try the same things with widening allowed.  */
4825           remainder
4826             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4827                                  op0, op1, target,
4828                                  unsignedp,
4829                                  ((optab2->handlers[compute_mode].insn_code
4830                                    != CODE_FOR_nothing)
4831                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4832           if (remainder == 0)
4833             {
4834               /* No luck there.  Can we do remainder and divide at once
4835                  without a library call?  */
4836               remainder = gen_reg_rtx (compute_mode);
4837               if (! expand_twoval_binop ((unsignedp
4838                                           ? udivmod_optab
4839                                           : sdivmod_optab),
4840                                          op0, op1,
4841                                          NULL_RTX, remainder, unsignedp))
4842                 remainder = 0;
4843             }
4844
4845           if (remainder)
4846             return gen_lowpart (mode, remainder);
4847         }
4848
4849       /* Produce the quotient.  Try a quotient insn, but not a library call.
4850          If we have a divmod in this mode, use it in preference to widening
4851          the div (for this test we assume it will not fail). Note that optab2
4852          is set to the one of the two optabs that the call below will use.  */
4853       quotient
4854         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4855                              op0, op1, rem_flag ? NULL_RTX : target,
4856                              unsignedp,
4857                              ((optab2->handlers[compute_mode].insn_code
4858                                != CODE_FOR_nothing)
4859                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4860
4861       if (quotient == 0)
4862         {
4863           /* No luck there.  Try a quotient-and-remainder insn,
4864              keeping the quotient alone.  */
4865           quotient = gen_reg_rtx (compute_mode);
4866           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4867                                      op0, op1,
4868                                      quotient, NULL_RTX, unsignedp))
4869             {
4870               quotient = 0;
4871               if (! rem_flag)
4872                 /* Still no luck.  If we are not computing the remainder,
4873                    use a library call for the quotient.  */
4874                 quotient = sign_expand_binop (compute_mode,
4875                                               udiv_optab, sdiv_optab,
4876                                               op0, op1, target,
4877                                               unsignedp, OPTAB_LIB_WIDEN);
4878             }
4879         }
4880     }
4881
4882   if (rem_flag)
4883     {
4884       if (target && GET_MODE (target) != compute_mode)
4885         target = 0;
4886
4887       if (quotient == 0)
4888         {
4889           /* No divide instruction either.  Use library for remainder.  */
4890           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4891                                          op0, op1, target,
4892                                          unsignedp, OPTAB_LIB_WIDEN);
4893           /* No remainder function.  Try a quotient-and-remainder
4894              function, keeping the remainder.  */
4895           if (!remainder)
4896             {
4897               remainder = gen_reg_rtx (compute_mode);
4898               if (!expand_twoval_binop_libfunc
4899                   (unsignedp ? udivmod_optab : sdivmod_optab,
4900                    op0, op1,
4901                    NULL_RTX, remainder,
4902                    unsignedp ? UMOD : MOD))
4903                 remainder = NULL_RTX;
4904             }
4905         }
4906       else
4907         {
4908           /* We divided.  Now finish doing X - Y * (X / Y).  */
4909           remainder = expand_mult (compute_mode, quotient, op1,
4910                                    NULL_RTX, unsignedp);
4911           remainder = expand_binop (compute_mode, sub_optab, op0,
4912                                     remainder, target, unsignedp,
4913                                     OPTAB_LIB_WIDEN);
4914         }
4915     }
4916
4917   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4918 }
4919 \f
4920 /* Return a tree node with data type TYPE, describing the value of X.
4921    Usually this is an VAR_DECL, if there is no obvious better choice.
4922    X may be an expression, however we only support those expressions
4923    generated by loop.c.  */
4924
4925 tree
4926 make_tree (tree type, rtx x)
4927 {
4928   tree t;
4929
4930   switch (GET_CODE (x))
4931     {
4932     case CONST_INT:
4933       {
4934         HOST_WIDE_INT hi = 0;
4935
4936         if (INTVAL (x) < 0
4937             && !(TYPE_UNSIGNED (type)
4938                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4939                      < HOST_BITS_PER_WIDE_INT)))
4940           hi = -1;
4941
4942         t = build_int_cst_wide (type, INTVAL (x), hi);
4943
4944         return t;
4945       }
4946
4947     case CONST_DOUBLE:
4948       if (GET_MODE (x) == VOIDmode)
4949         t = build_int_cst_wide (type,
4950                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4951       else
4952         {
4953           REAL_VALUE_TYPE d;
4954
4955           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4956           t = build_real (type, d);
4957         }
4958
4959       return t;
4960
4961     case CONST_VECTOR:
4962       {
4963         int i, units;
4964         rtx elt;
4965         tree t = NULL_TREE;
4966
4967         units = CONST_VECTOR_NUNITS (x);
4968
4969         /* Build a tree with vector elements.  */
4970         for (i = units - 1; i >= 0; --i)
4971           {
4972             elt = CONST_VECTOR_ELT (x, i);
4973             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4974           }
4975
4976         return build_vector (type, t);
4977       }
4978
4979     case PLUS:
4980       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4981                           make_tree (type, XEXP (x, 1)));
4982
4983     case MINUS:
4984       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4985                           make_tree (type, XEXP (x, 1)));
4986
4987     case NEG:
4988       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4989
4990     case MULT:
4991       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4992                           make_tree (type, XEXP (x, 1)));
4993
4994     case ASHIFT:
4995       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4996                           make_tree (type, XEXP (x, 1)));
4997
4998     case LSHIFTRT:
4999       t = lang_hooks.types.unsigned_type (type);
5000       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5001                                          make_tree (t, XEXP (x, 0)),
5002                                          make_tree (type, XEXP (x, 1))));
5003
5004     case ASHIFTRT:
5005       t = lang_hooks.types.signed_type (type);
5006       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5007                                          make_tree (t, XEXP (x, 0)),
5008                                          make_tree (type, XEXP (x, 1))));
5009
5010     case DIV:
5011       if (TREE_CODE (type) != REAL_TYPE)
5012         t = lang_hooks.types.signed_type (type);
5013       else
5014         t = type;
5015
5016       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5017                                          make_tree (t, XEXP (x, 0)),
5018                                          make_tree (t, XEXP (x, 1))));
5019     case UDIV:
5020       t = lang_hooks.types.unsigned_type (type);
5021       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5022                                          make_tree (t, XEXP (x, 0)),
5023                                          make_tree (t, XEXP (x, 1))));
5024
5025     case SIGN_EXTEND:
5026     case ZERO_EXTEND:
5027       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5028                                           GET_CODE (x) == ZERO_EXTEND);
5029       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5030
5031     default:
5032       t = build_decl (VAR_DECL, NULL_TREE, type);
5033
5034       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5035          ptr_mode.  So convert.  */
5036       if (POINTER_TYPE_P (type))
5037         x = convert_memory_address (TYPE_MODE (type), x);
5038
5039       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5040          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5041       t->decl_with_rtl.rtl = x;
5042
5043       return t;
5044     }
5045 }
5046 \f
5047 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5048    and returning TARGET.
5049
5050    If TARGET is 0, a pseudo-register or constant is returned.  */
5051
5052 rtx
5053 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5054 {
5055   rtx tem = 0;
5056
5057   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5058     tem = simplify_binary_operation (AND, mode, op0, op1);
5059   if (tem == 0)
5060     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5061
5062   if (target == 0)
5063     target = tem;
5064   else if (tem != target)
5065     emit_move_insn (target, tem);
5066   return target;
5067 }
5068 \f
5069 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5070    and storing in TARGET.  Normally return TARGET.
5071    Return 0 if that cannot be done.
5072
5073    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5074    it is VOIDmode, they cannot both be CONST_INT.
5075
5076    UNSIGNEDP is for the case where we have to widen the operands
5077    to perform the operation.  It says to use zero-extension.
5078
5079    NORMALIZEP is 1 if we should convert the result to be either zero
5080    or one.  Normalize is -1 if we should convert the result to be
5081    either zero or -1.  If NORMALIZEP is zero, the result will be left
5082    "raw" out of the scc insn.  */
5083
5084 rtx
5085 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5086                  enum machine_mode mode, int unsignedp, int normalizep)
5087 {
5088   rtx subtarget;
5089   enum insn_code icode;
5090   enum machine_mode compare_mode;
5091   enum machine_mode target_mode = GET_MODE (target);
5092   rtx tem;
5093   rtx last = get_last_insn ();
5094   rtx pattern, comparison;
5095
5096   if (unsignedp)
5097     code = unsigned_condition (code);
5098
5099   /* If one operand is constant, make it the second one.  Only do this
5100      if the other operand is not constant as well.  */
5101
5102   if (swap_commutative_operands_p (op0, op1))
5103     {
5104       tem = op0;
5105       op0 = op1;
5106       op1 = tem;
5107       code = swap_condition (code);
5108     }
5109
5110   if (mode == VOIDmode)
5111     mode = GET_MODE (op0);
5112
5113   /* For some comparisons with 1 and -1, we can convert this to
5114      comparisons with zero.  This will often produce more opportunities for
5115      store-flag insns.  */
5116
5117   switch (code)
5118     {
5119     case LT:
5120       if (op1 == const1_rtx)
5121         op1 = const0_rtx, code = LE;
5122       break;
5123     case LE:
5124       if (op1 == constm1_rtx)
5125         op1 = const0_rtx, code = LT;
5126       break;
5127     case GE:
5128       if (op1 == const1_rtx)
5129         op1 = const0_rtx, code = GT;
5130       break;
5131     case GT:
5132       if (op1 == constm1_rtx)
5133         op1 = const0_rtx, code = GE;
5134       break;
5135     case GEU:
5136       if (op1 == const1_rtx)
5137         op1 = const0_rtx, code = NE;
5138       break;
5139     case LTU:
5140       if (op1 == const1_rtx)
5141         op1 = const0_rtx, code = EQ;
5142       break;
5143     default:
5144       break;
5145     }
5146
5147   /* If we are comparing a double-word integer with zero or -1, we can
5148      convert the comparison into one involving a single word.  */
5149   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5150       && GET_MODE_CLASS (mode) == MODE_INT
5151       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5152     {
5153       if ((code == EQ || code == NE)
5154           && (op1 == const0_rtx || op1 == constm1_rtx))
5155         {
5156           rtx op00, op01, op0both;
5157
5158           /* Do a logical OR or AND of the two words and compare the result.  */
5159           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5160           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5161           op0both = expand_binop (word_mode,
5162                                   op1 == const0_rtx ? ior_optab : and_optab,
5163                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5164
5165           if (op0both != 0)
5166             return emit_store_flag (target, code, op0both, op1, word_mode,
5167                                     unsignedp, normalizep);
5168         }
5169       else if ((code == LT || code == GE) && op1 == const0_rtx)
5170         {
5171           rtx op0h;
5172
5173           /* If testing the sign bit, can just test on high word.  */
5174           op0h = simplify_gen_subreg (word_mode, op0, mode,
5175                                       subreg_highpart_offset (word_mode, mode));
5176           return emit_store_flag (target, code, op0h, op1, word_mode,
5177                                   unsignedp, normalizep);
5178         }
5179     }
5180
5181   /* From now on, we won't change CODE, so set ICODE now.  */
5182   icode = setcc_gen_code[(int) code];
5183
5184   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5185      complement of A (for GE) and shifting the sign bit to the low bit.  */
5186   if (op1 == const0_rtx && (code == LT || code == GE)
5187       && GET_MODE_CLASS (mode) == MODE_INT
5188       && (normalizep || STORE_FLAG_VALUE == 1
5189           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5190               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5191                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5192     {
5193       subtarget = target;
5194
5195       /* If the result is to be wider than OP0, it is best to convert it
5196          first.  If it is to be narrower, it is *incorrect* to convert it
5197          first.  */
5198       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5199         {
5200           op0 = convert_modes (target_mode, mode, op0, 0);
5201           mode = target_mode;
5202         }
5203
5204       if (target_mode != mode)
5205         subtarget = 0;
5206
5207       if (code == GE)
5208         op0 = expand_unop (mode, one_cmpl_optab, op0,
5209                            ((STORE_FLAG_VALUE == 1 || normalizep)
5210                             ? 0 : subtarget), 0);
5211
5212       if (STORE_FLAG_VALUE == 1 || normalizep)
5213         /* If we are supposed to produce a 0/1 value, we want to do
5214            a logical shift from the sign bit to the low-order bit; for
5215            a -1/0 value, we do an arithmetic shift.  */
5216         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5217                             size_int (GET_MODE_BITSIZE (mode) - 1),
5218                             subtarget, normalizep != -1);
5219
5220       if (mode != target_mode)
5221         op0 = convert_modes (target_mode, mode, op0, 0);
5222
5223       return op0;
5224     }
5225
5226   if (icode != CODE_FOR_nothing)
5227     {
5228       insn_operand_predicate_fn pred;
5229
5230       /* We think we may be able to do this with a scc insn.  Emit the
5231          comparison and then the scc insn.  */
5232
5233       do_pending_stack_adjust ();
5234       last = get_last_insn ();
5235
5236       comparison
5237         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5238       if (CONSTANT_P (comparison))
5239         {
5240           switch (GET_CODE (comparison))
5241             {
5242             case CONST_INT:
5243               if (comparison == const0_rtx)
5244                 return const0_rtx;
5245               break;
5246
5247 #ifdef FLOAT_STORE_FLAG_VALUE
5248             case CONST_DOUBLE:
5249               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5250                 return const0_rtx;
5251               break;
5252 #endif
5253             default:
5254               gcc_unreachable ();
5255             }
5256
5257           if (normalizep == 1)
5258             return const1_rtx;
5259           if (normalizep == -1)
5260             return constm1_rtx;
5261           return const_true_rtx;
5262         }
5263
5264       /* The code of COMPARISON may not match CODE if compare_from_rtx
5265          decided to swap its operands and reverse the original code.
5266
5267          We know that compare_from_rtx returns either a CONST_INT or
5268          a new comparison code, so it is safe to just extract the
5269          code from COMPARISON.  */
5270       code = GET_CODE (comparison);
5271
5272       /* Get a reference to the target in the proper mode for this insn.  */
5273       compare_mode = insn_data[(int) icode].operand[0].mode;
5274       subtarget = target;
5275       pred = insn_data[(int) icode].operand[0].predicate;
5276       if (optimize || ! (*pred) (subtarget, compare_mode))
5277         subtarget = gen_reg_rtx (compare_mode);
5278
5279       pattern = GEN_FCN (icode) (subtarget);
5280       if (pattern)
5281         {
5282           emit_insn (pattern);
5283
5284           /* If we are converting to a wider mode, first convert to
5285              TARGET_MODE, then normalize.  This produces better combining
5286              opportunities on machines that have a SIGN_EXTRACT when we are
5287              testing a single bit.  This mostly benefits the 68k.
5288
5289              If STORE_FLAG_VALUE does not have the sign bit set when
5290              interpreted in COMPARE_MODE, we can do this conversion as
5291              unsigned, which is usually more efficient.  */
5292           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5293             {
5294               convert_move (target, subtarget,
5295                             (GET_MODE_BITSIZE (compare_mode)
5296                              <= HOST_BITS_PER_WIDE_INT)
5297                             && 0 == (STORE_FLAG_VALUE
5298                                      & ((HOST_WIDE_INT) 1
5299                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5300               op0 = target;
5301               compare_mode = target_mode;
5302             }
5303           else
5304             op0 = subtarget;
5305
5306           /* If we want to keep subexpressions around, don't reuse our
5307              last target.  */
5308
5309           if (optimize)
5310             subtarget = 0;
5311
5312           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5313              we don't have to do anything.  */
5314           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5315             ;
5316           /* STORE_FLAG_VALUE might be the most negative number, so write
5317              the comparison this way to avoid a compiler-time warning.  */
5318           else if (- normalizep == STORE_FLAG_VALUE)
5319             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5320
5321           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5322              makes it hard to use a value of just the sign bit due to
5323              ANSI integer constant typing rules.  */
5324           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5325                    && (STORE_FLAG_VALUE
5326                        & ((HOST_WIDE_INT) 1
5327                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5328             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5329                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5330                                 subtarget, normalizep == 1);
5331           else
5332             {
5333               gcc_assert (STORE_FLAG_VALUE & 1);
5334
5335               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5336               if (normalizep == -1)
5337                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5338             }
5339
5340           /* If we were converting to a smaller mode, do the
5341              conversion now.  */
5342           if (target_mode != compare_mode)
5343             {
5344               convert_move (target, op0, 0);
5345               return target;
5346             }
5347           else
5348             return op0;
5349         }
5350     }
5351
5352   delete_insns_since (last);
5353
5354   /* If optimizing, use different pseudo registers for each insn, instead
5355      of reusing the same pseudo.  This leads to better CSE, but slows
5356      down the compiler, since there are more pseudos */
5357   subtarget = (!optimize
5358                && (target_mode == mode)) ? target : NULL_RTX;
5359
5360   /* If we reached here, we can't do this with a scc insn.  However, there
5361      are some comparisons that can be done directly.  For example, if
5362      this is an equality comparison of integers, we can try to exclusive-or
5363      (or subtract) the two operands and use a recursive call to try the
5364      comparison with zero.  Don't do any of these cases if branches are
5365      very cheap.  */
5366
5367   if (BRANCH_COST > 0
5368       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5369       && op1 != const0_rtx)
5370     {
5371       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5372                           OPTAB_WIDEN);
5373
5374       if (tem == 0)
5375         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5376                             OPTAB_WIDEN);
5377       if (tem != 0)
5378         tem = emit_store_flag (target, code, tem, const0_rtx,
5379                                mode, unsignedp, normalizep);
5380       if (tem == 0)
5381         delete_insns_since (last);
5382       return tem;
5383     }
5384
5385   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5386      the constant zero.  Reject all other comparisons at this point.  Only
5387      do LE and GT if branches are expensive since they are expensive on
5388      2-operand machines.  */
5389
5390   if (BRANCH_COST == 0
5391       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5392       || (code != EQ && code != NE
5393           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5394     return 0;
5395
5396   /* See what we need to return.  We can only return a 1, -1, or the
5397      sign bit.  */
5398
5399   if (normalizep == 0)
5400     {
5401       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5402         normalizep = STORE_FLAG_VALUE;
5403
5404       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5405                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5406                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5407         ;
5408       else
5409         return 0;
5410     }
5411
5412   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5413      do the necessary operation below.  */
5414
5415   tem = 0;
5416
5417   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5418      the sign bit set.  */
5419
5420   if (code == LE)
5421     {
5422       /* This is destructive, so SUBTARGET can't be OP0.  */
5423       if (rtx_equal_p (subtarget, op0))
5424         subtarget = 0;
5425
5426       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5427                           OPTAB_WIDEN);
5428       if (tem)
5429         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5430                             OPTAB_WIDEN);
5431     }
5432
5433   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5434      number of bits in the mode of OP0, minus one.  */
5435
5436   if (code == GT)
5437     {
5438       if (rtx_equal_p (subtarget, op0))
5439         subtarget = 0;
5440
5441       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5442                           size_int (GET_MODE_BITSIZE (mode) - 1),
5443                           subtarget, 0);
5444       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5445                           OPTAB_WIDEN);
5446     }
5447
5448   if (code == EQ || code == NE)
5449     {
5450       /* For EQ or NE, one way to do the comparison is to apply an operation
5451          that converts the operand into a positive number if it is nonzero
5452          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5453          for NE we negate.  This puts the result in the sign bit.  Then we
5454          normalize with a shift, if needed.
5455
5456          Two operations that can do the above actions are ABS and FFS, so try
5457          them.  If that doesn't work, and MODE is smaller than a full word,
5458          we can use zero-extension to the wider mode (an unsigned conversion)
5459          as the operation.  */
5460
5461       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5462          that is compensated by the subsequent overflow when subtracting
5463          one / negating.  */
5464
5465       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5466         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5467       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5468         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5469       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5470         {
5471           tem = convert_modes (word_mode, mode, op0, 1);
5472           mode = word_mode;
5473         }
5474
5475       if (tem != 0)
5476         {
5477           if (code == EQ)
5478             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5479                                 0, OPTAB_WIDEN);
5480           else
5481             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5482         }
5483
5484       /* If we couldn't do it that way, for NE we can "or" the two's complement
5485          of the value with itself.  For EQ, we take the one's complement of
5486          that "or", which is an extra insn, so we only handle EQ if branches
5487          are expensive.  */
5488
5489       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5490         {
5491           if (rtx_equal_p (subtarget, op0))
5492             subtarget = 0;
5493
5494           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5495           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5496                               OPTAB_WIDEN);
5497
5498           if (tem && code == EQ)
5499             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5500         }
5501     }
5502
5503   if (tem && normalizep)
5504     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5505                         size_int (GET_MODE_BITSIZE (mode) - 1),
5506                         subtarget, normalizep == 1);
5507
5508   if (tem)
5509     {
5510       if (GET_MODE (tem) != target_mode)
5511         {
5512           convert_move (target, tem, 0);
5513           tem = target;
5514         }
5515       else if (!subtarget)
5516         {
5517           emit_move_insn (target, tem);
5518           tem = target;
5519         }
5520     }
5521   else
5522     delete_insns_since (last);
5523
5524   return tem;
5525 }
5526
5527 /* Like emit_store_flag, but always succeeds.  */
5528
5529 rtx
5530 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5531                        enum machine_mode mode, int unsignedp, int normalizep)
5532 {
5533   rtx tem, label;
5534
5535   /* First see if emit_store_flag can do the job.  */
5536   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5537   if (tem != 0)
5538     return tem;
5539
5540   if (normalizep == 0)
5541     normalizep = 1;
5542
5543   /* If this failed, we have to do this with set/compare/jump/set code.  */
5544
5545   if (!REG_P (target)
5546       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5547     target = gen_reg_rtx (GET_MODE (target));
5548
5549   emit_move_insn (target, const1_rtx);
5550   label = gen_label_rtx ();
5551   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5552                            NULL_RTX, label);
5553
5554   emit_move_insn (target, const0_rtx);
5555   emit_label (label);
5556
5557   return target;
5558 }
5559 \f
5560 /* Perform possibly multi-word comparison and conditional jump to LABEL
5561    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5562    now a thin wrapper around do_compare_rtx_and_jump.  */
5563
5564 static void
5565 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5566                  rtx label)
5567 {
5568   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5569   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5570                            NULL_RTX, NULL_RTX, label);
5571 }