gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Test whether a value is zero of a power of two.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  98    shift count and some by mode.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
 110 void
 111 init_expmed (void)
 112 {
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
 147   PUT_CODE (&all.reg, REG);
 148   REGNO (&all.reg) = 10000;
 149
 150   PUT_CODE (&all.plus, PLUS);
 151   XEXP (&all.plus, 0) = &all.reg;
 152   XEXP (&all.plus, 1) = &all.reg;
 153
 154   PUT_CODE (&all.neg, NEG);
 155   XEXP (&all.neg, 0) = &all.reg;
 156
 157   PUT_CODE (&all.udiv, UDIV);
 158   XEXP (&all.udiv, 0) = &all.reg;
 159   XEXP (&all.udiv, 1) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.div, DIV);
 166   XEXP (&all.div, 0) = &all.reg;
 167   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 168
 169   PUT_CODE (&all.mod, MOD);
 170   XEXP (&all.mod, 0) = &all.reg;
 171   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 172
 173   PUT_CODE (&all.zext, ZERO_EXTEND);
 174   XEXP (&all.zext, 0) = &all.reg;
 175
 176   PUT_CODE (&all.wide_mult, MULT);
 177   XEXP (&all.wide_mult, 0) = &all.zext;
 178   XEXP (&all.wide_mult, 1) = &all.zext;
 179
 180   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 181   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 182
 183   PUT_CODE (&all.wide_trunc, TRUNCATE);
 184   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 185
 186   PUT_CODE (&all.shift, ASHIFT);
 187   XEXP (&all.shift, 0) = &all.reg;
 188
 189   PUT_CODE (&all.shift_mult, MULT);
 190   XEXP (&all.shift_mult, 0) = &all.reg;
 191
 192   PUT_CODE (&all.shift_add, PLUS);
 193   XEXP (&all.shift_add, 0) = &all.shift_mult;
 194   XEXP (&all.shift_add, 1) = &all.reg;
 195
 196   PUT_CODE (&all.shift_sub, MINUS);
 197   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 198   XEXP (&all.shift_sub, 1) = &all.reg;
 199
 200   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 201        mode != VOIDmode;
 202        mode = GET_MODE_WIDER_MODE (mode))
 203     {
 204       PUT_MODE (&all.reg, mode);
 205       PUT_MODE (&all.plus, mode);
 206       PUT_MODE (&all.neg, mode);
 207       PUT_MODE (&all.udiv, mode);
 208       PUT_MODE (&all.mult, mode);
 209       PUT_MODE (&all.div, mode);
 210       PUT_MODE (&all.mod, mode);
 211       PUT_MODE (&all.wide_trunc, mode);
 212       PUT_MODE (&all.shift, mode);
 213       PUT_MODE (&all.shift_mult, mode);
 214       PUT_MODE (&all.shift_add, mode);
 215       PUT_MODE (&all.shift_sub, mode);
 216
 217       add_cost[mode] = rtx_cost (&all.plus, SET);
 218       neg_cost[mode] = rtx_cost (&all.neg, SET);
 219       div_cost[mode] = rtx_cost (&all.udiv, SET);
 220       mul_cost[mode] = rtx_cost (&all.mult, SET);
 221
 222       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 223       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 224
 225       wider_mode = GET_MODE_WIDER_MODE (mode);
 226       if (wider_mode != VOIDmode)
 227         {
 228           PUT_MODE (&all.zext, wider_mode);
 229           PUT_MODE (&all.wide_mult, wider_mode);
 230           PUT_MODE (&all.wide_lshr, wider_mode);
 231           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 232
 233           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 234           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 235         }
 236
 237       shift_cost[mode][0] = 0;
 238       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 239
 240       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 241       for (m = 1; m < n; m++)
 242         {
 243           XEXP (&all.shift, 1) = cint[m];
 244           XEXP (&all.shift_mult, 1) = pow2[m];
 245
 246           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 247           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 248           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 249         }
 250     }
 251 }
 252
 253 /* Return an rtx representing minus the value of X.
 254    MODE is the intended mode of the result,
 255    useful if X is a CONST_INT.  */
 256
 257 rtx
 258 negate_rtx (enum machine_mode mode, rtx x)
 259 {
 260   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 261
 262   if (result == 0)
 263     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 264
 265   return result;
 266 }
 267
 268 /* Report on the availability of insv/extv/extzv and the desired mode
 269    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 270    is false; else the mode of the specified operand.  If OPNO is -1,
 271    all the caller cares about is whether the insn is available.  */
 272 enum machine_mode
 273 mode_for_extraction (enum extraction_pattern pattern, int opno)
 274 {
 275   const struct insn_data *data;
 276
 277   switch (pattern)
 278     {
 279     case EP_insv:
 280       if (HAVE_insv)
 281         {
 282           data = &insn_data[CODE_FOR_insv];
 283           break;
 284         }
 285       return MAX_MACHINE_MODE;
 286
 287     case EP_extv:
 288       if (HAVE_extv)
 289         {
 290           data = &insn_data[CODE_FOR_extv];
 291           break;
 292         }
 293       return MAX_MACHINE_MODE;
 294
 295     case EP_extzv:
 296       if (HAVE_extzv)
 297         {
 298           data = &insn_data[CODE_FOR_extzv];
 299           break;
 300         }
 301       return MAX_MACHINE_MODE;
 302
 303     default:
 304       gcc_unreachable ();
 305     }
 306
 307   if (opno == -1)
 308     return VOIDmode;
 309
 310   /* Everyone who uses this function used to follow it with
 311      if (result == VOIDmode) result = word_mode; */
 312   if (data->operand[opno].mode == VOIDmode)
 313     return word_mode;
 314   return data->operand[opno].mode;
 315 }
 316
 317 \f
 318 /* Generate code to store value from rtx VALUE
 319    into a bit-field within structure STR_RTX
 320    containing BITSIZE bits starting at bit BITNUM.
 321    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 322    ALIGN is the alignment that STR_RTX is known to have.
 323    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 324
 325 /* ??? Note that there are two different ideas here for how
 326    to determine the size to count bits within, for a register.
 327    One is BITS_PER_WORD, and the other is the size of operand 3
 328    of the insv pattern.
 329
 330    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 331    else, we use the mode of operand 3.  */
 332
 333 rtx
 334 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 335                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 336                  rtx value)
 337 {
 338   unsigned int unit
 339     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 340   unsigned HOST_WIDE_INT offset = bitnum / unit;
 341   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 356       /* We used to adjust BITPOS here, but now we do the whole adjustment
 357          right after the loop.  */
 358       op0 = SUBREG_REG (op0);
 359     }
 360
 361   /* Use vec_set patterns for inserting parts of vectors whenever
 362      available.  */
 363   if (VECTOR_MODE_P (GET_MODE (op0))
 364       && !MEM_P (op0)
 365       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 366           != CODE_FOR_nothing)
 367       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 368       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 369       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 370     {
 371       enum machine_mode outermode = GET_MODE (op0);
 372       enum machine_mode innermode = GET_MODE_INNER (outermode);
 373       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 374       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 375       rtx rtxpos = GEN_INT (pos);
 376       rtx src = value;
 377       rtx dest = op0;
 378       rtx pat, seq;
 379       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 380       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 381       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 382
 383       start_sequence ();
 384
 385       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 386         src = copy_to_mode_reg (mode1, src);
 387
 388       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 389         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 390
 391       /* We could handle this, but we should always be called with a pseudo
 392          for our targets and all insns should take them as outputs.  */
 393       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 394                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 395                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 396       pat = GEN_FCN (icode) (dest, src, rtxpos);
 397       seq = get_insns ();
 398       end_sequence ();
 399       if (pat)
 400         {
 401           emit_insn (seq);
 402           emit_insn (pat);
 403           return dest;
 404         }
 405     }
 406
 407   if (flag_force_mem)
 408     {
 409       int old_generating_concat_p = generating_concat_p;
 410       generating_concat_p = 0;
 411       value = force_not_mem (value);
 412       generating_concat_p = old_generating_concat_p;
 413     }
 414
 415   /* If the target is a register, overwriting the entire object, or storing
 416      a full-word or multi-word field can be done with just a SUBREG.
 417
 418      If the target is memory, storing any naturally aligned field can be
 419      done with a simple store.  For targets that support fast unaligned
 420      memory, any naturally sized, unit aligned field can be done directly.  */
 421
 422   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 423                 + (offset * UNITS_PER_WORD);
 424
 425   if (bitpos == 0
 426       && bitsize == GET_MODE_BITSIZE (fieldmode)
 427       && (!MEM_P (op0)
 428           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 429              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 430              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 431           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 432              || (offset * BITS_PER_UNIT % bitsize == 0
 433                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 434     {
 435       if (GET_MODE (op0) != fieldmode)
 436         {
 437           if (MEM_P (op0))
 438             op0 = adjust_address (op0, fieldmode, offset);
 439           else
 440             op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 441                                        byte_offset);
 442         }
 443       emit_move_insn (op0, value);
 444       return value;
 445     }
 446
 447   /* Make sure we are playing with integral modes.  Pun with subregs
 448      if we aren't.  This must come after the entire register case above,
 449      since that case is valid for any mode.  The following cases are only
 450      valid for integral modes.  */
 451   {
 452     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 453     if (imode != GET_MODE (op0))
 454       {
 455         if (MEM_P (op0))
 456           op0 = adjust_address (op0, imode, 0);
 457         else
 458           {
 459             gcc_assert (imode != BLKmode);
 460             op0 = gen_lowpart (imode, op0);
 461           }
 462       }
 463   }
 464
 465   /* We may be accessing data outside the field, which means
 466      we can alias adjacent data.  */
 467   if (MEM_P (op0))
 468     {
 469       op0 = shallow_copy_rtx (op0);
 470       set_mem_alias_set (op0, 0);
 471       set_mem_expr (op0, 0);
 472     }
 473
 474   /* If OP0 is a register, BITPOS must count within a word.
 475      But as we have it, it counts within whatever size OP0 now has.
 476      On a bigendian machine, these are not the same, so convert.  */
 477   if (BYTES_BIG_ENDIAN
 478       && !MEM_P (op0)
 479       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 480     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 481
 482   /* Storing an lsb-aligned field in a register
 483      can be done with a movestrict instruction.  */
 484
 485   if (!MEM_P (op0)
 486       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 487       && bitsize == GET_MODE_BITSIZE (fieldmode)
 488       && (movstrict_optab->handlers[fieldmode].insn_code
 489           != CODE_FOR_nothing))
 490     {
 491       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 492
 493       /* Get appropriate low part of the value being stored.  */
 494       if (GET_CODE (value) == CONST_INT || REG_P (value))
 495         value = gen_lowpart (fieldmode, value);
 496       else if (!(GET_CODE (value) == SYMBOL_REF
 497                  || GET_CODE (value) == LABEL_REF
 498                  || GET_CODE (value) == CONST))
 499         value = convert_to_mode (fieldmode, value, 0);
 500
 501       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 502         value = copy_to_mode_reg (fieldmode, value);
 503
 504       if (GET_CODE (op0) == SUBREG)
 505         {
 506           /* Else we've got some float mode source being extracted into
 507              a different float mode destination -- this combination of
 508              subregs results in Severe Tire Damage.  */
 509           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 510                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 511                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 512           op0 = SUBREG_REG (op0);
 513         }
 514
 515       emit_insn (GEN_FCN (icode)
 516                  (gen_rtx_SUBREG (fieldmode, op0,
 517                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 518                                   + (offset * UNITS_PER_WORD)),
 519                                   value));
 520
 521       return value;
 522     }
 523
 524   /* Handle fields bigger than a word.  */
 525
 526   if (bitsize > BITS_PER_WORD)
 527     {
 528       /* Here we transfer the words of the field
 529          in the order least significant first.
 530          This is because the most significant word is the one which may
 531          be less than full.
 532          However, only do that if the value is not BLKmode.  */
 533
 534       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 535       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 536       unsigned int i;
 537
 538       /* This is the mode we must force value to, so that there will be enough
 539          subwords to extract.  Note that fieldmode will often (always?) be
 540          VOIDmode, because that is what store_field uses to indicate that this
 541          is a bit field, but passing VOIDmode to operand_subword_force will
 542          result in an abort.  */
 543       fieldmode = GET_MODE (value);
 544       if (fieldmode == VOIDmode)
 545         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 546
 547       for (i = 0; i < nwords; i++)
 548         {
 549           /* If I is 0, use the low-order word in both field and target;
 550              if I is 1, use the next to lowest word; and so on.  */
 551           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 552           unsigned int bit_offset = (backwards
 553                                      ? MAX ((int) bitsize - ((int) i + 1)
 554                                             * BITS_PER_WORD,
 555                                             0)
 556                                      : (int) i * BITS_PER_WORD);
 557
 558           store_bit_field (op0, MIN (BITS_PER_WORD,
 559                                      bitsize - i * BITS_PER_WORD),
 560                            bitnum + bit_offset, word_mode,
 561                            operand_subword_force (value, wordnum, fieldmode));
 562         }
 563       return value;
 564     }
 565
 566   /* From here on we can assume that the field to be stored in is
 567      a full-word (whatever type that is), since it is shorter than a word.  */
 568
 569   /* OFFSET is the number of words or bytes (UNIT says which)
 570      from STR_RTX to the first word or byte containing part of the field.  */
 571
 572   if (!MEM_P (op0))
 573     {
 574       if (offset != 0
 575           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 576         {
 577           if (!REG_P (op0))
 578             {
 579               /* Since this is a destination (lvalue), we can't copy it to a
 580                  pseudo.  We can trivially remove a SUBREG that does not
 581                  change the size of the operand.  Such a SUBREG may have been
 582                  added above.  Otherwise, abort.  */
 583               gcc_assert (GET_CODE (op0) == SUBREG
 584                           && (GET_MODE_SIZE (GET_MODE (op0))
 585                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 586               op0 = SUBREG_REG (op0);
 587             }
 588           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 589                                 op0, (offset * UNITS_PER_WORD));
 590         }
 591       offset = 0;
 592     }
 593
 594   /* If VALUE has a floating-point or complex mode, access it as an
 595      integer of the corresponding size.  This can occur on a machine
 596      with 64 bit registers that uses SFmode for float.  It can also
 597      occur for unaligned float or complex fields.  */
 598   orig_value = value;
 599   if (GET_MODE (value) != VOIDmode
 600       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 601       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 602     {
 603       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 604       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 605     }
 606
 607   /* Now OFFSET is nonzero only if OP0 is memory
 608      and is therefore always measured in bytes.  */
 609
 610   if (HAVE_insv
 611       && GET_MODE (value) != BLKmode
 612       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 613       /* Ensure insv's size is wide enough for this field.  */
 614       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 615       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 616             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 617     {
 618       int xbitpos = bitpos;
 619       rtx value1;
 620       rtx xop0 = op0;
 621       rtx last = get_last_insn ();
 622       rtx pat;
 623       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 624       int save_volatile_ok = volatile_ok;
 625
 626       volatile_ok = 1;
 627
 628       /* If this machine's insv can only insert into a register, copy OP0
 629          into a register and save it back later.  */
 630       /* This used to check flag_force_mem, but that was a serious
 631          de-optimization now that flag_force_mem is enabled by -O2.  */
 632       if (MEM_P (op0)
 633           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 634                 (op0, VOIDmode)))
 635         {
 636           rtx tempreg;
 637           enum machine_mode bestmode;
 638
 639           /* Get the mode to use for inserting into this field.  If OP0 is
 640              BLKmode, get the smallest mode consistent with the alignment. If
 641              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 642              mode. Otherwise, use the smallest mode containing the field.  */
 643
 644           if (GET_MODE (op0) == BLKmode
 645               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 646             bestmode
 647               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 648                                MEM_VOLATILE_P (op0));
 649           else
 650             bestmode = GET_MODE (op0);
 651
 652           if (bestmode == VOIDmode
 653               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 654                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 655             goto insv_loses;
 656
 657           /* Adjust address to point to the containing unit of that mode.
 658              Compute offset as multiple of this unit, counting in bytes.  */
 659           unit = GET_MODE_BITSIZE (bestmode);
 660           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 661           bitpos = bitnum % unit;
 662           op0 = adjust_address (op0, bestmode,  offset);
 663
 664           /* Fetch that unit, store the bitfield in it, then store
 665              the unit.  */
 666           tempreg = copy_to_reg (op0);
 667           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 668           emit_move_insn (op0, tempreg);
 669           return value;
 670         }
 671       volatile_ok = save_volatile_ok;
 672
 673       /* Add OFFSET into OP0's address.  */
 674       if (MEM_P (xop0))
 675         xop0 = adjust_address (xop0, byte_mode, offset);
 676
 677       /* If xop0 is a register, we need it in MAXMODE
 678          to make it acceptable to the format of insv.  */
 679       if (GET_CODE (xop0) == SUBREG)
 680         /* We can't just change the mode, because this might clobber op0,
 681            and we will need the original value of op0 if insv fails.  */
 682         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 683       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 684         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 685
 686       /* On big-endian machines, we count bits from the most significant.
 687          If the bit field insn does not, we must invert.  */
 688
 689       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 690         xbitpos = unit - bitsize - xbitpos;
 691
 692       /* We have been counting XBITPOS within UNIT.
 693          Count instead within the size of the register.  */
 694       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 695         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 696
 697       unit = GET_MODE_BITSIZE (maxmode);
 698
 699       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 700       value1 = value;
 701       if (GET_MODE (value) != maxmode)
 702         {
 703           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 704             {
 705               /* Optimization: Don't bother really extending VALUE
 706                  if it has all the bits we will actually use.  However,
 707                  if we must narrow it, be sure we do it correctly.  */
 708
 709               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 710                 {
 711                   rtx tmp;
 712
 713                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 714                   if (! tmp)
 715                     tmp = simplify_gen_subreg (maxmode,
 716                                                force_reg (GET_MODE (value),
 717                                                           value1),
 718                                                GET_MODE (value), 0);
 719                   value1 = tmp;
 720                 }
 721               else
 722                 value1 = gen_lowpart (maxmode, value1);
 723             }
 724           else if (GET_CODE (value) == CONST_INT)
 725             value1 = gen_int_mode (INTVAL (value), maxmode);
 726           else
 727             /* Parse phase is supposed to make VALUE's data type
 728                match that of the component reference, which is a type
 729                at least as wide as the field; so VALUE should have
 730                a mode that corresponds to that type.  */
 731             gcc_assert (CONSTANT_P (value));
 732         }
 733
 734       /* If this machine's insv insists on a register,
 735          get VALUE1 into a register.  */
 736       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 737              (value1, maxmode)))
 738         value1 = force_reg (maxmode, value1);
 739
 740       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 741       if (pat)
 742         emit_insn (pat);
 743       else
 744         {
 745           delete_insns_since (last);
 746           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 747         }
 748     }
 749   else
 750     insv_loses:
 751     /* Insv is not available; store using shifts and boolean ops.  */
 752     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 753   return value;
 754 }
 755 \f
 756 /* Use shifts and boolean operations to store VALUE
 757    into a bit field of width BITSIZE
 758    in a memory location specified by OP0 except offset by OFFSET bytes.
 759      (OFFSET must be 0 if OP0 is a register.)
 760    The field starts at position BITPOS within the byte.
 761     (If OP0 is a register, it may be a full word or a narrower mode,
 762      but BITPOS still counts within a full word,
 763      which is significant on bigendian machines.)  */
 764
 765 static void
 766 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 767                        unsigned HOST_WIDE_INT bitsize,
 768                        unsigned HOST_WIDE_INT bitpos, rtx value)
 769 {
 770   enum machine_mode mode;
 771   unsigned int total_bits = BITS_PER_WORD;
 772   rtx subtarget, temp;
 773   int all_zero = 0;
 774   int all_one = 0;
 775
 776   /* There is a case not handled here:
 777      a structure with a known alignment of just a halfword
 778      and a field split across two aligned halfwords within the structure.
 779      Or likewise a structure with a known alignment of just a byte
 780      and a field split across two bytes.
 781      Such cases are not supposed to be able to occur.  */
 782
 783   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 784     {
 785       gcc_assert (!offset);
 786       /* Special treatment for a bit field split across two registers.  */
 787       if (bitsize + bitpos > BITS_PER_WORD)
 788         {
 789           store_split_bit_field (op0, bitsize, bitpos, value);
 790           return;
 791         }
 792     }
 793   else
 794     {
 795       /* Get the proper mode to use for this field.  We want a mode that
 796          includes the entire field.  If such a mode would be larger than
 797          a word, we won't be doing the extraction the normal way.
 798          We don't want a mode bigger than the destination.  */
 799
 800       mode = GET_MODE (op0);
 801       if (GET_MODE_BITSIZE (mode) == 0
 802           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 803         mode = word_mode;
 804       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 805                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 806
 807       if (mode == VOIDmode)
 808         {
 809           /* The only way this should occur is if the field spans word
 810              boundaries.  */
 811           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 812                                  value);
 813           return;
 814         }
 815
 816       total_bits = GET_MODE_BITSIZE (mode);
 817
 818       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 819          be in the range 0 to total_bits-1, and put any excess bytes in
 820          OFFSET.  */
 821       if (bitpos >= total_bits)
 822         {
 823           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 824           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 825                      * BITS_PER_UNIT);
 826         }
 827
 828       /* Get ref to an aligned byte, halfword, or word containing the field.
 829          Adjust BITPOS to be position within a word,
 830          and OFFSET to be the offset of that word.
 831          Then alter OP0 to refer to that word.  */
 832       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 833       offset -= (offset % (total_bits / BITS_PER_UNIT));
 834       op0 = adjust_address (op0, mode, offset);
 835     }
 836
 837   mode = GET_MODE (op0);
 838
 839   /* Now MODE is either some integral mode for a MEM as OP0,
 840      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 841      The bit field is contained entirely within OP0.
 842      BITPOS is the starting bit number within OP0.
 843      (OP0's mode may actually be narrower than MODE.)  */
 844
 845   if (BYTES_BIG_ENDIAN)
 846       /* BITPOS is the distance between our msb
 847          and that of the containing datum.
 848          Convert it to the distance from the lsb.  */
 849       bitpos = total_bits - bitsize - bitpos;
 850
 851   /* Now BITPOS is always the distance between our lsb
 852      and that of OP0.  */
 853
 854   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 855      we must first convert its mode to MODE.  */
 856
 857   if (GET_CODE (value) == CONST_INT)
 858     {
 859       HOST_WIDE_INT v = INTVAL (value);
 860
 861       if (bitsize < HOST_BITS_PER_WIDE_INT)
 862         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 863
 864       if (v == 0)
 865         all_zero = 1;
 866       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 867                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 868                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 869         all_one = 1;
 870
 871       value = lshift_value (mode, value, bitpos, bitsize);
 872     }
 873   else
 874     {
 875       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 876                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 877
 878       if (GET_MODE (value) != mode)
 879         {
 880           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 881               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 882             value = gen_lowpart (mode, value);
 883           else
 884             value = convert_to_mode (mode, value, 1);
 885         }
 886
 887       if (must_and)
 888         value = expand_binop (mode, and_optab, value,
 889                               mask_rtx (mode, 0, bitsize, 0),
 890                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 891       if (bitpos > 0)
 892         value = expand_shift (LSHIFT_EXPR, mode, value,
 893                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 894     }
 895
 896   /* Now clear the chosen bits in OP0,
 897      except that if VALUE is -1 we need not bother.  */
 898
 899   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 900
 901   if (! all_one)
 902     {
 903       temp = expand_binop (mode, and_optab, op0,
 904                            mask_rtx (mode, bitpos, bitsize, 1),
 905                            subtarget, 1, OPTAB_LIB_WIDEN);
 906       subtarget = temp;
 907     }
 908   else
 909     temp = op0;
 910
 911   /* Now logical-or VALUE into OP0, unless it is zero.  */
 912
 913   if (! all_zero)
 914     temp = expand_binop (mode, ior_optab, temp, value,
 915                          subtarget, 1, OPTAB_LIB_WIDEN);
 916   if (op0 != temp)
 917     emit_move_insn (op0, temp);
 918 }
 919 \f
 920 /* Store a bit field that is split across multiple accessible memory objects.
 921
 922    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 923    BITSIZE is the field width; BITPOS the position of its first bit
 924    (within the word).
 925    VALUE is the value to store.
 926
 927    This does not yet handle fields wider than BITS_PER_WORD.  */
 928
 929 static void
 930 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 931                        unsigned HOST_WIDE_INT bitpos, rtx value)
 932 {
 933   unsigned int unit;
 934   unsigned int bitsdone = 0;
 935
 936   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 937      much at a time.  */
 938   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 939     unit = BITS_PER_WORD;
 940   else
 941     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 942
 943   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 944      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 945      that VALUE might be a floating-point constant.  */
 946   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 947     {
 948       rtx word = gen_lowpart_common (word_mode, value);
 949
 950       if (word && (value != word))
 951         value = word;
 952       else
 953         value = gen_lowpart_common (word_mode,
 954                                     force_reg (GET_MODE (value) != VOIDmode
 955                                                ? GET_MODE (value)
 956                                                : word_mode, value));
 957     }
 958
 959   while (bitsdone < bitsize)
 960     {
 961       unsigned HOST_WIDE_INT thissize;
 962       rtx part, word;
 963       unsigned HOST_WIDE_INT thispos;
 964       unsigned HOST_WIDE_INT offset;
 965
 966       offset = (bitpos + bitsdone) / unit;
 967       thispos = (bitpos + bitsdone) % unit;
 968
 969       /* THISSIZE must not overrun a word boundary.  Otherwise,
 970          store_fixed_bit_field will call us again, and we will mutually
 971          recurse forever.  */
 972       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 973       thissize = MIN (thissize, unit - thispos);
 974
 975       if (BYTES_BIG_ENDIAN)
 976         {
 977           int total_bits;
 978
 979           /* We must do an endian conversion exactly the same way as it is
 980              done in extract_bit_field, so that the two calls to
 981              extract_fixed_bit_field will have comparable arguments.  */
 982           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 983             total_bits = BITS_PER_WORD;
 984           else
 985             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 986
 987           /* Fetch successively less significant portions.  */
 988           if (GET_CODE (value) == CONST_INT)
 989             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 990                              >> (bitsize - bitsdone - thissize))
 991                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 992           else
 993             /* The args are chosen so that the last part includes the
 994                lsb.  Give extract_bit_field the value it needs (with
 995                endianness compensation) to fetch the piece we want.  */
 996             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 997                                             total_bits - bitsize + bitsdone,
 998                                             NULL_RTX, 1);
 999         }
1000       else
1001         {
1002           /* Fetch successively more significant portions.  */
1003           if (GET_CODE (value) == CONST_INT)
1004             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1005                              >> bitsdone)
1006                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1007           else
1008             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1009                                             bitsdone, NULL_RTX, 1);
1010         }
1011
1012       /* If OP0 is a register, then handle OFFSET here.
1013
1014          When handling multiword bitfields, extract_bit_field may pass
1015          down a word_mode SUBREG of a larger REG for a bitfield that actually
1016          crosses a word boundary.  Thus, for a SUBREG, we must find
1017          the current word starting from the base register.  */
1018       if (GET_CODE (op0) == SUBREG)
1019         {
1020           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1021           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1022                                         GET_MODE (SUBREG_REG (op0)));
1023           offset = 0;
1024         }
1025       else if (REG_P (op0))
1026         {
1027           word = operand_subword_force (op0, offset, GET_MODE (op0));
1028           offset = 0;
1029         }
1030       else
1031         word = op0;
1032
1033       /* OFFSET is in UNITs, and UNIT is in bits.
1034          store_fixed_bit_field wants offset in bytes.  */
1035       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1036                              thispos, part);
1037       bitsdone += thissize;
1038     }
1039 }
1040 \f
1041 /* Generate code to extract a byte-field from STR_RTX
1042    containing BITSIZE bits, starting at BITNUM,
1043    and put it in TARGET if possible (if TARGET is nonzero).
1044    Regardless of TARGET, we return the rtx for where the value is placed.
1045
1046    STR_RTX is the structure containing the byte (a REG or MEM).
1047    UNSIGNEDP is nonzero if this is an unsigned bit field.
1048    MODE is the natural mode of the field value once extracted.
1049    TMODE is the mode the caller would like the value to have;
1050    but the value may be returned with type MODE instead.
1051
1052    TOTAL_SIZE is the size in bytes of the containing structure,
1053    or -1 if varying.
1054
1055    If a TARGET is specified and we can store in it at no extra cost,
1056    we do so, and return TARGET.
1057    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1058    if they are equally easy.  */
1059
1060 rtx
1061 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1062                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1063                    enum machine_mode mode, enum machine_mode tmode)
1064 {
1065   unsigned int unit
1066     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1067   unsigned HOST_WIDE_INT offset = bitnum / unit;
1068   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1069   rtx op0 = str_rtx;
1070   rtx spec_target = target;
1071   rtx spec_target_subreg = 0;
1072   enum machine_mode int_mode;
1073   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1074   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1075   enum machine_mode mode1;
1076   int byte_offset;
1077
1078   if (tmode == VOIDmode)
1079     tmode = mode;
1080
1081   while (GET_CODE (op0) == SUBREG)
1082     {
1083       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1084       if (bitpos > unit)
1085         {
1086           offset += (bitpos / unit);
1087           bitpos %= unit;
1088         }
1089       op0 = SUBREG_REG (op0);
1090     }
1091
1092   if (REG_P (op0)
1093       && mode == GET_MODE (op0)
1094       && bitnum == 0
1095       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1096     {
1097       /* We're trying to extract a full register from itself.  */
1098       return op0;
1099     }
1100
1101   /* Use vec_extract patterns for extracting parts of vectors whenever
1102      available.  */
1103   if (VECTOR_MODE_P (GET_MODE (op0))
1104       && !MEM_P (op0)
1105       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1106           != CODE_FOR_nothing)
1107       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1108           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1109     {
1110       enum machine_mode outermode = GET_MODE (op0);
1111       enum machine_mode innermode = GET_MODE_INNER (outermode);
1112       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1113       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1114       rtx rtxpos = GEN_INT (pos);
1115       rtx src = op0;
1116       rtx dest = NULL, pat, seq;
1117       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1118       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1119       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1120
1121       if (innermode == tmode || innermode == mode)
1122         dest = target;
1123
1124       if (!dest)
1125         dest = gen_reg_rtx (innermode);
1126
1127       start_sequence ();
1128
1129       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1130         dest = copy_to_mode_reg (mode0, dest);
1131
1132       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1133         src = copy_to_mode_reg (mode1, src);
1134
1135       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1136         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1137
1138       /* We could handle this, but we should always be called with a pseudo
1139          for our targets and all insns should take them as outputs.  */
1140       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1141                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1142                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1143
1144       pat = GEN_FCN (icode) (dest, src, rtxpos);
1145       seq = get_insns ();
1146       end_sequence ();
1147       if (pat)
1148         {
1149           emit_insn (seq);
1150           emit_insn (pat);
1151           return dest;
1152         }
1153     }
1154
1155   /* Make sure we are playing with integral modes.  Pun with subregs
1156      if we aren't.  */
1157   {
1158     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1159     if (imode != GET_MODE (op0))
1160       {
1161         op0 = gen_lowpart (imode, op0);
1162
1163         /* If we got a SUBREG, force it into a register since we aren't going
1164            to be able to do another SUBREG on it.  */
1165         if (GET_CODE (op0) == SUBREG)
1166           op0 = force_reg (imode, op0);
1167       }
1168   }
1169
1170   /* We may be accessing data outside the field, which means
1171      we can alias adjacent data.  */
1172   if (MEM_P (op0))
1173     {
1174       op0 = shallow_copy_rtx (op0);
1175       set_mem_alias_set (op0, 0);
1176       set_mem_expr (op0, 0);
1177     }
1178
1179   /* Extraction of a full-word or multi-word value from a structure
1180      in a register or aligned memory can be done with just a SUBREG.
1181      A subword value in the least significant part of a register
1182      can also be extracted with a SUBREG.  For this, we need the
1183      byte offset of the value in op0.  */
1184
1185   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1186
1187   /* If OP0 is a register, BITPOS must count within a word.
1188      But as we have it, it counts within whatever size OP0 now has.
1189      On a bigendian machine, these are not the same, so convert.  */
1190   if (BYTES_BIG_ENDIAN
1191       && !MEM_P (op0)
1192       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1193     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1194
1195   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1196      If that's wrong, the solution is to test for it and set TARGET to 0
1197      if needed.  */
1198
1199   /* Only scalar integer modes can be converted via subregs.  There is an
1200      additional problem for FP modes here in that they can have a precision
1201      which is different from the size.  mode_for_size uses precision, but
1202      we want a mode based on the size, so we must avoid calling it for FP
1203      modes.  */
1204   mode1  = (SCALAR_INT_MODE_P (tmode)
1205             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1206             : mode);
1207
1208   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1209         && bitpos % BITS_PER_WORD == 0)
1210        || (mode1 != BLKmode
1211            /* ??? The big endian test here is wrong.  This is correct
1212               if the value is in a register, and if mode_for_size is not
1213               the same mode as op0.  This causes us to get unnecessarily
1214               inefficient code from the Thumb port when -mbig-endian.  */
1215            && (BYTES_BIG_ENDIAN
1216                ? bitpos + bitsize == BITS_PER_WORD
1217                : bitpos == 0)))
1218       && ((!MEM_P (op0)
1219            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1220                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1221            && GET_MODE_SIZE (mode1) != 0
1222            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1223           || (MEM_P (op0)
1224               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1225                   || (offset * BITS_PER_UNIT % bitsize == 0
1226                       && MEM_ALIGN (op0) % bitsize == 0)))))
1227     {
1228       if (mode1 != GET_MODE (op0))
1229         {
1230           if (MEM_P (op0))
1231             op0 = adjust_address (op0, mode1, offset);
1232           else
1233             {
1234               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1235                                              byte_offset);
1236               if (sub == NULL)
1237                 goto no_subreg_mode_swap;
1238               op0 = sub;
1239             }
1240         }
1241       if (mode1 != mode)
1242         return convert_to_mode (tmode, op0, unsignedp);
1243       return op0;
1244     }
1245  no_subreg_mode_swap:
1246
1247   /* Handle fields bigger than a word.  */
1248
1249   if (bitsize > BITS_PER_WORD)
1250     {
1251       /* Here we transfer the words of the field
1252          in the order least significant first.
1253          This is because the most significant word is the one which may
1254          be less than full.  */
1255
1256       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1257       unsigned int i;
1258
1259       if (target == 0 || !REG_P (target))
1260         target = gen_reg_rtx (mode);
1261
1262       /* Indicate for flow that the entire target reg is being set.  */
1263       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1264
1265       for (i = 0; i < nwords; i++)
1266         {
1267           /* If I is 0, use the low-order word in both field and target;
1268              if I is 1, use the next to lowest word; and so on.  */
1269           /* Word number in TARGET to use.  */
1270           unsigned int wordnum
1271             = (WORDS_BIG_ENDIAN
1272                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1273                : i);
1274           /* Offset from start of field in OP0.  */
1275           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1276                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1277                                                 * (int) BITS_PER_WORD))
1278                                      : (int) i * BITS_PER_WORD);
1279           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1280           rtx result_part
1281             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1282                                            bitsize - i * BITS_PER_WORD),
1283                                  bitnum + bit_offset, 1, target_part, mode,
1284                                  word_mode);
1285
1286           gcc_assert (target_part);
1287
1288           if (result_part != target_part)
1289             emit_move_insn (target_part, result_part);
1290         }
1291
1292       if (unsignedp)
1293         {
1294           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1295              need to be zero'd out.  */
1296           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1297             {
1298               unsigned int i, total_words;
1299
1300               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1301               for (i = nwords; i < total_words; i++)
1302                 emit_move_insn
1303                   (operand_subword (target,
1304                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1305                                     1, VOIDmode),
1306                    const0_rtx);
1307             }
1308           return target;
1309         }
1310
1311       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1312       target = expand_shift (LSHIFT_EXPR, mode, target,
1313                              build_int_cst (NULL_TREE,
1314                                             GET_MODE_BITSIZE (mode) - bitsize),
1315                              NULL_RTX, 0);
1316       return expand_shift (RSHIFT_EXPR, mode, target,
1317                            build_int_cst (NULL_TREE,
1318                                           GET_MODE_BITSIZE (mode) - bitsize),
1319                            NULL_RTX, 0);
1320     }
1321
1322   /* From here on we know the desired field is smaller than a word.  */
1323
1324   /* Check if there is a correspondingly-sized integer field, so we can
1325      safely extract it as one size of integer, if necessary; then
1326      truncate or extend to the size that is wanted; then use SUBREGs or
1327      convert_to_mode to get one of the modes we really wanted.  */
1328
1329   int_mode = int_mode_for_mode (tmode);
1330   if (int_mode == BLKmode)
1331     int_mode = int_mode_for_mode (mode);
1332   /* Should probably push op0 out to memory and then do a load.  */
1333   gcc_assert (int_mode != BLKmode);
1334
1335   /* OFFSET is the number of words or bytes (UNIT says which)
1336      from STR_RTX to the first word or byte containing part of the field.  */
1337   if (!MEM_P (op0))
1338     {
1339       if (offset != 0
1340           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1341         {
1342           if (!REG_P (op0))
1343             op0 = copy_to_reg (op0);
1344           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1345                                 op0, (offset * UNITS_PER_WORD));
1346         }
1347       offset = 0;
1348     }
1349
1350   /* Now OFFSET is nonzero only for memory operands.  */
1351
1352   if (unsignedp)
1353     {
1354       if (HAVE_extzv
1355           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1356           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1357                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1358         {
1359           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1360           rtx bitsize_rtx, bitpos_rtx;
1361           rtx last = get_last_insn ();
1362           rtx xop0 = op0;
1363           rtx xtarget = target;
1364           rtx xspec_target = spec_target;
1365           rtx xspec_target_subreg = spec_target_subreg;
1366           rtx pat;
1367           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1368
1369           if (MEM_P (xop0))
1370             {
1371               int save_volatile_ok = volatile_ok;
1372               volatile_ok = 1;
1373
1374               /* Is the memory operand acceptable?  */
1375               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1376                      (xop0, GET_MODE (xop0))))
1377                 {
1378                   /* No, load into a reg and extract from there.  */
1379                   enum machine_mode bestmode;
1380
1381                   /* Get the mode to use for inserting into this field.  If
1382                      OP0 is BLKmode, get the smallest mode consistent with the
1383                      alignment. If OP0 is a non-BLKmode object that is no
1384                      wider than MAXMODE, use its mode. Otherwise, use the
1385                      smallest mode containing the field.  */
1386
1387                   if (GET_MODE (xop0) == BLKmode
1388                       || (GET_MODE_SIZE (GET_MODE (op0))
1389                           > GET_MODE_SIZE (maxmode)))
1390                     bestmode = get_best_mode (bitsize, bitnum,
1391                                               MEM_ALIGN (xop0), maxmode,
1392                                               MEM_VOLATILE_P (xop0));
1393                   else
1394                     bestmode = GET_MODE (xop0);
1395
1396                   if (bestmode == VOIDmode
1397                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1398                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1399                     goto extzv_loses;
1400
1401                   /* Compute offset as multiple of this unit,
1402                      counting in bytes.  */
1403                   unit = GET_MODE_BITSIZE (bestmode);
1404                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1405                   xbitpos = bitnum % unit;
1406                   xop0 = adjust_address (xop0, bestmode, xoffset);
1407
1408                   /* Fetch it to a register in that size.  */
1409                   xop0 = force_reg (bestmode, xop0);
1410
1411                   /* XBITPOS counts within UNIT, which is what is expected.  */
1412                 }
1413               else
1414                 /* Get ref to first byte containing part of the field.  */
1415                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1416
1417               volatile_ok = save_volatile_ok;
1418             }
1419
1420           /* If op0 is a register, we need it in MAXMODE (which is usually
1421              SImode). to make it acceptable to the format of extzv.  */
1422           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1423             goto extzv_loses;
1424           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1425             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1426
1427           /* On big-endian machines, we count bits from the most significant.
1428              If the bit field insn does not, we must invert.  */
1429           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1430             xbitpos = unit - bitsize - xbitpos;
1431
1432           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1433           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1434             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1435
1436           unit = GET_MODE_BITSIZE (maxmode);
1437
1438           if (xtarget == 0
1439               || (flag_force_mem && MEM_P (xtarget)))
1440             xtarget = xspec_target = gen_reg_rtx (tmode);
1441
1442           if (GET_MODE (xtarget) != maxmode)
1443             {
1444               if (REG_P (xtarget))
1445                 {
1446                   int wider = (GET_MODE_SIZE (maxmode)
1447                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1448                   xtarget = gen_lowpart (maxmode, xtarget);
1449                   if (wider)
1450                     xspec_target_subreg = xtarget;
1451                 }
1452               else
1453                 xtarget = gen_reg_rtx (maxmode);
1454             }
1455
1456           /* If this machine's extzv insists on a register target,
1457              make sure we have one.  */
1458           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1459                  (xtarget, maxmode)))
1460             xtarget = gen_reg_rtx (maxmode);
1461
1462           bitsize_rtx = GEN_INT (bitsize);
1463           bitpos_rtx = GEN_INT (xbitpos);
1464
1465           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1466           if (pat)
1467             {
1468               emit_insn (pat);
1469               target = xtarget;
1470               spec_target = xspec_target;
1471               spec_target_subreg = xspec_target_subreg;
1472             }
1473           else
1474             {
1475               delete_insns_since (last);
1476               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1477                                                 bitpos, target, 1);
1478             }
1479         }
1480       else
1481       extzv_loses:
1482         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1483                                           bitpos, target, 1);
1484     }
1485   else
1486     {
1487       if (HAVE_extv
1488           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1489           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1490                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1491         {
1492           int xbitpos = bitpos, xoffset = offset;
1493           rtx bitsize_rtx, bitpos_rtx;
1494           rtx last = get_last_insn ();
1495           rtx xop0 = op0, xtarget = target;
1496           rtx xspec_target = spec_target;
1497           rtx xspec_target_subreg = spec_target_subreg;
1498           rtx pat;
1499           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1500
1501           if (MEM_P (xop0))
1502             {
1503               /* Is the memory operand acceptable?  */
1504               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1505                      (xop0, GET_MODE (xop0))))
1506                 {
1507                   /* No, load into a reg and extract from there.  */
1508                   enum machine_mode bestmode;
1509
1510                   /* Get the mode to use for inserting into this field.  If
1511                      OP0 is BLKmode, get the smallest mode consistent with the
1512                      alignment. If OP0 is a non-BLKmode object that is no
1513                      wider than MAXMODE, use its mode. Otherwise, use the
1514                      smallest mode containing the field.  */
1515
1516                   if (GET_MODE (xop0) == BLKmode
1517                       || (GET_MODE_SIZE (GET_MODE (op0))
1518                           > GET_MODE_SIZE (maxmode)))
1519                     bestmode = get_best_mode (bitsize, bitnum,
1520                                               MEM_ALIGN (xop0), maxmode,
1521                                               MEM_VOLATILE_P (xop0));
1522                   else
1523                     bestmode = GET_MODE (xop0);
1524
1525                   if (bestmode == VOIDmode
1526                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1527                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1528                     goto extv_loses;
1529
1530                   /* Compute offset as multiple of this unit,
1531                      counting in bytes.  */
1532                   unit = GET_MODE_BITSIZE (bestmode);
1533                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1534                   xbitpos = bitnum % unit;
1535                   xop0 = adjust_address (xop0, bestmode, xoffset);
1536
1537                   /* Fetch it to a register in that size.  */
1538                   xop0 = force_reg (bestmode, xop0);
1539
1540                   /* XBITPOS counts within UNIT, which is what is expected.  */
1541                 }
1542               else
1543                 /* Get ref to first byte containing part of the field.  */
1544                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1545             }
1546
1547           /* If op0 is a register, we need it in MAXMODE (which is usually
1548              SImode) to make it acceptable to the format of extv.  */
1549           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1550             goto extv_loses;
1551           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1552             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1553
1554           /* On big-endian machines, we count bits from the most significant.
1555              If the bit field insn does not, we must invert.  */
1556           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1557             xbitpos = unit - bitsize - xbitpos;
1558
1559           /* XBITPOS counts within a size of UNIT.
1560              Adjust to count within a size of MAXMODE.  */
1561           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1562             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1563
1564           unit = GET_MODE_BITSIZE (maxmode);
1565
1566           if (xtarget == 0
1567               || (flag_force_mem && MEM_P (xtarget)))
1568             xtarget = xspec_target = gen_reg_rtx (tmode);
1569
1570           if (GET_MODE (xtarget) != maxmode)
1571             {
1572               if (REG_P (xtarget))
1573                 {
1574                   int wider = (GET_MODE_SIZE (maxmode)
1575                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1576                   xtarget = gen_lowpart (maxmode, xtarget);
1577                   if (wider)
1578                     xspec_target_subreg = xtarget;
1579                 }
1580               else
1581                 xtarget = gen_reg_rtx (maxmode);
1582             }
1583
1584           /* If this machine's extv insists on a register target,
1585              make sure we have one.  */
1586           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1587                  (xtarget, maxmode)))
1588             xtarget = gen_reg_rtx (maxmode);
1589
1590           bitsize_rtx = GEN_INT (bitsize);
1591           bitpos_rtx = GEN_INT (xbitpos);
1592
1593           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1594           if (pat)
1595             {
1596               emit_insn (pat);
1597               target = xtarget;
1598               spec_target = xspec_target;
1599               spec_target_subreg = xspec_target_subreg;
1600             }
1601           else
1602             {
1603               delete_insns_since (last);
1604               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1605                                                 bitpos, target, 0);
1606             }
1607         }
1608       else
1609       extv_loses:
1610         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1611                                           bitpos, target, 0);
1612     }
1613   if (target == spec_target)
1614     return target;
1615   if (target == spec_target_subreg)
1616     return spec_target;
1617   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1618     {
1619       /* If the target mode is not a scalar integral, first convert to the
1620          integer mode of that size and then access it as a floating-point
1621          value via a SUBREG.  */
1622       if (!SCALAR_INT_MODE_P (tmode))
1623         {
1624           enum machine_mode smode
1625             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1626           target = convert_to_mode (smode, target, unsignedp);
1627           target = force_reg (smode, target);
1628           return gen_lowpart (tmode, target);
1629         }
1630
1631       return convert_to_mode (tmode, target, unsignedp);
1632     }
1633   return target;
1634 }
1635 \f
1636 /* Extract a bit field using shifts and boolean operations
1637    Returns an rtx to represent the value.
1638    OP0 addresses a register (word) or memory (byte).
1639    BITPOS says which bit within the word or byte the bit field starts in.
1640    OFFSET says how many bytes farther the bit field starts;
1641     it is 0 if OP0 is a register.
1642    BITSIZE says how many bits long the bit field is.
1643     (If OP0 is a register, it may be narrower than a full word,
1644      but BITPOS still counts within a full word,
1645      which is significant on bigendian machines.)
1646
1647    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1648    If TARGET is nonzero, attempts to store the value there
1649    and return TARGET, but this is not guaranteed.
1650    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1651
1652 static rtx
1653 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1654                          unsigned HOST_WIDE_INT offset,
1655                          unsigned HOST_WIDE_INT bitsize,
1656                          unsigned HOST_WIDE_INT bitpos, rtx target,
1657                          int unsignedp)
1658 {
1659   unsigned int total_bits = BITS_PER_WORD;
1660   enum machine_mode mode;
1661
1662   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1663     {
1664       /* Special treatment for a bit field split across two registers.  */
1665       if (bitsize + bitpos > BITS_PER_WORD)
1666         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1667     }
1668   else
1669     {
1670       /* Get the proper mode to use for this field.  We want a mode that
1671          includes the entire field.  If such a mode would be larger than
1672          a word, we won't be doing the extraction the normal way.  */
1673
1674       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1675                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1676
1677       if (mode == VOIDmode)
1678         /* The only way this should occur is if the field spans word
1679            boundaries.  */
1680         return extract_split_bit_field (op0, bitsize,
1681                                         bitpos + offset * BITS_PER_UNIT,
1682                                         unsignedp);
1683
1684       total_bits = GET_MODE_BITSIZE (mode);
1685
1686       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1687          be in the range 0 to total_bits-1, and put any excess bytes in
1688          OFFSET.  */
1689       if (bitpos >= total_bits)
1690         {
1691           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1692           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1693                      * BITS_PER_UNIT);
1694         }
1695
1696       /* Get ref to an aligned byte, halfword, or word containing the field.
1697          Adjust BITPOS to be position within a word,
1698          and OFFSET to be the offset of that word.
1699          Then alter OP0 to refer to that word.  */
1700       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1701       offset -= (offset % (total_bits / BITS_PER_UNIT));
1702       op0 = adjust_address (op0, mode, offset);
1703     }
1704
1705   mode = GET_MODE (op0);
1706
1707   if (BYTES_BIG_ENDIAN)
1708     /* BITPOS is the distance between our msb and that of OP0.
1709        Convert it to the distance from the lsb.  */
1710     bitpos = total_bits - bitsize - bitpos;
1711
1712   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1713      We have reduced the big-endian case to the little-endian case.  */
1714
1715   if (unsignedp)
1716     {
1717       if (bitpos)
1718         {
1719           /* If the field does not already start at the lsb,
1720              shift it so it does.  */
1721           tree amount = build_int_cst (NULL_TREE, bitpos);
1722           /* Maybe propagate the target for the shift.  */
1723           /* But not if we will return it--could confuse integrate.c.  */
1724           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1725           if (tmode != mode) subtarget = 0;
1726           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1727         }
1728       /* Convert the value to the desired mode.  */
1729       if (mode != tmode)
1730         op0 = convert_to_mode (tmode, op0, 1);
1731
1732       /* Unless the msb of the field used to be the msb when we shifted,
1733          mask out the upper bits.  */
1734
1735       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1736         return expand_binop (GET_MODE (op0), and_optab, op0,
1737                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1738                              target, 1, OPTAB_LIB_WIDEN);
1739       return op0;
1740     }
1741
1742   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1743      then arithmetic-shift its lsb to the lsb of the word.  */
1744   op0 = force_reg (mode, op0);
1745   if (mode != tmode)
1746     target = 0;
1747
1748   /* Find the narrowest integer mode that contains the field.  */
1749
1750   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1751        mode = GET_MODE_WIDER_MODE (mode))
1752     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1753       {
1754         op0 = convert_to_mode (mode, op0, 0);
1755         break;
1756       }
1757
1758   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1759     {
1760       tree amount
1761         = build_int_cst (NULL_TREE,
1762                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1763       /* Maybe propagate the target for the shift.  */
1764       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1765       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1766     }
1767
1768   return expand_shift (RSHIFT_EXPR, mode, op0,
1769                        build_int_cst (NULL_TREE,
1770                                       GET_MODE_BITSIZE (mode) - bitsize),
1771                        target, 0);
1772 }
1773 \f
1774 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1775    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1776    complement of that if COMPLEMENT.  The mask is truncated if
1777    necessary to the width of mode MODE.  The mask is zero-extended if
1778    BITSIZE+BITPOS is too small for MODE.  */
1779
1780 static rtx
1781 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1782 {
1783   HOST_WIDE_INT masklow, maskhigh;
1784
1785   if (bitsize == 0)
1786     masklow = 0;
1787   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1788     masklow = (HOST_WIDE_INT) -1 << bitpos;
1789   else
1790     masklow = 0;
1791
1792   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1793     masklow &= ((unsigned HOST_WIDE_INT) -1
1794                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1795
1796   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1797     maskhigh = -1;
1798   else
1799     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1800
1801   if (bitsize == 0)
1802     maskhigh = 0;
1803   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1804     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1805                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1806   else
1807     maskhigh = 0;
1808
1809   if (complement)
1810     {
1811       maskhigh = ~maskhigh;
1812       masklow = ~masklow;
1813     }
1814
1815   return immed_double_const (masklow, maskhigh, mode);
1816 }
1817
1818 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1819    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1820
1821 static rtx
1822 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1823 {
1824   unsigned HOST_WIDE_INT v = INTVAL (value);
1825   HOST_WIDE_INT low, high;
1826
1827   if (bitsize < HOST_BITS_PER_WIDE_INT)
1828     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1829
1830   if (bitpos < HOST_BITS_PER_WIDE_INT)
1831     {
1832       low = v << bitpos;
1833       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1834     }
1835   else
1836     {
1837       low = 0;
1838       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1839     }
1840
1841   return immed_double_const (low, high, mode);
1842 }
1843 \f
1844 /* Extract a bit field from a memory by forcing the alignment of the
1845    memory.  This efficient only if the field spans at least 4 boundaries.
1846
1847    OP0 is the MEM.
1848    BITSIZE is the field width; BITPOS is the position of the first bit.
1849    UNSIGNEDP is true if the result should be zero-extended.  */
1850
1851 static rtx
1852 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1853                                    unsigned HOST_WIDE_INT bitpos,
1854                                    int unsignedp)
1855 {
1856   enum machine_mode mode, dmode;
1857   unsigned int m_bitsize, m_size;
1858   unsigned int sign_shift_up, sign_shift_dn;
1859   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1860
1861   /* Choose a mode that will fit BITSIZE.  */
1862   mode = smallest_mode_for_size (bitsize, MODE_INT);
1863   m_size = GET_MODE_SIZE (mode);
1864   m_bitsize = GET_MODE_BITSIZE (mode);
1865
1866   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1867   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1868   if (dmode == BLKmode)
1869     return NULL;
1870
1871   do_pending_stack_adjust ();
1872   start = get_last_insn ();
1873
1874   /* At the end, we'll need an additional shift to deal with sign/zero
1875      extension.  By default this will be a left+right shift of the
1876      appropriate size.  But we may be able to eliminate one of them.  */
1877   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1878
1879   if (STRICT_ALIGNMENT)
1880     {
1881       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1882       bitpos %= BITS_PER_UNIT;
1883
1884       /* We load two values to be concatenate.  There's an edge condition
1885          that bears notice -- an aligned value at the end of a page can
1886          only load one value lest we segfault.  So the two values we load
1887          are at "base & -size" and "(base + size - 1) & -size".  If base
1888          is unaligned, the addresses will be aligned and sequential; if
1889          base is aligned, the addresses will both be equal to base.  */
1890
1891       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1892                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1893                                 NULL, true, OPTAB_LIB_WIDEN);
1894       mark_reg_pointer (a1, m_bitsize);
1895       v1 = gen_rtx_MEM (mode, a1);
1896       set_mem_align (v1, m_bitsize);
1897       v1 = force_reg (mode, validize_mem (v1));
1898
1899       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1900       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1901                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1902                                 NULL, true, OPTAB_LIB_WIDEN);
1903       v2 = gen_rtx_MEM (mode, a2);
1904       set_mem_align (v2, m_bitsize);
1905       v2 = force_reg (mode, validize_mem (v2));
1906
1907       /* Combine these two values into a double-word value.  */
1908       if (m_bitsize == BITS_PER_WORD)
1909         {
1910           comb = gen_reg_rtx (dmode);
1911           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1912           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1913           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1914         }
1915       else
1916         {
1917           if (BYTES_BIG_ENDIAN)
1918             comb = v1, v1 = v2, v2 = comb;
1919           v1 = convert_modes (dmode, mode, v1, true);
1920           if (v1 == NULL)
1921             goto fail;
1922           v2 = convert_modes (dmode, mode, v2, true);
1923           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1924                                     NULL, true, OPTAB_LIB_WIDEN);
1925           if (v2 == NULL)
1926             goto fail;
1927           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1928                                       true, OPTAB_LIB_WIDEN);
1929           if (comb == NULL)
1930             goto fail;
1931         }
1932
1933       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1934                                    NULL, true, OPTAB_LIB_WIDEN);
1935       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1936
1937       if (bitpos != 0)
1938         {
1939           if (sign_shift_up <= bitpos)
1940             bitpos -= sign_shift_up, sign_shift_up = 0;
1941           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1942                                        NULL, true, OPTAB_LIB_WIDEN);
1943         }
1944     }
1945   else
1946     {
1947       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1948       bitpos %= BITS_PER_UNIT;
1949
1950       /* When strict alignment is not required, we can just load directly
1951          from memory without masking.  If the remaining BITPOS offset is
1952          small enough, we may be able to do all operations in MODE as
1953          opposed to DMODE.  */
1954       if (bitpos + bitsize <= m_bitsize)
1955         dmode = mode;
1956       comb = adjust_address (op0, dmode, offset);
1957
1958       if (sign_shift_up <= bitpos)
1959         bitpos -= sign_shift_up, sign_shift_up = 0;
1960       shift = GEN_INT (bitpos);
1961     }
1962
1963   /* Shift down the double-word such that the requested value is at bit 0.  */
1964   if (shift != const0_rtx)
1965     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1966                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1967   if (comb == NULL)
1968     goto fail;
1969
1970   /* If the field exactly matches MODE, then all we need to do is return the
1971      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1972   result = force_reg (mode, gen_lowpart (mode, comb));
1973
1974   if (sign_shift_up)
1975     result = expand_simple_binop (mode, ASHIFT, result,
1976                                   GEN_INT (sign_shift_up),
1977                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1978   if (sign_shift_dn)
1979     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1980                                   result, GEN_INT (sign_shift_dn),
1981                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1982
1983   return result;
1984
1985  fail:
1986   delete_insns_since (start);
1987   return NULL;
1988 }
1989
1990 /* Extract a bit field that is split across two words
1991    and return an RTX for the result.
1992
1993    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1994    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1995    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1996
1997 static rtx
1998 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1999                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2000 {
2001   unsigned int unit;
2002   unsigned int bitsdone = 0;
2003   rtx result = NULL_RTX;
2004   int first = 1;
2005
2006   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2007      much at a time.  */
2008   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2009     unit = BITS_PER_WORD;
2010   else
2011     {
2012       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2013       if (0 && bitsize / unit > 2)
2014         {
2015           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2016                                                        unsignedp);
2017           if (tmp)
2018             return tmp;
2019         }
2020     }
2021
2022   while (bitsdone < bitsize)
2023     {
2024       unsigned HOST_WIDE_INT thissize;
2025       rtx part, word;
2026       unsigned HOST_WIDE_INT thispos;
2027       unsigned HOST_WIDE_INT offset;
2028
2029       offset = (bitpos + bitsdone) / unit;
2030       thispos = (bitpos + bitsdone) % unit;
2031
2032       /* THISSIZE must not overrun a word boundary.  Otherwise,
2033          extract_fixed_bit_field will call us again, and we will mutually
2034          recurse forever.  */
2035       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2036       thissize = MIN (thissize, unit - thispos);
2037
2038       /* If OP0 is a register, then handle OFFSET here.
2039
2040          When handling multiword bitfields, extract_bit_field may pass
2041          down a word_mode SUBREG of a larger REG for a bitfield that actually
2042          crosses a word boundary.  Thus, for a SUBREG, we must find
2043          the current word starting from the base register.  */
2044       if (GET_CODE (op0) == SUBREG)
2045         {
2046           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2047           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2048                                         GET_MODE (SUBREG_REG (op0)));
2049           offset = 0;
2050         }
2051       else if (REG_P (op0))
2052         {
2053           word = operand_subword_force (op0, offset, GET_MODE (op0));
2054           offset = 0;
2055         }
2056       else
2057         word = op0;
2058
2059       /* Extract the parts in bit-counting order,
2060          whose meaning is determined by BYTES_PER_UNIT.
2061          OFFSET is in UNITs, and UNIT is in bits.
2062          extract_fixed_bit_field wants offset in bytes.  */
2063       part = extract_fixed_bit_field (word_mode, word,
2064                                       offset * unit / BITS_PER_UNIT,
2065                                       thissize, thispos, 0, 1);
2066       bitsdone += thissize;
2067
2068       /* Shift this part into place for the result.  */
2069       if (BYTES_BIG_ENDIAN)
2070         {
2071           if (bitsize != bitsdone)
2072             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2073                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2074                                  0, 1);
2075         }
2076       else
2077         {
2078           if (bitsdone != thissize)
2079             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2080                                  build_int_cst (NULL_TREE,
2081                                                 bitsdone - thissize), 0, 1);
2082         }
2083
2084       if (first)
2085         result = part;
2086       else
2087         /* Combine the parts with bitwise or.  This works
2088            because we extracted each part as an unsigned bit field.  */
2089         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2090                                OPTAB_LIB_WIDEN);
2091
2092       first = 0;
2093     }
2094
2095   /* Unsigned bit field: we are done.  */
2096   if (unsignedp)
2097     return result;
2098   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2099   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2100                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2101                          NULL_RTX, 0);
2102   return expand_shift (RSHIFT_EXPR, word_mode, result,
2103                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2104                        NULL_RTX, 0);
2105 }
2106 \f
2107 /* Add INC into TARGET.  */
2108
2109 void
2110 expand_inc (rtx target, rtx inc)
2111 {
2112   rtx value = expand_binop (GET_MODE (target), add_optab,
2113                             target, inc,
2114                             target, 0, OPTAB_LIB_WIDEN);
2115   if (value != target)
2116     emit_move_insn (target, value);
2117 }
2118
2119 /* Subtract DEC from TARGET.  */
2120
2121 void
2122 expand_dec (rtx target, rtx dec)
2123 {
2124   rtx value = expand_binop (GET_MODE (target), sub_optab,
2125                             target, dec,
2126                             target, 0, OPTAB_LIB_WIDEN);
2127   if (value != target)
2128     emit_move_insn (target, value);
2129 }
2130 \f
2131 /* Output a shift instruction for expression code CODE,
2132    with SHIFTED being the rtx for the value to shift,
2133    and AMOUNT the tree for the amount to shift by.
2134    Store the result in the rtx TARGET, if that is convenient.
2135    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2136    Return the rtx for where the value is.  */
2137
2138 rtx
2139 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2140               tree amount, rtx target, int unsignedp)
2141 {
2142   rtx op1, temp = 0;
2143   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2144   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2145   int try;
2146
2147   /* Previously detected shift-counts computed by NEGATE_EXPR
2148      and shifted in the other direction; but that does not work
2149      on all machines.  */
2150
2151   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2152
2153   if (SHIFT_COUNT_TRUNCATED)
2154     {
2155       if (GET_CODE (op1) == CONST_INT
2156           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2157               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2158         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2159                        % GET_MODE_BITSIZE (mode));
2160       else if (GET_CODE (op1) == SUBREG
2161                && subreg_lowpart_p (op1))
2162         op1 = SUBREG_REG (op1);
2163     }
2164
2165   if (op1 == const0_rtx)
2166     return shifted;
2167
2168   /* Check whether its cheaper to implement a left shift by a constant
2169      bit count by a sequence of additions.  */
2170   if (code == LSHIFT_EXPR
2171       && GET_CODE (op1) == CONST_INT
2172       && INTVAL (op1) > 0
2173       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2174       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2175     {
2176       int i;
2177       for (i = 0; i < INTVAL (op1); i++)
2178         {
2179           temp = force_reg (mode, shifted);
2180           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2181                                   unsignedp, OPTAB_LIB_WIDEN);
2182         }
2183       return shifted;
2184     }
2185
2186   for (try = 0; temp == 0 && try < 3; try++)
2187     {
2188       enum optab_methods methods;
2189
2190       if (try == 0)
2191         methods = OPTAB_DIRECT;
2192       else if (try == 1)
2193         methods = OPTAB_WIDEN;
2194       else
2195         methods = OPTAB_LIB_WIDEN;
2196
2197       if (rotate)
2198         {
2199           /* Widening does not work for rotation.  */
2200           if (methods == OPTAB_WIDEN)
2201             continue;
2202           else if (methods == OPTAB_LIB_WIDEN)
2203             {
2204               /* If we have been unable to open-code this by a rotation,
2205                  do it as the IOR of two shifts.  I.e., to rotate A
2206                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2207                  where C is the bitsize of A.
2208
2209                  It is theoretically possible that the target machine might
2210                  not be able to perform either shift and hence we would
2211                  be making two libcalls rather than just the one for the
2212                  shift (similarly if IOR could not be done).  We will allow
2213                  this extremely unlikely lossage to avoid complicating the
2214                  code below.  */
2215
2216               rtx subtarget = target == shifted ? 0 : target;
2217               rtx temp1;
2218               tree type = TREE_TYPE (amount);
2219               tree new_amount = make_tree (type, op1);
2220               tree other_amount
2221                 = fold (build2 (MINUS_EXPR, type, convert
2222                                 (type, build_int_cst
2223                                  (NULL_TREE, GET_MODE_BITSIZE (mode))),
2224                                 amount));
2225
2226               shifted = force_reg (mode, shifted);
2227
2228               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2229                                    mode, shifted, new_amount, subtarget, 1);
2230               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2231                                     mode, shifted, other_amount, 0, 1);
2232               return expand_binop (mode, ior_optab, temp, temp1, target,
2233                                    unsignedp, methods);
2234             }
2235
2236           temp = expand_binop (mode,
2237                                left ? rotl_optab : rotr_optab,
2238                                shifted, op1, target, unsignedp, methods);
2239
2240           /* If we don't have the rotate, but we are rotating by a constant
2241              that is in range, try a rotate in the opposite direction.  */
2242
2243           if (temp == 0 && GET_CODE (op1) == CONST_INT
2244               && INTVAL (op1) > 0
2245               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2246             temp = expand_binop (mode,
2247                                  left ? rotr_optab : rotl_optab,
2248                                  shifted,
2249                                  GEN_INT (GET_MODE_BITSIZE (mode)
2250                                           - INTVAL (op1)),
2251                                  target, unsignedp, methods);
2252         }
2253       else if (unsignedp)
2254         temp = expand_binop (mode,
2255                              left ? ashl_optab : lshr_optab,
2256                              shifted, op1, target, unsignedp, methods);
2257
2258       /* Do arithmetic shifts.
2259          Also, if we are going to widen the operand, we can just as well
2260          use an arithmetic right-shift instead of a logical one.  */
2261       if (temp == 0 && ! rotate
2262           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2263         {
2264           enum optab_methods methods1 = methods;
2265
2266           /* If trying to widen a log shift to an arithmetic shift,
2267              don't accept an arithmetic shift of the same size.  */
2268           if (unsignedp)
2269             methods1 = OPTAB_MUST_WIDEN;
2270
2271           /* Arithmetic shift */
2272
2273           temp = expand_binop (mode,
2274                                left ? ashl_optab : ashr_optab,
2275                                shifted, op1, target, unsignedp, methods1);
2276         }
2277
2278       /* We used to try extzv here for logical right shifts, but that was
2279          only useful for one machine, the VAX, and caused poor code
2280          generation there for lshrdi3, so the code was deleted and a
2281          define_expand for lshrsi3 was added to vax.md.  */
2282     }
2283
2284   gcc_assert (temp);
2285   return temp;
2286 }
2287 \f
2288 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2289                   alg_add_t_m2, alg_sub_t_m2,
2290                   alg_add_factor, alg_sub_factor,
2291                   alg_add_t2_m, alg_sub_t2_m };
2292
2293 /* This structure holds the "cost" of a multiply sequence.  The
2294    "cost" field holds the total rtx_cost of every operator in the
2295    synthetic multiplication sequence, hence cost(a op b) is defined
2296    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2297    The "latency" field holds the minimum possible latency of the
2298    synthetic multiply, on a hypothetical infinitely parallel CPU.
2299    This is the critical path, or the maximum height, of the expression
2300    tree which is the sum of rtx_costs on the most expensive path from
2301    any leaf to the root.  Hence latency(a op b) is defined as zero for
2302    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2303
2304 struct mult_cost {
2305   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2306   short latency;  /* The latency of the multiplication sequence.  */
2307 };
2308
2309 /* This macro is used to compare a pointer to a mult_cost against an
2310    single integer "rtx_cost" value.  This is equivalent to the macro
2311    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2312 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2313                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2314
2315 /* This macro is used to compare two pointers to mult_costs against
2316    each other.  The macro returns true if X is cheaper than Y.
2317    Currently, the cheaper of two mult_costs is the one with the
2318    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2319 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2320                                  || ((X)->cost == (Y)->cost     \
2321                                      && (X)->latency < (Y)->latency))
2322
2323 /* This structure records a sequence of operations.
2324    `ops' is the number of operations recorded.
2325    `cost' is their total cost.
2326    The operations are stored in `op' and the corresponding
2327    logarithms of the integer coefficients in `log'.
2328
2329    These are the operations:
2330    alg_zero             total := 0;
2331    alg_m                total := multiplicand;
2332    alg_shift            total := total * coeff
2333    alg_add_t_m2         total := total + multiplicand * coeff;
2334    alg_sub_t_m2         total := total - multiplicand * coeff;
2335    alg_add_factor       total := total * coeff + total;
2336    alg_sub_factor       total := total * coeff - total;
2337    alg_add_t2_m         total := total * coeff + multiplicand;
2338    alg_sub_t2_m         total := total * coeff - multiplicand;
2339
2340    The first operand must be either alg_zero or alg_m.  */
2341
2342 struct algorithm
2343 {
2344   struct mult_cost cost;
2345   short ops;
2346   /* The size of the OP and LOG fields are not directly related to the
2347      word size, but the worst-case algorithms will be if we have few
2348      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2349      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2350      in total wordsize operations.  */
2351   enum alg_code op[MAX_BITS_PER_WORD];
2352   char log[MAX_BITS_PER_WORD];
2353 };
2354
2355 /* The entry for our multiplication cache/hash table.  */
2356 struct alg_hash_entry {
2357   /* The number we are multiplying by.  */
2358   unsigned int t;
2359
2360   /* The mode in which we are multiplying something by T.  */
2361   enum machine_mode mode;
2362
2363   /* The best multiplication algorithm for t.  */
2364   enum alg_code alg;
2365 };
2366
2367 /* The number of cache/hash entries.  */
2368 #define NUM_ALG_HASH_ENTRIES 307
2369
2370 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2371    actually a hash table.  If we have a collision, that the older
2372    entry is kicked out.  */
2373 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2374
2375 /* Indicates the type of fixup needed after a constant multiplication.
2376    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2377    the result should be negated, and ADD_VARIANT means that the
2378    multiplicand should be added to the result.  */
2379 enum mult_variant {basic_variant, negate_variant, add_variant};
2380
2381 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2382                         const struct mult_cost *, enum machine_mode mode);
2383 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2384                                  struct algorithm *, enum mult_variant *, int);
2385 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2386                               const struct algorithm *, enum mult_variant);
2387 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2388                                                  int, unsigned HOST_WIDE_INT *,
2389                                                  int *, int *);
2390 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2391 static rtx extract_high_half (enum machine_mode, rtx);
2392 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2393                                        int, int);
2394 /* Compute and return the best algorithm for multiplying by T.
2395    The algorithm must cost less than cost_limit
2396    If retval.cost >= COST_LIMIT, no algorithm was found and all
2397    other field of the returned struct are undefined.
2398    MODE is the machine mode of the multiplication.  */
2399
2400 static void
2401 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2402             const struct mult_cost *cost_limit, enum machine_mode mode)
2403 {
2404   int m;
2405   struct algorithm *alg_in, *best_alg;
2406   struct mult_cost best_cost;
2407   struct mult_cost new_limit;
2408   int op_cost, op_latency;
2409   unsigned HOST_WIDE_INT q;
2410   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2411   int hash_index;
2412   bool cache_hit = false;
2413   enum alg_code cache_alg = alg_zero;
2414
2415   /* Indicate that no algorithm is yet found.  If no algorithm
2416      is found, this value will be returned and indicate failure.  */
2417   alg_out->cost.cost = cost_limit->cost + 1;
2418   alg_out->cost.latency = cost_limit->latency + 1;
2419
2420   if (cost_limit->cost < 0
2421       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2422     return;
2423
2424   /* Restrict the bits of "t" to the multiplication's mode.  */
2425   t &= GET_MODE_MASK (mode);
2426
2427   /* t == 1 can be done in zero cost.  */
2428   if (t == 1)
2429     {
2430       alg_out->ops = 1;
2431       alg_out->cost.cost = 0;
2432       alg_out->cost.latency = 0;
2433       alg_out->op[0] = alg_m;
2434       return;
2435     }
2436
2437   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2438      fail now.  */
2439   if (t == 0)
2440     {
2441       if (MULT_COST_LESS (cost_limit, zero_cost))
2442         return;
2443       else
2444         {
2445           alg_out->ops = 1;
2446           alg_out->cost.cost = zero_cost;
2447           alg_out->cost.latency = zero_cost;
2448           alg_out->op[0] = alg_zero;
2449           return;
2450         }
2451     }
2452
2453   /* We'll be needing a couple extra algorithm structures now.  */
2454
2455   alg_in = alloca (sizeof (struct algorithm));
2456   best_alg = alloca (sizeof (struct algorithm));
2457   best_cost = *cost_limit;
2458
2459   /* Compute the hash index.  */
2460   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2461
2462   /* See if we already know what to do for T.  */
2463   if (alg_hash[hash_index].t == t
2464       && alg_hash[hash_index].mode == mode
2465       && alg_hash[hash_index].alg != alg_unknown)
2466     {
2467       cache_hit = true;
2468       cache_alg = alg_hash[hash_index].alg;
2469       switch (cache_alg)
2470         {
2471         case alg_shift:
2472           goto do_alg_shift;
2473
2474         case alg_add_t_m2:
2475         case alg_sub_t_m2:
2476           goto do_alg_addsub_t_m2;
2477
2478         case alg_add_factor:
2479         case alg_sub_factor:
2480           goto do_alg_addsub_factor;
2481
2482         case alg_add_t2_m:
2483           goto do_alg_add_t2_m;
2484
2485         case alg_sub_t2_m:
2486           goto do_alg_sub_t2_m;
2487
2488         default:
2489           gcc_unreachable ();
2490         }
2491     }
2492
2493   /* If we have a group of zero bits at the low-order part of T, try
2494      multiplying by the remaining bits and then doing a shift.  */
2495
2496   if ((t & 1) == 0)
2497     {
2498     do_alg_shift:
2499       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2500       if (m < maxm)
2501         {
2502           q = t >> m;
2503           /* The function expand_shift will choose between a shift and
2504              a sequence of additions, so the observed cost is given as
2505              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2506           op_cost = m * add_cost[mode];
2507           if (shift_cost[mode][m] < op_cost)
2508             op_cost = shift_cost[mode][m];
2509           new_limit.cost = best_cost.cost - op_cost;
2510           new_limit.latency = best_cost.latency - op_cost;
2511           synth_mult (alg_in, q, &new_limit, mode);
2512
2513           alg_in->cost.cost += op_cost;
2514           alg_in->cost.latency += op_cost;
2515           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2516             {
2517               struct algorithm *x;
2518               best_cost = alg_in->cost;
2519               x = alg_in, alg_in = best_alg, best_alg = x;
2520               best_alg->log[best_alg->ops] = m;
2521               best_alg->op[best_alg->ops] = alg_shift;
2522             }
2523         }
2524       if (cache_hit)
2525         goto done;
2526     }
2527
2528   /* If we have an odd number, add or subtract one.  */
2529   if ((t & 1) != 0)
2530     {
2531       unsigned HOST_WIDE_INT w;
2532
2533     do_alg_addsub_t_m2:
2534       for (w = 1; (w & t) != 0; w <<= 1)
2535         ;
2536       /* If T was -1, then W will be zero after the loop.  This is another
2537          case where T ends with ...111.  Handling this with (T + 1) and
2538          subtract 1 produces slightly better code and results in algorithm
2539          selection much faster than treating it like the ...0111 case
2540          below.  */
2541       if (w == 0
2542           || (w > 2
2543               /* Reject the case where t is 3.
2544                  Thus we prefer addition in that case.  */
2545               && t != 3))
2546         {
2547           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2548
2549           op_cost = add_cost[mode];
2550           new_limit.cost = best_cost.cost - op_cost;
2551           new_limit.latency = best_cost.latency - op_cost;
2552           synth_mult (alg_in, t + 1, &new_limit, mode);
2553
2554           alg_in->cost.cost += op_cost;
2555           alg_in->cost.latency += op_cost;
2556           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2557             {
2558               struct algorithm *x;
2559               best_cost = alg_in->cost;
2560               x = alg_in, alg_in = best_alg, best_alg = x;
2561               best_alg->log[best_alg->ops] = 0;
2562               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2563             }
2564         }
2565       else
2566         {
2567           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2568
2569           op_cost = add_cost[mode];
2570           new_limit.cost = best_cost.cost - op_cost;
2571           new_limit.latency = best_cost.latency - op_cost;
2572           synth_mult (alg_in, t - 1, &new_limit, mode);
2573
2574           alg_in->cost.cost += op_cost;
2575           alg_in->cost.latency += op_cost;
2576           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2577             {
2578               struct algorithm *x;
2579               best_cost = alg_in->cost;
2580               x = alg_in, alg_in = best_alg, best_alg = x;
2581               best_alg->log[best_alg->ops] = 0;
2582               best_alg->op[best_alg->ops] = alg_add_t_m2;
2583             }
2584         }
2585       if (cache_hit)
2586         goto done;
2587     }
2588
2589   /* Look for factors of t of the form
2590      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2591      If we find such a factor, we can multiply by t using an algorithm that
2592      multiplies by q, shift the result by m and add/subtract it to itself.
2593
2594      We search for large factors first and loop down, even if large factors
2595      are less probable than small; if we find a large factor we will find a
2596      good sequence quickly, and therefore be able to prune (by decreasing
2597      COST_LIMIT) the search.  */
2598
2599  do_alg_addsub_factor:
2600   for (m = floor_log2 (t - 1); m >= 2; m--)
2601     {
2602       unsigned HOST_WIDE_INT d;
2603
2604       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2605       if (t % d == 0 && t > d && m < maxm
2606           && (!cache_hit || cache_alg == alg_add_factor))
2607         {
2608           /* If the target has a cheap shift-and-add instruction use
2609              that in preference to a shift insn followed by an add insn.
2610              Assume that the shift-and-add is "atomic" with a latency
2611              equal to its cost, otherwise assume that on superscalar
2612              hardware the shift may be executed concurrently with the
2613              earlier steps in the algorithm.  */
2614           op_cost = add_cost[mode] + shift_cost[mode][m];
2615           if (shiftadd_cost[mode][m] < op_cost)
2616             {
2617               op_cost = shiftadd_cost[mode][m];
2618               op_latency = op_cost;
2619             }
2620           else
2621             op_latency = add_cost[mode];
2622
2623           new_limit.cost = best_cost.cost - op_cost;
2624           new_limit.latency = best_cost.latency - op_latency;
2625           synth_mult (alg_in, t / d, &new_limit, mode);
2626
2627           alg_in->cost.cost += op_cost;
2628           alg_in->cost.latency += op_latency;
2629           if (alg_in->cost.latency < op_cost)
2630             alg_in->cost.latency = op_cost;
2631           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2632             {
2633               struct algorithm *x;
2634               best_cost = alg_in->cost;
2635               x = alg_in, alg_in = best_alg, best_alg = x;
2636               best_alg->log[best_alg->ops] = m;
2637               best_alg->op[best_alg->ops] = alg_add_factor;
2638             }
2639           /* Other factors will have been taken care of in the recursion.  */
2640           break;
2641         }
2642
2643       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2644       if (t % d == 0 && t > d && m < maxm
2645           && (!cache_hit || cache_alg == alg_sub_factor))
2646         {
2647           /* If the target has a cheap shift-and-subtract insn use
2648              that in preference to a shift insn followed by a sub insn.
2649              Assume that the shift-and-sub is "atomic" with a latency
2650              equal to it's cost, otherwise assume that on superscalar
2651              hardware the shift may be executed concurrently with the
2652              earlier steps in the algorithm.  */
2653           op_cost = add_cost[mode] + shift_cost[mode][m];
2654           if (shiftsub_cost[mode][m] < op_cost)
2655             {
2656               op_cost = shiftsub_cost[mode][m];
2657               op_latency = op_cost;
2658             }
2659           else
2660             op_latency = add_cost[mode];
2661
2662           new_limit.cost = best_cost.cost - op_cost;
2663           new_limit.cost = best_cost.cost - op_latency;
2664           synth_mult (alg_in, t / d, &new_limit, mode);
2665
2666           alg_in->cost.cost += op_cost;
2667           alg_in->cost.latency += op_latency;
2668           if (alg_in->cost.latency < op_cost)
2669             alg_in->cost.latency = op_cost;
2670           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2671             {
2672               struct algorithm *x;
2673               best_cost = alg_in->cost;
2674               x = alg_in, alg_in = best_alg, best_alg = x;
2675               best_alg->log[best_alg->ops] = m;
2676               best_alg->op[best_alg->ops] = alg_sub_factor;
2677             }
2678           break;
2679         }
2680     }
2681   if (cache_hit)
2682     goto done;
2683
2684   /* Try shift-and-add (load effective address) instructions,
2685      i.e. do a*3, a*5, a*9.  */
2686   if ((t & 1) != 0)
2687     {
2688     do_alg_add_t2_m:
2689       q = t - 1;
2690       q = q & -q;
2691       m = exact_log2 (q);
2692       if (m >= 0 && m < maxm)
2693         {
2694           op_cost = shiftadd_cost[mode][m];
2695           new_limit.cost = best_cost.cost - op_cost;
2696           new_limit.latency = best_cost.latency - op_cost;
2697           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2698
2699           alg_in->cost.cost += op_cost;
2700           alg_in->cost.latency += op_cost;
2701           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2702             {
2703               struct algorithm *x;
2704               best_cost = alg_in->cost;
2705               x = alg_in, alg_in = best_alg, best_alg = x;
2706               best_alg->log[best_alg->ops] = m;
2707               best_alg->op[best_alg->ops] = alg_add_t2_m;
2708             }
2709         }
2710       if (cache_hit)
2711         goto done;
2712
2713     do_alg_sub_t2_m:
2714       q = t + 1;
2715       q = q & -q;
2716       m = exact_log2 (q);
2717       if (m >= 0 && m < maxm)
2718         {
2719           op_cost = shiftsub_cost[mode][m];
2720           new_limit.cost = best_cost.cost - op_cost;
2721           new_limit.latency = best_cost.latency - op_cost;
2722           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2723
2724           alg_in->cost.cost += op_cost;
2725           alg_in->cost.latency += op_cost;
2726           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727             {
2728               struct algorithm *x;
2729               best_cost = alg_in->cost;
2730               x = alg_in, alg_in = best_alg, best_alg = x;
2731               best_alg->log[best_alg->ops] = m;
2732               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2733             }
2734         }
2735       if (cache_hit)
2736         goto done;
2737     }
2738
2739  done:
2740   /* If best_cost has not decreased, we have not found any algorithm.  */
2741   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2742     return;
2743
2744   /* Cache the result.  */
2745   if (!cache_hit)
2746     {
2747       alg_hash[hash_index].t = t;
2748       alg_hash[hash_index].mode = mode;
2749       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2750     }
2751
2752   /* If we are getting a too long sequence for `struct algorithm'
2753      to record, make this search fail.  */
2754   if (best_alg->ops == MAX_BITS_PER_WORD)
2755     return;
2756
2757   /* Copy the algorithm from temporary space to the space at alg_out.
2758      We avoid using structure assignment because the majority of
2759      best_alg is normally undefined, and this is a critical function.  */
2760   alg_out->ops = best_alg->ops + 1;
2761   alg_out->cost = best_cost;
2762   memcpy (alg_out->op, best_alg->op,
2763           alg_out->ops * sizeof *alg_out->op);
2764   memcpy (alg_out->log, best_alg->log,
2765           alg_out->ops * sizeof *alg_out->log);
2766 }
2767 \f
2768 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2769    Try three variations:
2770
2771        - a shift/add sequence based on VAL itself
2772        - a shift/add sequence based on -VAL, followed by a negation
2773        - a shift/add sequence based on VAL - 1, followed by an addition.
2774
2775    Return true if the cheapest of these cost less than MULT_COST,
2776    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2777
2778 static bool
2779 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2780                      struct algorithm *alg, enum mult_variant *variant,
2781                      int mult_cost)
2782 {
2783   struct algorithm alg2;
2784   struct mult_cost limit;
2785   int op_cost;
2786
2787   *variant = basic_variant;
2788   limit.cost = mult_cost;
2789   limit.latency = mult_cost;
2790   synth_mult (alg, val, &limit, mode);
2791
2792   /* This works only if the inverted value actually fits in an
2793      `unsigned int' */
2794   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2795     {
2796       op_cost = neg_cost[mode];
2797       if (MULT_COST_LESS (&alg->cost, mult_cost))
2798         {
2799           limit.cost = alg->cost.cost - op_cost;
2800           limit.latency = alg->cost.latency - op_cost;
2801         }
2802       else
2803         {
2804           limit.cost = mult_cost - op_cost;
2805           limit.latency = mult_cost - op_cost;
2806         }
2807
2808       synth_mult (&alg2, -val, &limit, mode);
2809       alg2.cost.cost += op_cost;
2810       alg2.cost.latency += op_cost;
2811       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2812         *alg = alg2, *variant = negate_variant;
2813     }
2814
2815   /* This proves very useful for division-by-constant.  */
2816   op_cost = add_cost[mode];
2817   if (MULT_COST_LESS (&alg->cost, mult_cost))
2818     {
2819       limit.cost = alg->cost.cost - op_cost;
2820       limit.latency = alg->cost.latency - op_cost;
2821     }
2822   else
2823     {
2824       limit.cost = mult_cost - op_cost;
2825       limit.latency = mult_cost - op_cost;
2826     }
2827
2828   synth_mult (&alg2, val - 1, &limit, mode);
2829   alg2.cost.cost += op_cost;
2830   alg2.cost.latency += op_cost;
2831   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2832     *alg = alg2, *variant = add_variant;
2833
2834   return MULT_COST_LESS (&alg->cost, mult_cost);
2835 }
2836
2837 /* A subroutine of expand_mult, used for constant multiplications.
2838    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2839    convenient.  Use the shift/add sequence described by ALG and apply
2840    the final fixup specified by VARIANT.  */
2841
2842 static rtx
2843 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2844                    rtx target, const struct algorithm *alg,
2845                    enum mult_variant variant)
2846 {
2847   HOST_WIDE_INT val_so_far;
2848   rtx insn, accum, tem;
2849   int opno;
2850   enum machine_mode nmode;
2851
2852   /* Avoid referencing memory over and over.
2853      For speed, but also for correctness when mem is volatile.  */
2854   if (MEM_P (op0))
2855     op0 = force_reg (mode, op0);
2856
2857   /* ACCUM starts out either as OP0 or as a zero, depending on
2858      the first operation.  */
2859
2860   if (alg->op[0] == alg_zero)
2861     {
2862       accum = copy_to_mode_reg (mode, const0_rtx);
2863       val_so_far = 0;
2864     }
2865   else if (alg->op[0] == alg_m)
2866     {
2867       accum = copy_to_mode_reg (mode, op0);
2868       val_so_far = 1;
2869     }
2870   else
2871     gcc_unreachable ();
2872
2873   for (opno = 1; opno < alg->ops; opno++)
2874     {
2875       int log = alg->log[opno];
2876       rtx shift_subtarget = optimize ? 0 : accum;
2877       rtx add_target
2878         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2879            && !optimize)
2880           ? target : 0;
2881       rtx accum_target = optimize ? 0 : accum;
2882
2883       switch (alg->op[opno])
2884         {
2885         case alg_shift:
2886           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2887                                 build_int_cst (NULL_TREE, log),
2888                                 NULL_RTX, 0);
2889           val_so_far <<= log;
2890           break;
2891
2892         case alg_add_t_m2:
2893           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2894                               build_int_cst (NULL_TREE, log),
2895                               NULL_RTX, 0);
2896           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2897                                  add_target ? add_target : accum_target);
2898           val_so_far += (HOST_WIDE_INT) 1 << log;
2899           break;
2900
2901         case alg_sub_t_m2:
2902           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2903                               build_int_cst (NULL_TREE, log),
2904                               NULL_RTX, 0);
2905           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2906                                  add_target ? add_target : accum_target);
2907           val_so_far -= (HOST_WIDE_INT) 1 << log;
2908           break;
2909
2910         case alg_add_t2_m:
2911           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2912                                 build_int_cst (NULL_TREE, log),
2913                                 shift_subtarget,
2914                                 0);
2915           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2916                                  add_target ? add_target : accum_target);
2917           val_so_far = (val_so_far << log) + 1;
2918           break;
2919
2920         case alg_sub_t2_m:
2921           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2922                                 build_int_cst (NULL_TREE, log),
2923                                 shift_subtarget, 0);
2924           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2925                                  add_target ? add_target : accum_target);
2926           val_so_far = (val_so_far << log) - 1;
2927           break;
2928
2929         case alg_add_factor:
2930           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2931                               build_int_cst (NULL_TREE, log),
2932                               NULL_RTX, 0);
2933           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2934                                  add_target ? add_target : accum_target);
2935           val_so_far += val_so_far << log;
2936           break;
2937
2938         case alg_sub_factor:
2939           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2940                               build_int_cst (NULL_TREE, log),
2941                               NULL_RTX, 0);
2942           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2943                                  (add_target
2944                                   ? add_target : (optimize ? 0 : tem)));
2945           val_so_far = (val_so_far << log) - val_so_far;
2946           break;
2947
2948         default:
2949           gcc_unreachable ();
2950         }
2951
2952       /* Write a REG_EQUAL note on the last insn so that we can cse
2953          multiplication sequences.  Note that if ACCUM is a SUBREG,
2954          we've set the inner register and must properly indicate
2955          that.  */
2956
2957       tem = op0, nmode = mode;
2958       if (GET_CODE (accum) == SUBREG)
2959         {
2960           nmode = GET_MODE (SUBREG_REG (accum));
2961           tem = gen_lowpart (nmode, op0);
2962         }
2963
2964       insn = get_last_insn ();
2965       set_unique_reg_note (insn, REG_EQUAL,
2966                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2967     }
2968
2969   if (variant == negate_variant)
2970     {
2971       val_so_far = -val_so_far;
2972       accum = expand_unop (mode, neg_optab, accum, target, 0);
2973     }
2974   else if (variant == add_variant)
2975     {
2976       val_so_far = val_so_far + 1;
2977       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2978     }
2979
2980   /* Compare only the bits of val and val_so_far that are significant
2981      in the result mode, to avoid sign-/zero-extension confusion.  */
2982   val &= GET_MODE_MASK (mode);
2983   val_so_far &= GET_MODE_MASK (mode);
2984   gcc_assert (val == val_so_far);
2985
2986   return accum;
2987 }
2988
2989 /* Perform a multiplication and return an rtx for the result.
2990    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2991    TARGET is a suggestion for where to store the result (an rtx).
2992
2993    We check specially for a constant integer as OP1.
2994    If you want this check for OP0 as well, then before calling
2995    you should swap the two operands if OP0 would be constant.  */
2996
2997 rtx
2998 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2999              int unsignedp)
3000 {
3001   rtx const_op1 = op1;
3002   enum mult_variant variant;
3003   struct algorithm algorithm;
3004
3005   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3006      less than or equal in size to `unsigned int' this doesn't matter.
3007      If the mode is larger than `unsigned int', then synth_mult works only
3008      if the constant value exactly fits in an `unsigned int' without any
3009      truncation.  This means that multiplying by negative values does
3010      not work; results are off by 2^32 on a 32 bit machine.  */
3011
3012   /* If we are multiplying in DImode, it may still be a win
3013      to try to work with shifts and adds.  */
3014   if (GET_CODE (op1) == CONST_DOUBLE
3015       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3016       && HOST_BITS_PER_INT >= BITS_PER_WORD
3017       && CONST_DOUBLE_HIGH (op1) == 0)
3018     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
3019   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3020            && GET_CODE (op1) == CONST_INT
3021            && INTVAL (op1) < 0)
3022     const_op1 = 0;
3023
3024   /* We used to test optimize here, on the grounds that it's better to
3025      produce a smaller program when -O is not used.
3026      But this causes such a terrible slowdown sometimes
3027      that it seems better to use synth_mult always.  */
3028
3029   if (const_op1 && GET_CODE (const_op1) == CONST_INT
3030       && (unsignedp || !flag_trapv))
3031     {
3032       HOST_WIDE_INT coeff = INTVAL (const_op1);
3033       int mult_cost;
3034
3035       /* Special case powers of two.  */
3036       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3037         {
3038           if (coeff == 0)
3039             return const0_rtx;
3040           if (coeff == 1)
3041             return op0;
3042           return expand_shift (LSHIFT_EXPR, mode, op0,
3043                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3044                                target, unsignedp);
3045         }
3046
3047       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3048       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3049                                mult_cost))
3050         return expand_mult_const (mode, op0, coeff, target,
3051                                   &algorithm, variant);
3052     }
3053
3054   if (GET_CODE (op0) == CONST_DOUBLE)
3055     {
3056       rtx temp = op0;
3057       op0 = op1;
3058       op1 = temp;
3059     }
3060
3061   /* Expand x*2.0 as x+x.  */
3062   if (GET_CODE (op1) == CONST_DOUBLE
3063       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3064     {
3065       REAL_VALUE_TYPE d;
3066       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3067
3068       if (REAL_VALUES_EQUAL (d, dconst2))
3069         {
3070           op0 = force_reg (GET_MODE (op0), op0);
3071           return expand_binop (mode, add_optab, op0, op0,
3072                                target, unsignedp, OPTAB_LIB_WIDEN);
3073         }
3074     }
3075
3076   /* This used to use umul_optab if unsigned, but for non-widening multiply
3077      there is no difference between signed and unsigned.  */
3078   op0 = expand_binop (mode,
3079                       ! unsignedp
3080                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3081                       ? smulv_optab : smul_optab,
3082                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3083   gcc_assert (op0);
3084   return op0;
3085 }
3086 \f
3087 /* Return the smallest n such that 2**n >= X.  */
3088
3089 int
3090 ceil_log2 (unsigned HOST_WIDE_INT x)
3091 {
3092   return floor_log2 (x - 1) + 1;
3093 }
3094
3095 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3096    replace division by D, and put the least significant N bits of the result
3097    in *MULTIPLIER_PTR and return the most significant bit.
3098
3099    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3100    needed precision is in PRECISION (should be <= N).
3101
3102    PRECISION should be as small as possible so this function can choose
3103    multiplier more freely.
3104
3105    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3106    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3107
3108    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3109    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3110
3111 static
3112 unsigned HOST_WIDE_INT
3113 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3114                    unsigned HOST_WIDE_INT *multiplier_ptr,
3115                    int *post_shift_ptr, int *lgup_ptr)
3116 {
3117   HOST_WIDE_INT mhigh_hi, mlow_hi;
3118   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3119   int lgup, post_shift;
3120   int pow, pow2;
3121   unsigned HOST_WIDE_INT nl, dummy1;
3122   HOST_WIDE_INT nh, dummy2;
3123
3124   /* lgup = ceil(log2(divisor)); */
3125   lgup = ceil_log2 (d);
3126
3127   gcc_assert (lgup <= n);
3128
3129   pow = n + lgup;
3130   pow2 = n + lgup - precision;
3131
3132   /* We could handle this with some effort, but this case is much
3133      better handled directly with a scc insn, so rely on caller using
3134      that.  */
3135   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3136
3137   /* mlow = 2^(N + lgup)/d */
3138  if (pow >= HOST_BITS_PER_WIDE_INT)
3139     {
3140       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3141       nl = 0;
3142     }
3143   else
3144     {
3145       nh = 0;
3146       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3147     }
3148   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3149                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3150
3151   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3152   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3153     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3154   else
3155     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3156   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3157                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3158
3159   gcc_assert (!mhigh_hi || nh - d < d);
3160   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3161   /* Assert that mlow < mhigh.  */
3162   gcc_assert (mlow_hi < mhigh_hi
3163               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3164
3165   /* If precision == N, then mlow, mhigh exceed 2^N
3166      (but they do not exceed 2^(N+1)).  */
3167
3168   /* Reduce to lowest terms.  */
3169   for (post_shift = lgup; post_shift > 0; post_shift--)
3170     {
3171       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3172       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3173       if (ml_lo >= mh_lo)
3174         break;
3175
3176       mlow_hi = 0;
3177       mlow_lo = ml_lo;
3178       mhigh_hi = 0;
3179       mhigh_lo = mh_lo;
3180     }
3181
3182   *post_shift_ptr = post_shift;
3183   *lgup_ptr = lgup;
3184   if (n < HOST_BITS_PER_WIDE_INT)
3185     {
3186       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3187       *multiplier_ptr = mhigh_lo & mask;
3188       return mhigh_lo >= mask;
3189     }
3190   else
3191     {
3192       *multiplier_ptr = mhigh_lo;
3193       return mhigh_hi;
3194     }
3195 }
3196
3197 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3198    congruent to 1 (mod 2**N).  */
3199
3200 static unsigned HOST_WIDE_INT
3201 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3202 {
3203   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3204
3205   /* The algorithm notes that the choice y = x satisfies
3206      x*y == 1 mod 2^3, since x is assumed odd.
3207      Each iteration doubles the number of bits of significance in y.  */
3208
3209   unsigned HOST_WIDE_INT mask;
3210   unsigned HOST_WIDE_INT y = x;
3211   int nbit = 3;
3212
3213   mask = (n == HOST_BITS_PER_WIDE_INT
3214           ? ~(unsigned HOST_WIDE_INT) 0
3215           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3216
3217   while (nbit < n)
3218     {
3219       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3220       nbit *= 2;
3221     }
3222   return y;
3223 }
3224
3225 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3226    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3227    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3228    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3229    become signed.
3230
3231    The result is put in TARGET if that is convenient.
3232
3233    MODE is the mode of operation.  */
3234
3235 rtx
3236 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3237                              rtx op1, rtx target, int unsignedp)
3238 {
3239   rtx tem;
3240   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3241
3242   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3243                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3244                       NULL_RTX, 0);
3245   tem = expand_and (mode, tem, op1, NULL_RTX);
3246   adj_operand
3247     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3248                      adj_operand);
3249
3250   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3251                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3252                       NULL_RTX, 0);
3253   tem = expand_and (mode, tem, op0, NULL_RTX);
3254   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3255                           target);
3256
3257   return target;
3258 }
3259
3260 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3261
3262 static rtx
3263 extract_high_half (enum machine_mode mode, rtx op)
3264 {
3265   enum machine_mode wider_mode;
3266
3267   if (mode == word_mode)
3268     return gen_highpart (mode, op);
3269
3270   wider_mode = GET_MODE_WIDER_MODE (mode);
3271   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3272                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3273   return convert_modes (mode, wider_mode, op, 0);
3274 }
3275
3276 /* Like expand_mult_highpart, but only consider using a multiplication
3277    optab.  OP1 is an rtx for the constant operand.  */
3278
3279 static rtx
3280 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3281                             rtx target, int unsignedp, int max_cost)
3282 {
3283   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3284   enum machine_mode wider_mode;
3285   optab moptab;
3286   rtx tem;
3287   int size;
3288
3289   wider_mode = GET_MODE_WIDER_MODE (mode);
3290   size = GET_MODE_BITSIZE (mode);
3291
3292   /* Firstly, try using a multiplication insn that only generates the needed
3293      high part of the product, and in the sign flavor of unsignedp.  */
3294   if (mul_highpart_cost[mode] < max_cost)
3295     {
3296       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3297       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3298                           unsignedp, OPTAB_DIRECT);
3299       if (tem)
3300         return tem;
3301     }
3302
3303   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3304      Need to adjust the result after the multiplication.  */
3305   if (size - 1 < BITS_PER_WORD
3306       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3307           + 4 * add_cost[mode] < max_cost))
3308     {
3309       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3310       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3311                           unsignedp, OPTAB_DIRECT);
3312       if (tem)
3313         /* We used the wrong signedness.  Adjust the result.  */
3314         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3315                                             tem, unsignedp);
3316     }
3317
3318   /* Try widening multiplication.  */
3319   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3320   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3321       && mul_widen_cost[wider_mode] < max_cost)
3322     {
3323       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3324                           unsignedp, OPTAB_WIDEN);
3325       if (tem)
3326         return extract_high_half (mode, tem);
3327     }
3328
3329   /* Try widening the mode and perform a non-widening multiplication.  */
3330   moptab = smul_optab;
3331   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3332       && size - 1 < BITS_PER_WORD
3333       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3334     {
3335       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3336                           unsignedp, OPTAB_WIDEN);
3337       if (tem)
3338         return extract_high_half (mode, tem);
3339     }
3340
3341   /* Try widening multiplication of opposite signedness, and adjust.  */
3342   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3343   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3344       && size - 1 < BITS_PER_WORD
3345       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3346           + 4 * add_cost[mode] < max_cost))
3347     {
3348       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3349                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3350       if (tem != 0)
3351         {
3352           tem = extract_high_half (mode, tem);
3353           /* We used the wrong signedness.  Adjust the result.  */
3354           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3355                                               target, unsignedp);
3356         }
3357     }
3358
3359   return 0;
3360 }
3361
3362 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3363    in TARGET if that is convenient, and return where the result is.  If the
3364    operation can not be performed, 0 is returned.
3365
3366    MODE is the mode of operation and result.
3367
3368    UNSIGNEDP nonzero means unsigned multiply.
3369
3370    MAX_COST is the total allowed cost for the expanded RTL.  */
3371
3372 rtx
3373 expand_mult_highpart (enum machine_mode mode, rtx op0,
3374                       unsigned HOST_WIDE_INT cnst1, rtx target,
3375                       int unsignedp, int max_cost)
3376 {
3377   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3378   int extra_cost;
3379   bool sign_adjust = false;
3380   enum mult_variant variant;
3381   struct algorithm alg;
3382   rtx op1, tem;
3383
3384   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3385   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3386
3387   op1 = gen_int_mode (cnst1, wider_mode);
3388   cnst1 &= GET_MODE_MASK (mode);
3389
3390   /* We can't optimize modes wider than BITS_PER_WORD.
3391      ??? We might be able to perform double-word arithmetic if
3392      mode == word_mode, however all the cost calculations in
3393      synth_mult etc. assume single-word operations.  */
3394   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3395     return expand_mult_highpart_optab (mode, op0, op1, target,
3396                                        unsignedp, max_cost);
3397
3398   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3399
3400   /* Check whether we try to multiply by a negative constant.  */
3401   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3402     {
3403       sign_adjust = true;
3404       extra_cost += add_cost[mode];
3405     }
3406
3407   /* See whether shift/add multiplication is cheap enough.  */
3408   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3409                            max_cost - extra_cost))
3410     {
3411       /* See whether the specialized multiplication optabs are
3412          cheaper than the shift/add version.  */
3413       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3414                                         alg.cost.cost + extra_cost);
3415       if (tem)
3416         return tem;
3417
3418       tem = convert_to_mode (wider_mode, op0, unsignedp);
3419       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3420       tem = extract_high_half (mode, tem);
3421
3422       /* Adjust result for signedness.  */
3423       if (sign_adjust)
3424         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3425
3426       return tem;
3427     }
3428   return expand_mult_highpart_optab (mode, op0, op1, target,
3429                                      unsignedp, max_cost);
3430 }
3431
3432
3433 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3434
3435 static rtx
3436 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3437 {
3438   unsigned HOST_WIDE_INT masklow, maskhigh;
3439   rtx result, temp, shift, label;
3440   int logd;
3441
3442   logd = floor_log2 (d);
3443   result = gen_reg_rtx (mode);
3444
3445   /* Avoid conditional branches when they're expensive.  */
3446   if (BRANCH_COST >= 2
3447       && !optimize_size)
3448     {
3449       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3450                                       mode, 0, -1);
3451       if (signmask)
3452         {
3453           signmask = force_reg (mode, signmask);
3454           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3455           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3456
3457           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3458              which instruction sequence to use.  If logical right shifts
3459              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3460              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3461
3462           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3463           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3464               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3465             {
3466               temp = expand_binop (mode, xor_optab, op0, signmask,
3467                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3468               temp = expand_binop (mode, sub_optab, temp, signmask,
3469                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3470               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3471                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3472               temp = expand_binop (mode, xor_optab, temp, signmask,
3473                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3474               temp = expand_binop (mode, sub_optab, temp, signmask,
3475                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3476             }
3477           else
3478             {
3479               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3480                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3481               signmask = force_reg (mode, signmask);
3482
3483               temp = expand_binop (mode, add_optab, op0, signmask,
3484                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3485               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3486                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3487               temp = expand_binop (mode, sub_optab, temp, signmask,
3488                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3489             }
3490           return temp;
3491         }
3492     }
3493
3494   /* Mask contains the mode's signbit and the significant bits of the
3495      modulus.  By including the signbit in the operation, many targets
3496      can avoid an explicit compare operation in the following comparison
3497      against zero.  */
3498
3499   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3500   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3501     {
3502       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3503       maskhigh = -1;
3504     }
3505   else
3506     maskhigh = (HOST_WIDE_INT) -1
3507                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3508
3509   temp = expand_binop (mode, and_optab, op0,
3510                        immed_double_const (masklow, maskhigh, mode),
3511                        result, 1, OPTAB_LIB_WIDEN);
3512   if (temp != result)
3513     emit_move_insn (result, temp);
3514
3515   label = gen_label_rtx ();
3516   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3517
3518   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3519                        0, OPTAB_LIB_WIDEN);
3520   masklow = (HOST_WIDE_INT) -1 << logd;
3521   maskhigh = -1;
3522   temp = expand_binop (mode, ior_optab, temp,
3523                        immed_double_const (masklow, maskhigh, mode),
3524                        result, 1, OPTAB_LIB_WIDEN);
3525   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3526                        0, OPTAB_LIB_WIDEN);
3527   if (temp != result)
3528     emit_move_insn (result, temp);
3529   emit_label (label);
3530   return result;
3531 }
3532
3533 /* Expand signed division of OP0 by a power of two D in mode MODE.
3534    This routine is only called for positive values of D.  */
3535
3536 static rtx
3537 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3538 {
3539   rtx temp, label;
3540   tree shift;
3541   int logd;
3542
3543   logd = floor_log2 (d);
3544   shift = build_int_cst (NULL_TREE, logd);
3545
3546   if (d == 2 && BRANCH_COST >= 1)
3547     {
3548       temp = gen_reg_rtx (mode);
3549       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3550       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3551                            0, OPTAB_LIB_WIDEN);
3552       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3553     }
3554
3555 #ifdef HAVE_conditional_move
3556   if (BRANCH_COST >= 2)
3557     {
3558       rtx temp2;
3559
3560       /* ??? emit_conditional_move forces a stack adjustment via
3561          compare_from_rtx so, if the sequence is discarded, it will
3562          be lost.  Do it now instead.  */
3563       do_pending_stack_adjust ();
3564
3565       start_sequence ();
3566       temp2 = copy_to_mode_reg (mode, op0);
3567       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3568                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3569       temp = force_reg (mode, temp);
3570
3571       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3572       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3573                                      mode, temp, temp2, mode, 0);
3574       if (temp2)
3575         {
3576           rtx seq = get_insns ();
3577           end_sequence ();
3578           emit_insn (seq);
3579           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3580         }
3581       end_sequence ();
3582     }
3583 #endif
3584
3585   if (BRANCH_COST >= 2)
3586     {
3587       int ushift = GET_MODE_BITSIZE (mode) - logd;
3588
3589       temp = gen_reg_rtx (mode);
3590       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3591       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3592         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3593                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3594       else
3595         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3596                              build_int_cst (NULL_TREE, ushift),
3597                              NULL_RTX, 1);
3598       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3599                            0, OPTAB_LIB_WIDEN);
3600       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3601     }
3602
3603   label = gen_label_rtx ();
3604   temp = copy_to_mode_reg (mode, op0);
3605   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3606   expand_inc (temp, GEN_INT (d - 1));
3607   emit_label (label);
3608   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3609 }
3610 \f
3611 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3612    if that is convenient, and returning where the result is.
3613    You may request either the quotient or the remainder as the result;
3614    specify REM_FLAG nonzero to get the remainder.
3615
3616    CODE is the expression code for which kind of division this is;
3617    it controls how rounding is done.  MODE is the machine mode to use.
3618    UNSIGNEDP nonzero means do unsigned division.  */
3619
3620 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3621    and then correct it by or'ing in missing high bits
3622    if result of ANDI is nonzero.
3623    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3624    This could optimize to a bfexts instruction.
3625    But C doesn't use these operations, so their optimizations are
3626    left for later.  */
3627 /* ??? For modulo, we don't actually need the highpart of the first product,
3628    the low part will do nicely.  And for small divisors, the second multiply
3629    can also be a low-part only multiply or even be completely left out.
3630    E.g. to calculate the remainder of a division by 3 with a 32 bit
3631    multiply, multiply with 0x55555556 and extract the upper two bits;
3632    the result is exact for inputs up to 0x1fffffff.
3633    The input range can be reduced by using cross-sum rules.
3634    For odd divisors >= 3, the following table gives right shift counts
3635    so that if a number is shifted by an integer multiple of the given
3636    amount, the remainder stays the same:
3637    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3638    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3639    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3640    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3641    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3642
3643    Cross-sum rules for even numbers can be derived by leaving as many bits
3644    to the right alone as the divisor has zeros to the right.
3645    E.g. if x is an unsigned 32 bit number:
3646    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3647    */
3648
3649 rtx
3650 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3651                rtx op0, rtx op1, rtx target, int unsignedp)
3652 {
3653   enum machine_mode compute_mode;
3654   rtx tquotient;
3655   rtx quotient = 0, remainder = 0;
3656   rtx last;
3657   int size;
3658   rtx insn, set;
3659   optab optab1, optab2;
3660   int op1_is_constant, op1_is_pow2 = 0;
3661   int max_cost, extra_cost;
3662   static HOST_WIDE_INT last_div_const = 0;
3663   static HOST_WIDE_INT ext_op1;
3664
3665   op1_is_constant = GET_CODE (op1) == CONST_INT;
3666   if (op1_is_constant)
3667     {
3668       ext_op1 = INTVAL (op1);
3669       if (unsignedp)
3670         ext_op1 &= GET_MODE_MASK (mode);
3671       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3672                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3673     }
3674
3675   /*
3676      This is the structure of expand_divmod:
3677
3678      First comes code to fix up the operands so we can perform the operations
3679      correctly and efficiently.
3680
3681      Second comes a switch statement with code specific for each rounding mode.
3682      For some special operands this code emits all RTL for the desired
3683      operation, for other cases, it generates only a quotient and stores it in
3684      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3685      to indicate that it has not done anything.
3686
3687      Last comes code that finishes the operation.  If QUOTIENT is set and
3688      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3689      QUOTIENT is not set, it is computed using trunc rounding.
3690
3691      We try to generate special code for division and remainder when OP1 is a
3692      constant.  If |OP1| = 2**n we can use shifts and some other fast
3693      operations.  For other values of OP1, we compute a carefully selected
3694      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3695      by m.
3696
3697      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3698      half of the product.  Different strategies for generating the product are
3699      implemented in expand_mult_highpart.
3700
3701      If what we actually want is the remainder, we generate that by another
3702      by-constant multiplication and a subtraction.  */
3703
3704   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3705      code below will malfunction if we are, so check here and handle
3706      the special case if so.  */
3707   if (op1 == const1_rtx)
3708     return rem_flag ? const0_rtx : op0;
3709
3710     /* When dividing by -1, we could get an overflow.
3711      negv_optab can handle overflows.  */
3712   if (! unsignedp && op1 == constm1_rtx)
3713     {
3714       if (rem_flag)
3715         return const0_rtx;
3716       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3717                           ? negv_optab : neg_optab, op0, target, 0);
3718     }
3719
3720   if (target
3721       /* Don't use the function value register as a target
3722          since we have to read it as well as write it,
3723          and function-inlining gets confused by this.  */
3724       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3725           /* Don't clobber an operand while doing a multi-step calculation.  */
3726           || ((rem_flag || op1_is_constant)
3727               && (reg_mentioned_p (target, op0)
3728                   || (MEM_P (op0) && MEM_P (target))))
3729           || reg_mentioned_p (target, op1)
3730           || (MEM_P (op1) && MEM_P (target))))
3731     target = 0;
3732
3733   /* Get the mode in which to perform this computation.  Normally it will
3734      be MODE, but sometimes we can't do the desired operation in MODE.
3735      If so, pick a wider mode in which we can do the operation.  Convert
3736      to that mode at the start to avoid repeated conversions.
3737
3738      First see what operations we need.  These depend on the expression
3739      we are evaluating.  (We assume that divxx3 insns exist under the
3740      same conditions that modxx3 insns and that these insns don't normally
3741      fail.  If these assumptions are not correct, we may generate less
3742      efficient code in some cases.)
3743
3744      Then see if we find a mode in which we can open-code that operation
3745      (either a division, modulus, or shift).  Finally, check for the smallest
3746      mode for which we can do the operation with a library call.  */
3747
3748   /* We might want to refine this now that we have division-by-constant
3749      optimization.  Since expand_mult_highpart tries so many variants, it is
3750      not straightforward to generalize this.  Maybe we should make an array
3751      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3752
3753   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3754             ? (unsignedp ? lshr_optab : ashr_optab)
3755             : (unsignedp ? udiv_optab : sdiv_optab));
3756   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3757             ? optab1
3758             : (unsignedp ? udivmod_optab : sdivmod_optab));
3759
3760   for (compute_mode = mode; compute_mode != VOIDmode;
3761        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3762     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3763         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3764       break;
3765
3766   if (compute_mode == VOIDmode)
3767     for (compute_mode = mode; compute_mode != VOIDmode;
3768          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3769       if (optab1->handlers[compute_mode].libfunc
3770           || optab2->handlers[compute_mode].libfunc)
3771         break;
3772
3773   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3774      in expand_binop.  */
3775   if (compute_mode == VOIDmode)
3776     compute_mode = mode;
3777
3778   if (target && GET_MODE (target) == compute_mode)
3779     tquotient = target;
3780   else
3781     tquotient = gen_reg_rtx (compute_mode);
3782
3783   size = GET_MODE_BITSIZE (compute_mode);
3784 #if 0
3785   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3786      (mode), and thereby get better code when OP1 is a constant.  Do that
3787      later.  It will require going over all usages of SIZE below.  */
3788   size = GET_MODE_BITSIZE (mode);
3789 #endif
3790
3791   /* Only deduct something for a REM if the last divide done was
3792      for a different constant.   Then set the constant of the last
3793      divide.  */
3794   max_cost = div_cost[compute_mode]
3795     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3796                       && INTVAL (op1) == last_div_const)
3797        ? mul_cost[compute_mode] + add_cost[compute_mode]
3798        : 0);
3799
3800   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3801
3802   /* Now convert to the best mode to use.  */
3803   if (compute_mode != mode)
3804     {
3805       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3806       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3807
3808       /* convert_modes may have placed op1 into a register, so we
3809          must recompute the following.  */
3810       op1_is_constant = GET_CODE (op1) == CONST_INT;
3811       op1_is_pow2 = (op1_is_constant
3812                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3813                           || (! unsignedp
3814                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3815     }
3816
3817   /* If one of the operands is a volatile MEM, copy it into a register.  */
3818
3819   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3820     op0 = force_reg (compute_mode, op0);
3821   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3822     op1 = force_reg (compute_mode, op1);
3823
3824   /* If we need the remainder or if OP1 is constant, we need to
3825      put OP0 in a register in case it has any queued subexpressions.  */
3826   if (rem_flag || op1_is_constant)
3827     op0 = force_reg (compute_mode, op0);
3828
3829   last = get_last_insn ();
3830
3831   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3832   if (unsignedp)
3833     {
3834       if (code == FLOOR_DIV_EXPR)
3835         code = TRUNC_DIV_EXPR;
3836       if (code == FLOOR_MOD_EXPR)
3837         code = TRUNC_MOD_EXPR;
3838       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3839         code = TRUNC_DIV_EXPR;
3840     }
3841
3842   if (op1 != const0_rtx)
3843     switch (code)
3844       {
3845       case TRUNC_MOD_EXPR:
3846       case TRUNC_DIV_EXPR:
3847         if (op1_is_constant)
3848           {
3849             if (unsignedp)
3850               {
3851                 unsigned HOST_WIDE_INT mh, ml;
3852                 int pre_shift, post_shift;
3853                 int dummy;
3854                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3855                                             & GET_MODE_MASK (compute_mode));
3856
3857                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3858                   {
3859                     pre_shift = floor_log2 (d);
3860                     if (rem_flag)
3861                       {
3862                         remainder
3863                           = expand_binop (compute_mode, and_optab, op0,
3864                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3865                                           remainder, 1,
3866                                           OPTAB_LIB_WIDEN);
3867                         if (remainder)
3868                           return gen_lowpart (mode, remainder);
3869                       }
3870                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3871                                              build_int_cst (NULL_TREE,
3872                                                             pre_shift),
3873                                              tquotient, 1);
3874                   }
3875                 else if (size <= HOST_BITS_PER_WIDE_INT)
3876                   {
3877                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3878                       {
3879                         /* Most significant bit of divisor is set; emit an scc
3880                            insn.  */
3881                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3882                                                     compute_mode, 1, 1);
3883                         if (quotient == 0)
3884                           goto fail1;
3885                       }
3886                     else
3887                       {
3888                         /* Find a suitable multiplier and right shift count
3889                            instead of multiplying with D.  */
3890
3891                         mh = choose_multiplier (d, size, size,
3892                                                 &ml, &post_shift, &dummy);
3893
3894                         /* If the suggested multiplier is more than SIZE bits,
3895                            we can do better for even divisors, using an
3896                            initial right shift.  */
3897                         if (mh != 0 && (d & 1) == 0)
3898                           {
3899                             pre_shift = floor_log2 (d & -d);
3900                             mh = choose_multiplier (d >> pre_shift, size,
3901                                                     size - pre_shift,
3902                                                     &ml, &post_shift, &dummy);
3903                             gcc_assert (!mh);
3904                           }
3905                         else
3906                           pre_shift = 0;
3907
3908                         if (mh != 0)
3909                           {
3910                             rtx t1, t2, t3, t4;
3911
3912                             if (post_shift - 1 >= BITS_PER_WORD)
3913                               goto fail1;
3914
3915                             extra_cost
3916                               = (shift_cost[compute_mode][post_shift - 1]
3917                                  + shift_cost[compute_mode][1]
3918                                  + 2 * add_cost[compute_mode]);
3919                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3920                                                        NULL_RTX, 1,
3921                                                        max_cost - extra_cost);
3922                             if (t1 == 0)
3923                               goto fail1;
3924                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3925                                                                op0, t1),
3926                                                 NULL_RTX);
3927                             t3 = expand_shift
3928                               (RSHIFT_EXPR, compute_mode, t2,
3929                                build_int_cst (NULL_TREE, 1),
3930                                NULL_RTX,1);
3931                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3932                                                               t1, t3),
3933                                                 NULL_RTX);
3934                             quotient = expand_shift
3935                               (RSHIFT_EXPR, compute_mode, t4,
3936                                build_int_cst (NULL_TREE, post_shift - 1),
3937                                tquotient, 1);
3938                           }
3939                         else
3940                           {
3941                             rtx t1, t2;
3942
3943                             if (pre_shift >= BITS_PER_WORD
3944                                 || post_shift >= BITS_PER_WORD)
3945                               goto fail1;
3946
3947                             t1 = expand_shift
3948                               (RSHIFT_EXPR, compute_mode, op0,
3949                                build_int_cst (NULL_TREE, pre_shift),
3950                                NULL_RTX, 1);
3951                             extra_cost
3952                               = (shift_cost[compute_mode][pre_shift]
3953                                  + shift_cost[compute_mode][post_shift]);
3954                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3955                                                        NULL_RTX, 1,
3956                                                        max_cost - extra_cost);
3957                             if (t2 == 0)
3958                               goto fail1;
3959                             quotient = expand_shift
3960                               (RSHIFT_EXPR, compute_mode, t2,
3961                                build_int_cst (NULL_TREE, post_shift),
3962                                tquotient, 1);
3963                           }
3964                       }
3965                   }
3966                 else            /* Too wide mode to use tricky code */
3967                   break;
3968
3969                 insn = get_last_insn ();
3970                 if (insn != last
3971                     && (set = single_set (insn)) != 0
3972                     && SET_DEST (set) == quotient)
3973                   set_unique_reg_note (insn,
3974                                        REG_EQUAL,
3975                                        gen_rtx_UDIV (compute_mode, op0, op1));
3976               }
3977             else                /* TRUNC_DIV, signed */
3978               {
3979                 unsigned HOST_WIDE_INT ml;
3980                 int lgup, post_shift;
3981                 HOST_WIDE_INT d = INTVAL (op1);
3982                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3983
3984                 /* n rem d = n rem -d */
3985                 if (rem_flag && d < 0)
3986                   {
3987                     d = abs_d;
3988                     op1 = gen_int_mode (abs_d, compute_mode);
3989                   }
3990
3991                 if (d == 1)
3992                   quotient = op0;
3993                 else if (d == -1)
3994                   quotient = expand_unop (compute_mode, neg_optab, op0,
3995                                           tquotient, 0);
3996                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3997                   {
3998                     /* This case is not handled correctly below.  */
3999                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4000                                                 compute_mode, 1, 1);
4001                     if (quotient == 0)
4002                       goto fail1;
4003                   }
4004                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4005                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4006                                       : sdiv_pow2_cheap[compute_mode])
4007                          /* We assume that cheap metric is true if the
4008                             optab has an expander for this mode.  */
4009                          && (((rem_flag ? smod_optab : sdiv_optab)
4010                               ->handlers[compute_mode].insn_code
4011                               != CODE_FOR_nothing)
4012                              || (sdivmod_optab->handlers[compute_mode]
4013                                  .insn_code != CODE_FOR_nothing)))
4014                   ;
4015                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4016                   {
4017                     if (rem_flag)
4018                       {
4019                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4020                         if (remainder)
4021                           return gen_lowpart (mode, remainder);
4022                       }
4023
4024                     if (sdiv_pow2_cheap[compute_mode]
4025                         && ((sdiv_optab->handlers[compute_mode].insn_code
4026                              != CODE_FOR_nothing)
4027                             || (sdivmod_optab->handlers[compute_mode].insn_code
4028                                 != CODE_FOR_nothing)))
4029                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4030                                                 compute_mode, op0,
4031                                                 gen_int_mode (abs_d,
4032                                                               compute_mode),
4033                                                 NULL_RTX, 0);
4034                     else
4035                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4036
4037                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4038                        negate the quotient.  */
4039                     if (d < 0)
4040                       {
4041                         insn = get_last_insn ();
4042                         if (insn != last
4043                             && (set = single_set (insn)) != 0
4044                             && SET_DEST (set) == quotient
4045                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4046                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4047                           set_unique_reg_note (insn,
4048                                                REG_EQUAL,
4049                                                gen_rtx_DIV (compute_mode,
4050                                                             op0,
4051                                                             GEN_INT
4052                                                             (trunc_int_for_mode
4053                                                              (abs_d,
4054                                                               compute_mode))));
4055
4056                         quotient = expand_unop (compute_mode, neg_optab,
4057                                                 quotient, quotient, 0);
4058                       }
4059                   }
4060                 else if (size <= HOST_BITS_PER_WIDE_INT)
4061                   {
4062                     choose_multiplier (abs_d, size, size - 1,
4063                                        &ml, &post_shift, &lgup);
4064                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4065                       {
4066                         rtx t1, t2, t3;
4067
4068                         if (post_shift >= BITS_PER_WORD
4069                             || size - 1 >= BITS_PER_WORD)
4070                           goto fail1;
4071
4072                         extra_cost = (shift_cost[compute_mode][post_shift]
4073                                       + shift_cost[compute_mode][size - 1]
4074                                       + add_cost[compute_mode]);
4075                         t1 = expand_mult_highpart (compute_mode, op0, ml,
4076                                                    NULL_RTX, 0,
4077                                                    max_cost - extra_cost);
4078                         if (t1 == 0)
4079                           goto fail1;
4080                         t2 = expand_shift
4081                           (RSHIFT_EXPR, compute_mode, t1,
4082                            build_int_cst (NULL_TREE, post_shift),
4083                            NULL_RTX, 0);
4084                         t3 = expand_shift
4085                           (RSHIFT_EXPR, compute_mode, op0,
4086                            build_int_cst (NULL_TREE, size - 1),
4087                            NULL_RTX, 0);
4088                         if (d < 0)
4089                           quotient
4090                             = force_operand (gen_rtx_MINUS (compute_mode,
4091                                                             t3, t2),
4092                                              tquotient);
4093                         else
4094                           quotient
4095                             = force_operand (gen_rtx_MINUS (compute_mode,
4096                                                             t2, t3),
4097                                              tquotient);
4098                       }
4099                     else
4100                       {
4101                         rtx t1, t2, t3, t4;
4102
4103                         if (post_shift >= BITS_PER_WORD
4104                             || size - 1 >= BITS_PER_WORD)
4105                           goto fail1;
4106
4107                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4108                         extra_cost = (shift_cost[compute_mode][post_shift]
4109                                       + shift_cost[compute_mode][size - 1]
4110                                       + 2 * add_cost[compute_mode]);
4111                         t1 = expand_mult_highpart (compute_mode, op0, ml,
4112                                                    NULL_RTX, 0,
4113                                                    max_cost - extra_cost);
4114                         if (t1 == 0)
4115                           goto fail1;
4116                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4117                                                           t1, op0),
4118                                             NULL_RTX);
4119                         t3 = expand_shift
4120                           (RSHIFT_EXPR, compute_mode, t2,
4121                            build_int_cst (NULL_TREE, post_shift),
4122                            NULL_RTX, 0);
4123                         t4 = expand_shift
4124                           (RSHIFT_EXPR, compute_mode, op0,
4125                            build_int_cst (NULL_TREE, size - 1),
4126                            NULL_RTX, 0);
4127                         if (d < 0)
4128                           quotient
4129                             = force_operand (gen_rtx_MINUS (compute_mode,
4130                                                             t4, t3),
4131                                              tquotient);
4132                         else
4133                           quotient
4134                             = force_operand (gen_rtx_MINUS (compute_mode,
4135                                                             t3, t4),
4136                                              tquotient);
4137                       }
4138                   }
4139                 else            /* Too wide mode to use tricky code */
4140                   break;
4141
4142                 insn = get_last_insn ();
4143                 if (insn != last
4144                     && (set = single_set (insn)) != 0
4145                     && SET_DEST (set) == quotient)
4146                   set_unique_reg_note (insn,
4147                                        REG_EQUAL,
4148                                        gen_rtx_DIV (compute_mode, op0, op1));
4149               }
4150             break;
4151           }
4152       fail1:
4153         delete_insns_since (last);
4154         break;
4155
4156       case FLOOR_DIV_EXPR:
4157       case FLOOR_MOD_EXPR:
4158       /* We will come here only for signed operations.  */
4159         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4160           {
4161             unsigned HOST_WIDE_INT mh, ml;
4162             int pre_shift, lgup, post_shift;
4163             HOST_WIDE_INT d = INTVAL (op1);
4164
4165             if (d > 0)
4166               {
4167                 /* We could just as easily deal with negative constants here,
4168                    but it does not seem worth the trouble for GCC 2.6.  */
4169                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4170                   {
4171                     pre_shift = floor_log2 (d);
4172                     if (rem_flag)
4173                       {
4174                         remainder = expand_binop (compute_mode, and_optab, op0,
4175                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4176                                                   remainder, 0, OPTAB_LIB_WIDEN);
4177                         if (remainder)
4178                           return gen_lowpart (mode, remainder);
4179                       }
4180                     quotient = expand_shift
4181                       (RSHIFT_EXPR, compute_mode, op0,
4182                        build_int_cst (NULL_TREE, pre_shift),
4183                        tquotient, 0);
4184                   }
4185                 else
4186                   {
4187                     rtx t1, t2, t3, t4;
4188
4189                     mh = choose_multiplier (d, size, size - 1,
4190                                             &ml, &post_shift, &lgup);
4191                     gcc_assert (!mh);
4192
4193                     if (post_shift < BITS_PER_WORD
4194                         && size - 1 < BITS_PER_WORD)
4195                       {
4196                         t1 = expand_shift
4197                           (RSHIFT_EXPR, compute_mode, op0,
4198                            build_int_cst (NULL_TREE, size - 1),
4199                            NULL_RTX, 0);
4200                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4201                                            NULL_RTX, 0, OPTAB_WIDEN);
4202                         extra_cost = (shift_cost[compute_mode][post_shift]
4203                                       + shift_cost[compute_mode][size - 1]
4204                                       + 2 * add_cost[compute_mode]);
4205                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4206                                                    NULL_RTX, 1,
4207                                                    max_cost - extra_cost);
4208                         if (t3 != 0)
4209                           {
4210                             t4 = expand_shift
4211                               (RSHIFT_EXPR, compute_mode, t3,
4212                                build_int_cst (NULL_TREE, post_shift),
4213                                NULL_RTX, 1);
4214                             quotient = expand_binop (compute_mode, xor_optab,
4215                                                      t4, t1, tquotient, 0,
4216                                                      OPTAB_WIDEN);
4217                           }
4218                       }
4219                   }
4220               }
4221             else
4222               {
4223                 rtx nsign, t1, t2, t3, t4;
4224                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4225                                                   op0, constm1_rtx), NULL_RTX);
4226                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4227                                    0, OPTAB_WIDEN);
4228                 nsign = expand_shift
4229                   (RSHIFT_EXPR, compute_mode, t2,
4230                    build_int_cst (NULL_TREE, size - 1),
4231                    NULL_RTX, 0);
4232                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4233                                     NULL_RTX);
4234                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4235                                     NULL_RTX, 0);
4236                 if (t4)
4237                   {
4238                     rtx t5;
4239                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4240                                       NULL_RTX, 0);
4241                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4242                                                             t4, t5),
4243                                               tquotient);
4244                   }
4245               }
4246           }
4247
4248         if (quotient != 0)
4249           break;
4250         delete_insns_since (last);
4251
4252         /* Try using an instruction that produces both the quotient and
4253            remainder, using truncation.  We can easily compensate the quotient
4254            or remainder to get floor rounding, once we have the remainder.
4255            Notice that we compute also the final remainder value here,
4256            and return the result right away.  */
4257         if (target == 0 || GET_MODE (target) != compute_mode)
4258           target = gen_reg_rtx (compute_mode);
4259
4260         if (rem_flag)
4261           {
4262             remainder
4263               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4264             quotient = gen_reg_rtx (compute_mode);
4265           }
4266         else
4267           {
4268             quotient
4269               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4270             remainder = gen_reg_rtx (compute_mode);
4271           }
4272
4273         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4274                                  quotient, remainder, 0))
4275           {
4276             /* This could be computed with a branch-less sequence.
4277                Save that for later.  */
4278             rtx tem;
4279             rtx label = gen_label_rtx ();
4280             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4281             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4282                                 NULL_RTX, 0, OPTAB_WIDEN);
4283             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4284             expand_dec (quotient, const1_rtx);
4285             expand_inc (remainder, op1);
4286             emit_label (label);
4287             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4288           }
4289
4290         /* No luck with division elimination or divmod.  Have to do it
4291            by conditionally adjusting op0 *and* the result.  */
4292         {
4293           rtx label1, label2, label3, label4, label5;
4294           rtx adjusted_op0;
4295           rtx tem;
4296
4297           quotient = gen_reg_rtx (compute_mode);
4298           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4299           label1 = gen_label_rtx ();
4300           label2 = gen_label_rtx ();
4301           label3 = gen_label_rtx ();
4302           label4 = gen_label_rtx ();
4303           label5 = gen_label_rtx ();
4304           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4305           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4306           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4307                               quotient, 0, OPTAB_LIB_WIDEN);
4308           if (tem != quotient)
4309             emit_move_insn (quotient, tem);
4310           emit_jump_insn (gen_jump (label5));
4311           emit_barrier ();
4312           emit_label (label1);
4313           expand_inc (adjusted_op0, const1_rtx);
4314           emit_jump_insn (gen_jump (label4));
4315           emit_barrier ();
4316           emit_label (label2);
4317           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4318           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4319                               quotient, 0, OPTAB_LIB_WIDEN);
4320           if (tem != quotient)
4321             emit_move_insn (quotient, tem);
4322           emit_jump_insn (gen_jump (label5));
4323           emit_barrier ();
4324           emit_label (label3);
4325           expand_dec (adjusted_op0, const1_rtx);
4326           emit_label (label4);
4327           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4328                               quotient, 0, OPTAB_LIB_WIDEN);
4329           if (tem != quotient)
4330             emit_move_insn (quotient, tem);
4331           expand_dec (quotient, const1_rtx);
4332           emit_label (label5);
4333         }
4334         break;
4335
4336       case CEIL_DIV_EXPR:
4337       case CEIL_MOD_EXPR:
4338         if (unsignedp)
4339           {
4340             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4341               {
4342                 rtx t1, t2, t3;
4343                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4344                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4345                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4346                                    tquotient, 1);
4347                 t2 = expand_binop (compute_mode, and_optab, op0,
4348                                    GEN_INT (d - 1),
4349                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4350                 t3 = gen_reg_rtx (compute_mode);
4351                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4352                                       compute_mode, 1, 1);
4353                 if (t3 == 0)
4354                   {
4355                     rtx lab;
4356                     lab = gen_label_rtx ();
4357                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4358                     expand_inc (t1, const1_rtx);
4359                     emit_label (lab);
4360                     quotient = t1;
4361                   }
4362                 else
4363                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4364                                                           t1, t3),
4365                                             tquotient);
4366                 break;
4367               }
4368
4369             /* Try using an instruction that produces both the quotient and
4370                remainder, using truncation.  We can easily compensate the
4371                quotient or remainder to get ceiling rounding, once we have the
4372                remainder.  Notice that we compute also the final remainder
4373                value here, and return the result right away.  */
4374             if (target == 0 || GET_MODE (target) != compute_mode)
4375               target = gen_reg_rtx (compute_mode);
4376
4377             if (rem_flag)
4378               {
4379                 remainder = (REG_P (target)
4380                              ? target : gen_reg_rtx (compute_mode));
4381                 quotient = gen_reg_rtx (compute_mode);
4382               }
4383             else
4384               {
4385                 quotient = (REG_P (target)
4386                             ? target : gen_reg_rtx (compute_mode));
4387                 remainder = gen_reg_rtx (compute_mode);
4388               }
4389
4390             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4391                                      remainder, 1))
4392               {
4393                 /* This could be computed with a branch-less sequence.
4394                    Save that for later.  */
4395                 rtx label = gen_label_rtx ();
4396                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4397                                  compute_mode, label);
4398                 expand_inc (quotient, const1_rtx);
4399                 expand_dec (remainder, op1);
4400                 emit_label (label);
4401                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4402               }
4403
4404             /* No luck with division elimination or divmod.  Have to do it
4405                by conditionally adjusting op0 *and* the result.  */
4406             {
4407               rtx label1, label2;
4408               rtx adjusted_op0, tem;
4409
4410               quotient = gen_reg_rtx (compute_mode);
4411               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4412               label1 = gen_label_rtx ();
4413               label2 = gen_label_rtx ();
4414               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4415                                compute_mode, label1);
4416               emit_move_insn  (quotient, const0_rtx);
4417               emit_jump_insn (gen_jump (label2));
4418               emit_barrier ();
4419               emit_label (label1);
4420               expand_dec (adjusted_op0, const1_rtx);
4421               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4422                                   quotient, 1, OPTAB_LIB_WIDEN);
4423               if (tem != quotient)
4424                 emit_move_insn (quotient, tem);
4425               expand_inc (quotient, const1_rtx);
4426               emit_label (label2);
4427             }
4428           }
4429         else /* signed */
4430           {
4431             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4432                 && INTVAL (op1) >= 0)
4433               {
4434                 /* This is extremely similar to the code for the unsigned case
4435                    above.  For 2.7 we should merge these variants, but for
4436                    2.6.1 I don't want to touch the code for unsigned since that
4437                    get used in C.  The signed case will only be used by other
4438                    languages (Ada).  */
4439
4440                 rtx t1, t2, t3;
4441                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4442                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4443                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4444                                    tquotient, 0);
4445                 t2 = expand_binop (compute_mode, and_optab, op0,
4446                                    GEN_INT (d - 1),
4447                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4448                 t3 = gen_reg_rtx (compute_mode);
4449                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4450                                       compute_mode, 1, 1);
4451                 if (t3 == 0)
4452                   {
4453                     rtx lab;
4454                     lab = gen_label_rtx ();
4455                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4456                     expand_inc (t1, const1_rtx);
4457                     emit_label (lab);
4458                     quotient = t1;
4459                   }
4460                 else
4461                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4462                                                           t1, t3),
4463                                             tquotient);
4464                 break;
4465               }
4466
4467             /* Try using an instruction that produces both the quotient and
4468                remainder, using truncation.  We can easily compensate the
4469                quotient or remainder to get ceiling rounding, once we have the
4470                remainder.  Notice that we compute also the final remainder
4471                value here, and return the result right away.  */
4472             if (target == 0 || GET_MODE (target) != compute_mode)
4473               target = gen_reg_rtx (compute_mode);
4474             if (rem_flag)
4475               {
4476                 remainder= (REG_P (target)
4477                             ? target : gen_reg_rtx (compute_mode));
4478                 quotient = gen_reg_rtx (compute_mode);
4479               }
4480             else
4481               {
4482                 quotient = (REG_P (target)
4483                             ? target : gen_reg_rtx (compute_mode));
4484                 remainder = gen_reg_rtx (compute_mode);
4485               }
4486
4487             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4488                                      remainder, 0))
4489               {
4490                 /* This could be computed with a branch-less sequence.
4491                    Save that for later.  */
4492                 rtx tem;
4493                 rtx label = gen_label_rtx ();
4494                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4495                                  compute_mode, label);
4496                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4497                                     NULL_RTX, 0, OPTAB_WIDEN);
4498                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4499                 expand_inc (quotient, const1_rtx);
4500                 expand_dec (remainder, op1);
4501                 emit_label (label);
4502                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4503               }
4504
4505             /* No luck with division elimination or divmod.  Have to do it
4506                by conditionally adjusting op0 *and* the result.  */
4507             {
4508               rtx label1, label2, label3, label4, label5;
4509               rtx adjusted_op0;
4510               rtx tem;
4511
4512               quotient = gen_reg_rtx (compute_mode);
4513               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4514               label1 = gen_label_rtx ();
4515               label2 = gen_label_rtx ();
4516               label3 = gen_label_rtx ();
4517               label4 = gen_label_rtx ();
4518               label5 = gen_label_rtx ();
4519               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4520               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4521                                compute_mode, label1);
4522               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4523                                   quotient, 0, OPTAB_LIB_WIDEN);
4524               if (tem != quotient)
4525                 emit_move_insn (quotient, tem);
4526               emit_jump_insn (gen_jump (label5));
4527               emit_barrier ();
4528               emit_label (label1);
4529               expand_dec (adjusted_op0, const1_rtx);
4530               emit_jump_insn (gen_jump (label4));
4531               emit_barrier ();
4532               emit_label (label2);
4533               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4534                                compute_mode, label3);
4535               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4536                                   quotient, 0, OPTAB_LIB_WIDEN);
4537               if (tem != quotient)
4538                 emit_move_insn (quotient, tem);
4539               emit_jump_insn (gen_jump (label5));
4540               emit_barrier ();
4541               emit_label (label3);
4542               expand_inc (adjusted_op0, const1_rtx);
4543               emit_label (label4);
4544               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4545                                   quotient, 0, OPTAB_LIB_WIDEN);
4546               if (tem != quotient)
4547                 emit_move_insn (quotient, tem);
4548               expand_inc (quotient, const1_rtx);
4549               emit_label (label5);
4550             }
4551           }
4552         break;
4553
4554       case EXACT_DIV_EXPR:
4555         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4556           {
4557             HOST_WIDE_INT d = INTVAL (op1);
4558             unsigned HOST_WIDE_INT ml;
4559             int pre_shift;
4560             rtx t1;
4561
4562             pre_shift = floor_log2 (d & -d);
4563             ml = invert_mod2n (d >> pre_shift, size);
4564             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4565                                build_int_cst (NULL_TREE, pre_shift),
4566                                NULL_RTX, unsignedp);
4567             quotient = expand_mult (compute_mode, t1,
4568                                     gen_int_mode (ml, compute_mode),
4569                                     NULL_RTX, 1);
4570
4571             insn = get_last_insn ();
4572             set_unique_reg_note (insn,
4573                                  REG_EQUAL,
4574                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4575                                                  compute_mode,
4576                                                  op0, op1));
4577           }
4578         break;
4579
4580       case ROUND_DIV_EXPR:
4581       case ROUND_MOD_EXPR:
4582         if (unsignedp)
4583           {
4584             rtx tem;
4585             rtx label;
4586             label = gen_label_rtx ();
4587             quotient = gen_reg_rtx (compute_mode);
4588             remainder = gen_reg_rtx (compute_mode);
4589             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4590               {
4591                 rtx tem;
4592                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4593                                          quotient, 1, OPTAB_LIB_WIDEN);
4594                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4595                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4596                                           remainder, 1, OPTAB_LIB_WIDEN);
4597               }
4598             tem = plus_constant (op1, -1);
4599             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4600                                 build_int_cst (NULL_TREE, 1),
4601                                 NULL_RTX, 1);
4602             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4603             expand_inc (quotient, const1_rtx);
4604             expand_dec (remainder, op1);
4605             emit_label (label);
4606           }
4607         else
4608           {
4609             rtx abs_rem, abs_op1, tem, mask;
4610             rtx label;
4611             label = gen_label_rtx ();
4612             quotient = gen_reg_rtx (compute_mode);
4613             remainder = gen_reg_rtx (compute_mode);
4614             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4615               {
4616                 rtx tem;
4617                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4618                                          quotient, 0, OPTAB_LIB_WIDEN);
4619                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4620                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4621                                           remainder, 0, OPTAB_LIB_WIDEN);
4622               }
4623             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4624             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4625             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4626                                 build_int_cst (NULL_TREE, 1),
4627                                 NULL_RTX, 1);
4628             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4629             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4630                                 NULL_RTX, 0, OPTAB_WIDEN);
4631             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4632                                  build_int_cst (NULL_TREE, size - 1),
4633                                  NULL_RTX, 0);
4634             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4635                                 NULL_RTX, 0, OPTAB_WIDEN);
4636             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4637                                 NULL_RTX, 0, OPTAB_WIDEN);
4638             expand_inc (quotient, tem);
4639             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4640                                 NULL_RTX, 0, OPTAB_WIDEN);
4641             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4642                                 NULL_RTX, 0, OPTAB_WIDEN);
4643             expand_dec (remainder, tem);
4644             emit_label (label);
4645           }
4646         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4647
4648       default:
4649         gcc_unreachable ();
4650       }
4651
4652   if (quotient == 0)
4653     {
4654       if (target && GET_MODE (target) != compute_mode)
4655         target = 0;
4656
4657       if (rem_flag)
4658         {
4659           /* Try to produce the remainder without producing the quotient.
4660              If we seem to have a divmod pattern that does not require widening,
4661              don't try widening here.  We should really have a WIDEN argument
4662              to expand_twoval_binop, since what we'd really like to do here is
4663              1) try a mod insn in compute_mode
4664              2) try a divmod insn in compute_mode
4665              3) try a div insn in compute_mode and multiply-subtract to get
4666                 remainder
4667              4) try the same things with widening allowed.  */
4668           remainder
4669             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4670                                  op0, op1, target,
4671                                  unsignedp,
4672                                  ((optab2->handlers[compute_mode].insn_code
4673                                    != CODE_FOR_nothing)
4674                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4675           if (remainder == 0)
4676             {
4677               /* No luck there.  Can we do remainder and divide at once
4678                  without a library call?  */
4679               remainder = gen_reg_rtx (compute_mode);
4680               if (! expand_twoval_binop ((unsignedp
4681                                           ? udivmod_optab
4682                                           : sdivmod_optab),
4683                                          op0, op1,
4684                                          NULL_RTX, remainder, unsignedp))
4685                 remainder = 0;
4686             }
4687
4688           if (remainder)
4689             return gen_lowpart (mode, remainder);
4690         }
4691
4692       /* Produce the quotient.  Try a quotient insn, but not a library call.
4693          If we have a divmod in this mode, use it in preference to widening
4694          the div (for this test we assume it will not fail). Note that optab2
4695          is set to the one of the two optabs that the call below will use.  */
4696       quotient
4697         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4698                              op0, op1, rem_flag ? NULL_RTX : target,
4699                              unsignedp,
4700                              ((optab2->handlers[compute_mode].insn_code
4701                                != CODE_FOR_nothing)
4702                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4703
4704       if (quotient == 0)
4705         {
4706           /* No luck there.  Try a quotient-and-remainder insn,
4707              keeping the quotient alone.  */
4708           quotient = gen_reg_rtx (compute_mode);
4709           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4710                                      op0, op1,
4711                                      quotient, NULL_RTX, unsignedp))
4712             {
4713               quotient = 0;
4714               if (! rem_flag)
4715                 /* Still no luck.  If we are not computing the remainder,
4716                    use a library call for the quotient.  */
4717                 quotient = sign_expand_binop (compute_mode,
4718                                               udiv_optab, sdiv_optab,
4719                                               op0, op1, target,
4720                                               unsignedp, OPTAB_LIB_WIDEN);
4721             }
4722         }
4723     }
4724
4725   if (rem_flag)
4726     {
4727       if (target && GET_MODE (target) != compute_mode)
4728         target = 0;
4729
4730       if (quotient == 0)
4731         {
4732           /* No divide instruction either.  Use library for remainder.  */
4733           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4734                                          op0, op1, target,
4735                                          unsignedp, OPTAB_LIB_WIDEN);
4736           /* No remainder function.  Try a quotient-and-remainder
4737              function, keeping the remainder.  */
4738           if (!remainder)
4739             {
4740               remainder = gen_reg_rtx (compute_mode);
4741               if (!expand_twoval_binop_libfunc
4742                   (unsignedp ? udivmod_optab : sdivmod_optab,
4743                    op0, op1,
4744                    NULL_RTX, remainder,
4745                    unsignedp ? UMOD : MOD))
4746                 remainder = NULL_RTX;
4747             }
4748         }
4749       else
4750         {
4751           /* We divided.  Now finish doing X - Y * (X / Y).  */
4752           remainder = expand_mult (compute_mode, quotient, op1,
4753                                    NULL_RTX, unsignedp);
4754           remainder = expand_binop (compute_mode, sub_optab, op0,
4755                                     remainder, target, unsignedp,
4756                                     OPTAB_LIB_WIDEN);
4757         }
4758     }
4759
4760   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4761 }
4762 \f
4763 /* Return a tree node with data type TYPE, describing the value of X.
4764    Usually this is an VAR_DECL, if there is no obvious better choice.
4765    X may be an expression, however we only support those expressions
4766    generated by loop.c.  */
4767
4768 tree
4769 make_tree (tree type, rtx x)
4770 {
4771   tree t;
4772
4773   switch (GET_CODE (x))
4774     {
4775     case CONST_INT:
4776       {
4777         HOST_WIDE_INT hi = 0;
4778
4779         if (INTVAL (x) < 0
4780             && !(TYPE_UNSIGNED (type)
4781                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4782                      < HOST_BITS_PER_WIDE_INT)))
4783           hi = -1;
4784
4785         t = build_int_cst_wide (type, INTVAL (x), hi);
4786
4787         return t;
4788       }
4789
4790     case CONST_DOUBLE:
4791       if (GET_MODE (x) == VOIDmode)
4792         t = build_int_cst_wide (type,
4793                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4794       else
4795         {
4796           REAL_VALUE_TYPE d;
4797
4798           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4799           t = build_real (type, d);
4800         }
4801
4802       return t;
4803
4804     case CONST_VECTOR:
4805       {
4806         int i, units;
4807         rtx elt;
4808         tree t = NULL_TREE;
4809
4810         units = CONST_VECTOR_NUNITS (x);
4811
4812         /* Build a tree with vector elements.  */
4813         for (i = units - 1; i >= 0; --i)
4814           {
4815             elt = CONST_VECTOR_ELT (x, i);
4816             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4817           }
4818
4819         return build_vector (type, t);
4820       }
4821
4822     case PLUS:
4823       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4824                            make_tree (type, XEXP (x, 1))));
4825
4826     case MINUS:
4827       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4828                            make_tree (type, XEXP (x, 1))));
4829
4830     case NEG:
4831       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4832
4833     case MULT:
4834       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4835                            make_tree (type, XEXP (x, 1))));
4836
4837     case ASHIFT:
4838       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4839                            make_tree (type, XEXP (x, 1))));
4840
4841     case LSHIFTRT:
4842       t = lang_hooks.types.unsigned_type (type);
4843       return fold (convert (type,
4844                             build2 (RSHIFT_EXPR, t,
4845                                     make_tree (t, XEXP (x, 0)),
4846                                     make_tree (type, XEXP (x, 1)))));
4847
4848     case ASHIFTRT:
4849       t = lang_hooks.types.signed_type (type);
4850       return fold (convert (type,
4851                             build2 (RSHIFT_EXPR, t,
4852                                     make_tree (t, XEXP (x, 0)),
4853                                     make_tree (type, XEXP (x, 1)))));
4854
4855     case DIV:
4856       if (TREE_CODE (type) != REAL_TYPE)
4857         t = lang_hooks.types.signed_type (type);
4858       else
4859         t = type;
4860
4861       return fold (convert (type,
4862                             build2 (TRUNC_DIV_EXPR, t,
4863                                     make_tree (t, XEXP (x, 0)),
4864                                     make_tree (t, XEXP (x, 1)))));
4865     case UDIV:
4866       t = lang_hooks.types.unsigned_type (type);
4867       return fold (convert (type,
4868                             build2 (TRUNC_DIV_EXPR, t,
4869                                     make_tree (t, XEXP (x, 0)),
4870                                     make_tree (t, XEXP (x, 1)))));
4871
4872     case SIGN_EXTEND:
4873     case ZERO_EXTEND:
4874       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4875                                           GET_CODE (x) == ZERO_EXTEND);
4876       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4877
4878     default:
4879       t = build_decl (VAR_DECL, NULL_TREE, type);
4880
4881       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4882          ptr_mode.  So convert.  */
4883       if (POINTER_TYPE_P (type))
4884         x = convert_memory_address (TYPE_MODE (type), x);
4885
4886       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4887          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4888       t->decl.rtl = x;
4889
4890       return t;
4891     }
4892 }
4893
4894 /* Check whether the multiplication X * MULT + ADD overflows.
4895    X, MULT and ADD must be CONST_*.
4896    MODE is the machine mode for the computation.
4897    X and MULT must have mode MODE.  ADD may have a different mode.
4898    So can X (defaults to same as MODE).
4899    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4900
4901 bool
4902 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4903                            enum machine_mode mode, int unsignedp)
4904 {
4905   tree type, mult_type, add_type, result;
4906
4907   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4908
4909   /* In order to get a proper overflow indication from an unsigned
4910      type, we have to pretend that it's a sizetype.  */
4911   mult_type = type;
4912   if (unsignedp)
4913     {
4914       /* FIXME:It would be nice if we could step directly from this
4915          type to its sizetype equivalent.  */
4916       mult_type = build_distinct_type_copy (type);
4917       TYPE_IS_SIZETYPE (mult_type) = 1;
4918     }
4919
4920   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4921               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4922
4923   result = fold (build2 (PLUS_EXPR, mult_type,
4924                          fold (build2 (MULT_EXPR, mult_type,
4925                                        make_tree (mult_type, x),
4926                                        make_tree (mult_type, mult))),
4927                          make_tree (add_type, add)));
4928
4929   return TREE_CONSTANT_OVERFLOW (result);
4930 }
4931
4932 /* Return an rtx representing the value of X * MULT + ADD.
4933    TARGET is a suggestion for where to store the result (an rtx).
4934    MODE is the machine mode for the computation.
4935    X and MULT must have mode MODE.  ADD may have a different mode.
4936    So can X (defaults to same as MODE).
4937    UNSIGNEDP is nonzero to do unsigned multiplication.
4938    This may emit insns.  */
4939
4940 rtx
4941 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4942                  int unsignedp)
4943 {
4944   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4945   tree add_type = (GET_MODE (add) == VOIDmode
4946                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4947                                                            unsignedp));
4948   tree result =  fold (build2 (PLUS_EXPR, type,
4949                                fold (build2 (MULT_EXPR, type,
4950                                              make_tree (type, x),
4951                                              make_tree (type, mult))),
4952                                make_tree (add_type, add)));
4953
4954   return expand_expr (result, target, VOIDmode, 0);
4955 }
4956 \f
4957 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4958    and returning TARGET.
4959
4960    If TARGET is 0, a pseudo-register or constant is returned.  */
4961
4962 rtx
4963 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4964 {
4965   rtx tem = 0;
4966
4967   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4968     tem = simplify_binary_operation (AND, mode, op0, op1);
4969   if (tem == 0)
4970     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4971
4972   if (target == 0)
4973     target = tem;
4974   else if (tem != target)
4975     emit_move_insn (target, tem);
4976   return target;
4977 }
4978 \f
4979 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4980    and storing in TARGET.  Normally return TARGET.
4981    Return 0 if that cannot be done.
4982
4983    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4984    it is VOIDmode, they cannot both be CONST_INT.
4985
4986    UNSIGNEDP is for the case where we have to widen the operands
4987    to perform the operation.  It says to use zero-extension.
4988
4989    NORMALIZEP is 1 if we should convert the result to be either zero
4990    or one.  Normalize is -1 if we should convert the result to be
4991    either zero or -1.  If NORMALIZEP is zero, the result will be left
4992    "raw" out of the scc insn.  */
4993
4994 rtx
4995 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4996                  enum machine_mode mode, int unsignedp, int normalizep)
4997 {
4998   rtx subtarget;
4999   enum insn_code icode;
5000   enum machine_mode compare_mode;
5001   enum machine_mode target_mode = GET_MODE (target);
5002   rtx tem;
5003   rtx last = get_last_insn ();
5004   rtx pattern, comparison;
5005
5006   if (unsignedp)
5007     code = unsigned_condition (code);
5008
5009   /* If one operand is constant, make it the second one.  Only do this
5010      if the other operand is not constant as well.  */
5011
5012   if (swap_commutative_operands_p (op0, op1))
5013     {
5014       tem = op0;
5015       op0 = op1;
5016       op1 = tem;
5017       code = swap_condition (code);
5018     }
5019
5020   if (mode == VOIDmode)
5021     mode = GET_MODE (op0);
5022
5023   /* For some comparisons with 1 and -1, we can convert this to
5024      comparisons with zero.  This will often produce more opportunities for
5025      store-flag insns.  */
5026
5027   switch (code)
5028     {
5029     case LT:
5030       if (op1 == const1_rtx)
5031         op1 = const0_rtx, code = LE;
5032       break;
5033     case LE:
5034       if (op1 == constm1_rtx)
5035         op1 = const0_rtx, code = LT;
5036       break;
5037     case GE:
5038       if (op1 == const1_rtx)
5039         op1 = const0_rtx, code = GT;
5040       break;
5041     case GT:
5042       if (op1 == constm1_rtx)
5043         op1 = const0_rtx, code = GE;
5044       break;
5045     case GEU:
5046       if (op1 == const1_rtx)
5047         op1 = const0_rtx, code = NE;
5048       break;
5049     case LTU:
5050       if (op1 == const1_rtx)
5051         op1 = const0_rtx, code = EQ;
5052       break;
5053     default:
5054       break;
5055     }
5056
5057   /* If we are comparing a double-word integer with zero or -1, we can
5058      convert the comparison into one involving a single word.  */
5059   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5060       && GET_MODE_CLASS (mode) == MODE_INT
5061       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5062     {
5063       if ((code == EQ || code == NE)
5064           && (op1 == const0_rtx || op1 == constm1_rtx))
5065         {
5066           rtx op00, op01, op0both;
5067
5068           /* Do a logical OR or AND of the two words and compare the result.  */
5069           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5070           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5071           op0both = expand_binop (word_mode,
5072                                   op1 == const0_rtx ? ior_optab : and_optab,
5073                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5074
5075           if (op0both != 0)
5076             return emit_store_flag (target, code, op0both, op1, word_mode,
5077                                     unsignedp, normalizep);
5078         }
5079       else if ((code == LT || code == GE) && op1 == const0_rtx)
5080         {
5081           rtx op0h;
5082
5083           /* If testing the sign bit, can just test on high word.  */
5084           op0h = simplify_gen_subreg (word_mode, op0, mode,
5085                                       subreg_highpart_offset (word_mode, mode));
5086           return emit_store_flag (target, code, op0h, op1, word_mode,
5087                                   unsignedp, normalizep);
5088         }
5089     }
5090
5091   /* From now on, we won't change CODE, so set ICODE now.  */
5092   icode = setcc_gen_code[(int) code];
5093
5094   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5095      complement of A (for GE) and shifting the sign bit to the low bit.  */
5096   if (op1 == const0_rtx && (code == LT || code == GE)
5097       && GET_MODE_CLASS (mode) == MODE_INT
5098       && (normalizep || STORE_FLAG_VALUE == 1
5099           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5100               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5101                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5102     {
5103       subtarget = target;
5104
5105       /* If the result is to be wider than OP0, it is best to convert it
5106          first.  If it is to be narrower, it is *incorrect* to convert it
5107          first.  */
5108       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5109         {
5110           op0 = convert_modes (target_mode, mode, op0, 0);
5111           mode = target_mode;
5112         }
5113
5114       if (target_mode != mode)
5115         subtarget = 0;
5116
5117       if (code == GE)
5118         op0 = expand_unop (mode, one_cmpl_optab, op0,
5119                            ((STORE_FLAG_VALUE == 1 || normalizep)
5120                             ? 0 : subtarget), 0);
5121
5122       if (STORE_FLAG_VALUE == 1 || normalizep)
5123         /* If we are supposed to produce a 0/1 value, we want to do
5124            a logical shift from the sign bit to the low-order bit; for
5125            a -1/0 value, we do an arithmetic shift.  */
5126         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5127                             size_int (GET_MODE_BITSIZE (mode) - 1),
5128                             subtarget, normalizep != -1);
5129
5130       if (mode != target_mode)
5131         op0 = convert_modes (target_mode, mode, op0, 0);
5132
5133       return op0;
5134     }
5135
5136   if (icode != CODE_FOR_nothing)
5137     {
5138       insn_operand_predicate_fn pred;
5139
5140       /* We think we may be able to do this with a scc insn.  Emit the
5141          comparison and then the scc insn.  */
5142
5143       do_pending_stack_adjust ();
5144       last = get_last_insn ();
5145
5146       comparison
5147         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5148       if (CONSTANT_P (comparison))
5149         {
5150           switch (GET_CODE (comparison))
5151             {
5152             case CONST_INT:
5153               if (comparison == const0_rtx)
5154                 return const0_rtx;
5155               break;
5156
5157 #ifdef FLOAT_STORE_FLAG_VALUE
5158             case CONST_DOUBLE:
5159               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5160                 return const0_rtx;
5161               break;
5162 #endif
5163             default:
5164               gcc_unreachable ();
5165             }
5166
5167           if (normalizep == 1)
5168             return const1_rtx;
5169           if (normalizep == -1)
5170             return constm1_rtx;
5171           return const_true_rtx;
5172         }
5173
5174       /* The code of COMPARISON may not match CODE if compare_from_rtx
5175          decided to swap its operands and reverse the original code.
5176
5177          We know that compare_from_rtx returns either a CONST_INT or
5178          a new comparison code, so it is safe to just extract the
5179          code from COMPARISON.  */
5180       code = GET_CODE (comparison);
5181
5182       /* Get a reference to the target in the proper mode for this insn.  */
5183       compare_mode = insn_data[(int) icode].operand[0].mode;
5184       subtarget = target;
5185       pred = insn_data[(int) icode].operand[0].predicate;
5186       if (optimize || ! (*pred) (subtarget, compare_mode))
5187         subtarget = gen_reg_rtx (compare_mode);
5188
5189       pattern = GEN_FCN (icode) (subtarget);
5190       if (pattern)
5191         {
5192           emit_insn (pattern);
5193
5194           /* If we are converting to a wider mode, first convert to
5195              TARGET_MODE, then normalize.  This produces better combining
5196              opportunities on machines that have a SIGN_EXTRACT when we are
5197              testing a single bit.  This mostly benefits the 68k.
5198
5199              If STORE_FLAG_VALUE does not have the sign bit set when
5200              interpreted in COMPARE_MODE, we can do this conversion as
5201              unsigned, which is usually more efficient.  */
5202           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5203             {
5204               convert_move (target, subtarget,
5205                             (GET_MODE_BITSIZE (compare_mode)
5206                              <= HOST_BITS_PER_WIDE_INT)
5207                             && 0 == (STORE_FLAG_VALUE
5208                                      & ((HOST_WIDE_INT) 1
5209                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5210               op0 = target;
5211               compare_mode = target_mode;
5212             }
5213           else
5214             op0 = subtarget;
5215
5216           /* If we want to keep subexpressions around, don't reuse our
5217              last target.  */
5218
5219           if (optimize)
5220             subtarget = 0;
5221
5222           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5223              we don't have to do anything.  */
5224           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5225             ;
5226           /* STORE_FLAG_VALUE might be the most negative number, so write
5227              the comparison this way to avoid a compiler-time warning.  */
5228           else if (- normalizep == STORE_FLAG_VALUE)
5229             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5230
5231           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5232              makes it hard to use a value of just the sign bit due to
5233              ANSI integer constant typing rules.  */
5234           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5235                    && (STORE_FLAG_VALUE
5236                        & ((HOST_WIDE_INT) 1
5237                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5238             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5239                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5240                                 subtarget, normalizep == 1);
5241           else
5242             {
5243               gcc_assert (STORE_FLAG_VALUE & 1);
5244
5245               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5246               if (normalizep == -1)
5247                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5248             }
5249
5250           /* If we were converting to a smaller mode, do the
5251              conversion now.  */
5252           if (target_mode != compare_mode)
5253             {
5254               convert_move (target, op0, 0);
5255               return target;
5256             }
5257           else
5258             return op0;
5259         }
5260     }
5261
5262   delete_insns_since (last);
5263
5264   /* If optimizing, use different pseudo registers for each insn, instead
5265      of reusing the same pseudo.  This leads to better CSE, but slows
5266      down the compiler, since there are more pseudos */
5267   subtarget = (!optimize
5268                && (target_mode == mode)) ? target : NULL_RTX;
5269
5270   /* If we reached here, we can't do this with a scc insn.  However, there
5271      are some comparisons that can be done directly.  For example, if
5272      this is an equality comparison of integers, we can try to exclusive-or
5273      (or subtract) the two operands and use a recursive call to try the
5274      comparison with zero.  Don't do any of these cases if branches are
5275      very cheap.  */
5276
5277   if (BRANCH_COST > 0
5278       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5279       && op1 != const0_rtx)
5280     {
5281       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5282                           OPTAB_WIDEN);
5283
5284       if (tem == 0)
5285         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5286                             OPTAB_WIDEN);
5287       if (tem != 0)
5288         tem = emit_store_flag (target, code, tem, const0_rtx,
5289                                mode, unsignedp, normalizep);
5290       if (tem == 0)
5291         delete_insns_since (last);
5292       return tem;
5293     }
5294
5295   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5296      the constant zero.  Reject all other comparisons at this point.  Only
5297      do LE and GT if branches are expensive since they are expensive on
5298      2-operand machines.  */
5299
5300   if (BRANCH_COST == 0
5301       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5302       || (code != EQ && code != NE
5303           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5304     return 0;
5305
5306   /* See what we need to return.  We can only return a 1, -1, or the
5307      sign bit.  */
5308
5309   if (normalizep == 0)
5310     {
5311       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5312         normalizep = STORE_FLAG_VALUE;
5313
5314       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5315                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5316                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5317         ;
5318       else
5319         return 0;
5320     }
5321
5322   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5323      do the necessary operation below.  */
5324
5325   tem = 0;
5326
5327   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5328      the sign bit set.  */
5329
5330   if (code == LE)
5331     {
5332       /* This is destructive, so SUBTARGET can't be OP0.  */
5333       if (rtx_equal_p (subtarget, op0))
5334         subtarget = 0;
5335
5336       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5337                           OPTAB_WIDEN);
5338       if (tem)
5339         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5340                             OPTAB_WIDEN);
5341     }
5342
5343   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5344      number of bits in the mode of OP0, minus one.  */
5345
5346   if (code == GT)
5347     {
5348       if (rtx_equal_p (subtarget, op0))
5349         subtarget = 0;
5350
5351       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5352                           size_int (GET_MODE_BITSIZE (mode) - 1),
5353                           subtarget, 0);
5354       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5355                           OPTAB_WIDEN);
5356     }
5357
5358   if (code == EQ || code == NE)
5359     {
5360       /* For EQ or NE, one way to do the comparison is to apply an operation
5361          that converts the operand into a positive number if it is nonzero
5362          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5363          for NE we negate.  This puts the result in the sign bit.  Then we
5364          normalize with a shift, if needed.
5365
5366          Two operations that can do the above actions are ABS and FFS, so try
5367          them.  If that doesn't work, and MODE is smaller than a full word,
5368          we can use zero-extension to the wider mode (an unsigned conversion)
5369          as the operation.  */
5370
5371       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5372          that is compensated by the subsequent overflow when subtracting
5373          one / negating.  */
5374
5375       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5376         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5377       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5378         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5379       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5380         {
5381           tem = convert_modes (word_mode, mode, op0, 1);
5382           mode = word_mode;
5383         }
5384
5385       if (tem != 0)
5386         {
5387           if (code == EQ)
5388             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5389                                 0, OPTAB_WIDEN);
5390           else
5391             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5392         }
5393
5394       /* If we couldn't do it that way, for NE we can "or" the two's complement
5395          of the value with itself.  For EQ, we take the one's complement of
5396          that "or", which is an extra insn, so we only handle EQ if branches
5397          are expensive.  */
5398
5399       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5400         {
5401           if (rtx_equal_p (subtarget, op0))
5402             subtarget = 0;
5403
5404           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5405           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5406                               OPTAB_WIDEN);
5407
5408           if (tem && code == EQ)
5409             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5410         }
5411     }
5412
5413   if (tem && normalizep)
5414     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5415                         size_int (GET_MODE_BITSIZE (mode) - 1),
5416                         subtarget, normalizep == 1);
5417
5418   if (tem)
5419     {
5420       if (GET_MODE (tem) != target_mode)
5421         {
5422           convert_move (target, tem, 0);
5423           tem = target;
5424         }
5425       else if (!subtarget)
5426         {
5427           emit_move_insn (target, tem);
5428           tem = target;
5429         }
5430     }
5431   else
5432     delete_insns_since (last);
5433
5434   return tem;
5435 }
5436
5437 /* Like emit_store_flag, but always succeeds.  */
5438
5439 rtx
5440 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5441                        enum machine_mode mode, int unsignedp, int normalizep)
5442 {
5443   rtx tem, label;
5444
5445   /* First see if emit_store_flag can do the job.  */
5446   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5447   if (tem != 0)
5448     return tem;
5449
5450   if (normalizep == 0)
5451     normalizep = 1;
5452
5453   /* If this failed, we have to do this with set/compare/jump/set code.  */
5454
5455   if (!REG_P (target)
5456       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5457     target = gen_reg_rtx (GET_MODE (target));
5458
5459   emit_move_insn (target, const1_rtx);
5460   label = gen_label_rtx ();
5461   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5462                            NULL_RTX, label);
5463
5464   emit_move_insn (target, const0_rtx);
5465   emit_label (label);
5466
5467   return target;
5468 }
5469 \f
5470 /* Perform possibly multi-word comparison and conditional jump to LABEL
5471    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5472
5473    The algorithm is based on the code in expr.c:do_jump.
5474
5475    Note that this does not perform a general comparison.  Only variants
5476    generated within expmed.c are correctly handled, others abort (but could
5477    be handled if needed).  */
5478
5479 static void
5480 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5481                  rtx label)
5482 {
5483   /* If this mode is an integer too wide to compare properly,
5484      compare word by word.  Rely on cse to optimize constant cases.  */
5485
5486   if (GET_MODE_CLASS (mode) == MODE_INT
5487       && ! can_compare_p (op, mode, ccp_jump))
5488     {
5489       rtx label2 = gen_label_rtx ();
5490
5491       switch (op)
5492         {
5493         case LTU:
5494           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5495           break;
5496
5497         case LEU:
5498           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5499           break;
5500
5501         case LT:
5502           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5503           break;
5504
5505         case GT:
5506           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5507           break;
5508
5509         case GE:
5510           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5511           break;
5512
5513           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5514              that's the only equality operations we do */
5515         case EQ:
5516           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5517           do_jump_by_parts_equality_rtx (arg1, label2, label);
5518           break;
5519
5520         case NE:
5521           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5522           do_jump_by_parts_equality_rtx (arg1, label, label2);
5523           break;
5524
5525         default:
5526           gcc_unreachable ();
5527         }
5528
5529       emit_label (label2);
5530     }
5531   else
5532     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5533 }