gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55
  56 /* Nonzero means divides or modulus operations are relatively cheap for
  57    powers of two, so don't use branches; emit the operation instead.
  58    Usually, this will mean that the MD file will emit non-branch
  59    sequences.  */
  60
  61 static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
  62 static int smod_pow2_cheap[NUM_MACHINE_MODES];
  63
  64 #ifndef SLOW_UNALIGNED_ACCESS
  65 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  66 #endif
  67
  68 /* For compilers that support multiple targets with different word sizes,
  69    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  70    is the H8/300(H) compiler.  */
  71
  72 #ifndef MAX_BITS_PER_WORD
  73 #define MAX_BITS_PER_WORD BITS_PER_WORD
  74 #endif
  75
  76 /* Reduce conditional compilation elsewhere.  */
  77 #ifndef HAVE_insv
  78 #define HAVE_insv       0
  79 #define CODE_FOR_insv   CODE_FOR_nothing
  80 #define gen_insv(a,b,c,d) NULL_RTX
  81 #endif
  82 #ifndef HAVE_extv
  83 #define HAVE_extv       0
  84 #define CODE_FOR_extv   CODE_FOR_nothing
  85 #define gen_extv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extzv
  88 #define HAVE_extzv      0
  89 #define CODE_FOR_extzv  CODE_FOR_nothing
  90 #define gen_extzv(a,b,c,d) NULL_RTX
  91 #endif
  92
  93 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  94    shift count and some by mode.  */
  95 static int zero_cost;
  96 static int add_cost[NUM_MACHINE_MODES];
  97 static int neg_cost[NUM_MACHINE_MODES];
  98 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  99 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 100 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 101 static int mul_cost[NUM_MACHINE_MODES];
 102 static int div_cost[NUM_MACHINE_MODES];
 103 static int mul_widen_cost[NUM_MACHINE_MODES];
 104 static int mul_highpart_cost[NUM_MACHINE_MODES];
 105
 106 void
 107 init_expmed (void)
 108 {
 109   struct
 110   {
 111     struct rtx_def reg;
 112     struct rtx_def plus;        rtunion plus_fld1;
 113     struct rtx_def neg;
 114     struct rtx_def udiv;        rtunion udiv_fld1;
 115     struct rtx_def mult;        rtunion mult_fld1;
 116     struct rtx_def div;         rtunion div_fld1;
 117     struct rtx_def mod;         rtunion mod_fld1;
 118     struct rtx_def zext;
 119     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 120     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 121     struct rtx_def wide_trunc;
 122     struct rtx_def shift;       rtunion shift_fld1;
 123     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 124     struct rtx_def shift_add;   rtunion shift_add_fld1;
 125     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 126   } all;
 127
 128   rtx pow2[MAX_BITS_PER_WORD];
 129   rtx cint[MAX_BITS_PER_WORD];
 130   int m, n;
 131   enum machine_mode mode, wider_mode;
 132
 133   zero_cost = rtx_cost (const0_rtx, 0);
 134
 135   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 136     {
 137       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 138       cint[m] = GEN_INT (m);
 139     }
 140
 141   memset (&all, 0, sizeof all);
 142
 143   PUT_CODE (&all.reg, REG);
 144   REGNO (&all.reg) = 10000;
 145
 146   PUT_CODE (&all.plus, PLUS);
 147   XEXP (&all.plus, 0) = &all.reg;
 148   XEXP (&all.plus, 1) = &all.reg;
 149
 150   PUT_CODE (&all.neg, NEG);
 151   XEXP (&all.neg, 0) = &all.reg;
 152
 153   PUT_CODE (&all.udiv, UDIV);
 154   XEXP (&all.udiv, 0) = &all.reg;
 155   XEXP (&all.udiv, 1) = &all.reg;
 156
 157   PUT_CODE (&all.mult, MULT);
 158   XEXP (&all.mult, 0) = &all.reg;
 159   XEXP (&all.mult, 1) = &all.reg;
 160
 161   PUT_CODE (&all.div, DIV);
 162   XEXP (&all.div, 0) = &all.reg;
 163   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 164
 165   PUT_CODE (&all.mod, MOD);
 166   XEXP (&all.mod, 0) = &all.reg;
 167   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 168
 169   PUT_CODE (&all.zext, ZERO_EXTEND);
 170   XEXP (&all.zext, 0) = &all.reg;
 171
 172   PUT_CODE (&all.wide_mult, MULT);
 173   XEXP (&all.wide_mult, 0) = &all.zext;
 174   XEXP (&all.wide_mult, 1) = &all.zext;
 175
 176   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 177   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 178
 179   PUT_CODE (&all.wide_trunc, TRUNCATE);
 180   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 181
 182   PUT_CODE (&all.shift, ASHIFT);
 183   XEXP (&all.shift, 0) = &all.reg;
 184
 185   PUT_CODE (&all.shift_mult, MULT);
 186   XEXP (&all.shift_mult, 0) = &all.reg;
 187
 188   PUT_CODE (&all.shift_add, PLUS);
 189   XEXP (&all.shift_add, 0) = &all.shift_mult;
 190   XEXP (&all.shift_add, 1) = &all.reg;
 191
 192   PUT_CODE (&all.shift_sub, MINUS);
 193   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 194   XEXP (&all.shift_sub, 1) = &all.reg;
 195
 196   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 197        mode != VOIDmode;
 198        mode = GET_MODE_WIDER_MODE (mode))
 199     {
 200       PUT_MODE (&all.reg, mode);
 201       PUT_MODE (&all.plus, mode);
 202       PUT_MODE (&all.neg, mode);
 203       PUT_MODE (&all.udiv, mode);
 204       PUT_MODE (&all.mult, mode);
 205       PUT_MODE (&all.div, mode);
 206       PUT_MODE (&all.mod, mode);
 207       PUT_MODE (&all.wide_trunc, mode);
 208       PUT_MODE (&all.shift, mode);
 209       PUT_MODE (&all.shift_mult, mode);
 210       PUT_MODE (&all.shift_add, mode);
 211       PUT_MODE (&all.shift_sub, mode);
 212
 213       add_cost[mode] = rtx_cost (&all.plus, SET);
 214       neg_cost[mode] = rtx_cost (&all.neg, SET);
 215       div_cost[mode] = rtx_cost (&all.udiv, SET);
 216       mul_cost[mode] = rtx_cost (&all.mult, SET);
 217
 218       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 219       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 2 * add_cost[mode]);
 220
 221       wider_mode = GET_MODE_WIDER_MODE (mode);
 222       if (wider_mode != VOIDmode)
 223         {
 224           PUT_MODE (&all.zext, wider_mode);
 225           PUT_MODE (&all.wide_mult, wider_mode);
 226           PUT_MODE (&all.wide_lshr, wider_mode);
 227           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 228
 229           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 230           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 231         }
 232
 233       shift_cost[mode][0] = 0;
 234       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 235
 236       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 237       for (m = 1; m < n; m++)
 238         {
 239           XEXP (&all.shift, 1) = cint[m];
 240           XEXP (&all.shift_mult, 1) = pow2[m];
 241
 242           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 243           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 244           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 245         }
 246     }
 247 }
 248
 249 /* Return an rtx representing minus the value of X.
 250    MODE is the intended mode of the result,
 251    useful if X is a CONST_INT.  */
 252
 253 rtx
 254 negate_rtx (enum machine_mode mode, rtx x)
 255 {
 256   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 257
 258   if (result == 0)
 259     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 260
 261   return result;
 262 }
 263
 264 /* Report on the availability of insv/extv/extzv and the desired mode
 265    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 266    is false; else the mode of the specified operand.  If OPNO is -1,
 267    all the caller cares about is whether the insn is available.  */
 268 enum machine_mode
 269 mode_for_extraction (enum extraction_pattern pattern, int opno)
 270 {
 271   const struct insn_data *data;
 272
 273   switch (pattern)
 274     {
 275     case EP_insv:
 276       if (HAVE_insv)
 277         {
 278           data = &insn_data[CODE_FOR_insv];
 279           break;
 280         }
 281       return MAX_MACHINE_MODE;
 282
 283     case EP_extv:
 284       if (HAVE_extv)
 285         {
 286           data = &insn_data[CODE_FOR_extv];
 287           break;
 288         }
 289       return MAX_MACHINE_MODE;
 290
 291     case EP_extzv:
 292       if (HAVE_extzv)
 293         {
 294           data = &insn_data[CODE_FOR_extzv];
 295           break;
 296         }
 297       return MAX_MACHINE_MODE;
 298
 299     default:
 300       abort ();
 301     }
 302
 303   if (opno == -1)
 304     return VOIDmode;
 305
 306   /* Everyone who uses this function used to follow it with
 307      if (result == VOIDmode) result = word_mode; */
 308   if (data->operand[opno].mode == VOIDmode)
 309     return word_mode;
 310   return data->operand[opno].mode;
 311 }
 312
 313 \f
 314 /* Generate code to store value from rtx VALUE
 315    into a bit-field within structure STR_RTX
 316    containing BITSIZE bits starting at bit BITNUM.
 317    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 318    ALIGN is the alignment that STR_RTX is known to have.
 319    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 320
 321 /* ??? Note that there are two different ideas here for how
 322    to determine the size to count bits within, for a register.
 323    One is BITS_PER_WORD, and the other is the size of operand 3
 324    of the insv pattern.
 325
 326    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 327    else, we use the mode of operand 3.  */
 328
 329 rtx
 330 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 331                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 332                  rtx value)
 333 {
 334   unsigned int unit
 335     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 336   unsigned HOST_WIDE_INT offset = bitnum / unit;
 337   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 338   rtx op0 = str_rtx;
 339   int byte_offset;
 340
 341   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 342
 343   while (GET_CODE (op0) == SUBREG)
 344     {
 345       /* The following line once was done only if WORDS_BIG_ENDIAN,
 346          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 347          meaningful at a much higher level; when structures are copied
 348          between memory and regs, the higher-numbered regs
 349          always get higher addresses.  */
 350       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 351       /* We used to adjust BITPOS here, but now we do the whole adjustment
 352          right after the loop.  */
 353       op0 = SUBREG_REG (op0);
 354     }
 355
 356   /* Use vec_set patterns for inserting parts of vectors whenever
 357      available.  */
 358   if (VECTOR_MODE_P (GET_MODE (op0))
 359       && !MEM_P (op0)
 360       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 361           != CODE_FOR_nothing)
 362       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 363       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 364       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 365     {
 366       enum machine_mode outermode = GET_MODE (op0);
 367       enum machine_mode innermode = GET_MODE_INNER (outermode);
 368       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 369       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 370       rtx rtxpos = GEN_INT (pos);
 371       rtx src = value;
 372       rtx dest = op0;
 373       rtx pat, seq;
 374       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 375       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 376       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 377
 378       start_sequence ();
 379
 380       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 381         src = copy_to_mode_reg (mode1, src);
 382
 383       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 384         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 385
 386       /* We could handle this, but we should always be called with a pseudo
 387          for our targets and all insns should take them as outputs.  */
 388       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
 389           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
 390           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 391         abort ();
 392       pat = GEN_FCN (icode) (dest, src, rtxpos);
 393       seq = get_insns ();
 394       end_sequence ();
 395       if (pat)
 396         {
 397           emit_insn (seq);
 398           emit_insn (pat);
 399           return dest;
 400         }
 401     }
 402
 403   if (flag_force_mem)
 404     {
 405       int old_generating_concat_p = generating_concat_p;
 406       generating_concat_p = 0;
 407       value = force_not_mem (value);
 408       generating_concat_p = old_generating_concat_p;
 409     }
 410
 411   /* If the target is a register, overwriting the entire object, or storing
 412      a full-word or multi-word field can be done with just a SUBREG.
 413
 414      If the target is memory, storing any naturally aligned field can be
 415      done with a simple store.  For targets that support fast unaligned
 416      memory, any naturally sized, unit aligned field can be done directly.  */
 417
 418   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 419                 + (offset * UNITS_PER_WORD);
 420
 421   if (bitpos == 0
 422       && bitsize == GET_MODE_BITSIZE (fieldmode)
 423       && (!MEM_P (op0)
 424           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 425              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 426              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 427           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 428              || (offset * BITS_PER_UNIT % bitsize == 0
 429                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 430     {
 431       if (GET_MODE (op0) != fieldmode)
 432         {
 433           if (GET_CODE (op0) == SUBREG)
 434             {
 435               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 436                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 437                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 438                 op0 = SUBREG_REG (op0);
 439               else
 440                 /* Else we've got some float mode source being extracted into
 441                    a different float mode destination -- this combination of
 442                    subregs results in Severe Tire Damage.  */
 443                 abort ();
 444             }
 445           if (REG_P (op0))
 446             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 447           else
 448             op0 = adjust_address (op0, fieldmode, offset);
 449         }
 450       emit_move_insn (op0, value);
 451       return value;
 452     }
 453
 454   /* Make sure we are playing with integral modes.  Pun with subregs
 455      if we aren't.  This must come after the entire register case above,
 456      since that case is valid for any mode.  The following cases are only
 457      valid for integral modes.  */
 458   {
 459     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 460     if (imode != GET_MODE (op0))
 461       {
 462         if (MEM_P (op0))
 463           op0 = adjust_address (op0, imode, 0);
 464         else if (imode != BLKmode)
 465           op0 = gen_lowpart (imode, op0);
 466         else
 467           abort ();
 468       }
 469   }
 470
 471   /* We may be accessing data outside the field, which means
 472      we can alias adjacent data.  */
 473   if (MEM_P (op0))
 474     {
 475       op0 = shallow_copy_rtx (op0);
 476       set_mem_alias_set (op0, 0);
 477       set_mem_expr (op0, 0);
 478     }
 479
 480   /* If OP0 is a register, BITPOS must count within a word.
 481      But as we have it, it counts within whatever size OP0 now has.
 482      On a bigendian machine, these are not the same, so convert.  */
 483   if (BYTES_BIG_ENDIAN
 484       && !MEM_P (op0)
 485       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 486     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 487
 488   /* Storing an lsb-aligned field in a register
 489      can be done with a movestrict instruction.  */
 490
 491   if (!MEM_P (op0)
 492       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 493       && bitsize == GET_MODE_BITSIZE (fieldmode)
 494       && (movstrict_optab->handlers[fieldmode].insn_code
 495           != CODE_FOR_nothing))
 496     {
 497       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 498
 499       /* Get appropriate low part of the value being stored.  */
 500       if (GET_CODE (value) == CONST_INT || REG_P (value))
 501         value = gen_lowpart (fieldmode, value);
 502       else if (!(GET_CODE (value) == SYMBOL_REF
 503                  || GET_CODE (value) == LABEL_REF
 504                  || GET_CODE (value) == CONST))
 505         value = convert_to_mode (fieldmode, value, 0);
 506
 507       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 508         value = copy_to_mode_reg (fieldmode, value);
 509
 510       if (GET_CODE (op0) == SUBREG)
 511         {
 512           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 513               || GET_MODE_CLASS (fieldmode) == MODE_INT
 514               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 515             op0 = SUBREG_REG (op0);
 516           else
 517             /* Else we've got some float mode source being extracted into
 518                a different float mode destination -- this combination of
 519                subregs results in Severe Tire Damage.  */
 520             abort ();
 521         }
 522
 523       emit_insn (GEN_FCN (icode)
 524                  (gen_rtx_SUBREG (fieldmode, op0,
 525                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 526                                   + (offset * UNITS_PER_WORD)),
 527                                   value));
 528
 529       return value;
 530     }
 531
 532   /* Handle fields bigger than a word.  */
 533
 534   if (bitsize > BITS_PER_WORD)
 535     {
 536       /* Here we transfer the words of the field
 537          in the order least significant first.
 538          This is because the most significant word is the one which may
 539          be less than full.
 540          However, only do that if the value is not BLKmode.  */
 541
 542       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 543       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 544       unsigned int i;
 545
 546       /* This is the mode we must force value to, so that there will be enough
 547          subwords to extract.  Note that fieldmode will often (always?) be
 548          VOIDmode, because that is what store_field uses to indicate that this
 549          is a bit field, but passing VOIDmode to operand_subword_force will
 550          result in an abort.  */
 551       fieldmode = GET_MODE (value);
 552       if (fieldmode == VOIDmode)
 553         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 554
 555       for (i = 0; i < nwords; i++)
 556         {
 557           /* If I is 0, use the low-order word in both field and target;
 558              if I is 1, use the next to lowest word; and so on.  */
 559           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 560           unsigned int bit_offset = (backwards
 561                                      ? MAX ((int) bitsize - ((int) i + 1)
 562                                             * BITS_PER_WORD,
 563                                             0)
 564                                      : (int) i * BITS_PER_WORD);
 565
 566           store_bit_field (op0, MIN (BITS_PER_WORD,
 567                                      bitsize - i * BITS_PER_WORD),
 568                            bitnum + bit_offset, word_mode,
 569                            operand_subword_force (value, wordnum, fieldmode));
 570         }
 571       return value;
 572     }
 573
 574   /* From here on we can assume that the field to be stored in is
 575      a full-word (whatever type that is), since it is shorter than a word.  */
 576
 577   /* OFFSET is the number of words or bytes (UNIT says which)
 578      from STR_RTX to the first word or byte containing part of the field.  */
 579
 580   if (!MEM_P (op0))
 581     {
 582       if (offset != 0
 583           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 584         {
 585           if (!REG_P (op0))
 586             {
 587               /* Since this is a destination (lvalue), we can't copy it to a
 588                  pseudo.  We can trivially remove a SUBREG that does not
 589                  change the size of the operand.  Such a SUBREG may have been
 590                  added above.  Otherwise, abort.  */
 591               if (GET_CODE (op0) == SUBREG
 592                   && (GET_MODE_SIZE (GET_MODE (op0))
 593                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 594                 op0 = SUBREG_REG (op0);
 595               else
 596                 abort ();
 597             }
 598           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 599                                 op0, (offset * UNITS_PER_WORD));
 600         }
 601       offset = 0;
 602     }
 603
 604   /* If VALUE is a floating-point mode, access it as an integer of the
 605      corresponding size.  This can occur on a machine with 64 bit registers
 606      that uses SFmode for float.  This can also occur for unaligned float
 607      structure fields.  */
 608   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 609       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 610     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 611                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 612                          value);
 613
 614   /* Now OFFSET is nonzero only if OP0 is memory
 615      and is therefore always measured in bytes.  */
 616
 617   if (HAVE_insv
 618       && GET_MODE (value) != BLKmode
 619       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 620       /* Ensure insv's size is wide enough for this field.  */
 621       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 622       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 623             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 624     {
 625       int xbitpos = bitpos;
 626       rtx value1;
 627       rtx xop0 = op0;
 628       rtx last = get_last_insn ();
 629       rtx pat;
 630       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 631       int save_volatile_ok = volatile_ok;
 632
 633       volatile_ok = 1;
 634
 635       /* If this machine's insv can only insert into a register, copy OP0
 636          into a register and save it back later.  */
 637       /* This used to check flag_force_mem, but that was a serious
 638          de-optimization now that flag_force_mem is enabled by -O2.  */
 639       if (MEM_P (op0)
 640           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 641                 (op0, VOIDmode)))
 642         {
 643           rtx tempreg;
 644           enum machine_mode bestmode;
 645
 646           /* Get the mode to use for inserting into this field.  If OP0 is
 647              BLKmode, get the smallest mode consistent with the alignment. If
 648              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 649              mode. Otherwise, use the smallest mode containing the field.  */
 650
 651           if (GET_MODE (op0) == BLKmode
 652               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 653             bestmode
 654               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 655                                MEM_VOLATILE_P (op0));
 656           else
 657             bestmode = GET_MODE (op0);
 658
 659           if (bestmode == VOIDmode
 660               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 661                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 662             goto insv_loses;
 663
 664           /* Adjust address to point to the containing unit of that mode.
 665              Compute offset as multiple of this unit, counting in bytes.  */
 666           unit = GET_MODE_BITSIZE (bestmode);
 667           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 668           bitpos = bitnum % unit;
 669           op0 = adjust_address (op0, bestmode,  offset);
 670
 671           /* Fetch that unit, store the bitfield in it, then store
 672              the unit.  */
 673           tempreg = copy_to_reg (op0);
 674           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
 675           emit_move_insn (op0, tempreg);
 676           return value;
 677         }
 678       volatile_ok = save_volatile_ok;
 679
 680       /* Add OFFSET into OP0's address.  */
 681       if (MEM_P (xop0))
 682         xop0 = adjust_address (xop0, byte_mode, offset);
 683
 684       /* If xop0 is a register, we need it in MAXMODE
 685          to make it acceptable to the format of insv.  */
 686       if (GET_CODE (xop0) == SUBREG)
 687         /* We can't just change the mode, because this might clobber op0,
 688            and we will need the original value of op0 if insv fails.  */
 689         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 690       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 691         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 692
 693       /* On big-endian machines, we count bits from the most significant.
 694          If the bit field insn does not, we must invert.  */
 695
 696       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 697         xbitpos = unit - bitsize - xbitpos;
 698
 699       /* We have been counting XBITPOS within UNIT.
 700          Count instead within the size of the register.  */
 701       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 702         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 703
 704       unit = GET_MODE_BITSIZE (maxmode);
 705
 706       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 707       value1 = value;
 708       if (GET_MODE (value) != maxmode)
 709         {
 710           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 711             {
 712               /* Optimization: Don't bother really extending VALUE
 713                  if it has all the bits we will actually use.  However,
 714                  if we must narrow it, be sure we do it correctly.  */
 715
 716               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 717                 {
 718                   rtx tmp;
 719
 720                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 721                   if (! tmp)
 722                     tmp = simplify_gen_subreg (maxmode,
 723                                                force_reg (GET_MODE (value),
 724                                                           value1),
 725                                                GET_MODE (value), 0);
 726                   value1 = tmp;
 727                 }
 728               else
 729                 value1 = gen_lowpart (maxmode, value1);
 730             }
 731           else if (GET_CODE (value) == CONST_INT)
 732             value1 = gen_int_mode (INTVAL (value), maxmode);
 733           else if (!CONSTANT_P (value))
 734             /* Parse phase is supposed to make VALUE's data type
 735                match that of the component reference, which is a type
 736                at least as wide as the field; so VALUE should have
 737                a mode that corresponds to that type.  */
 738             abort ();
 739         }
 740
 741       /* If this machine's insv insists on a register,
 742          get VALUE1 into a register.  */
 743       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 744              (value1, maxmode)))
 745         value1 = force_reg (maxmode, value1);
 746
 747       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 748       if (pat)
 749         emit_insn (pat);
 750       else
 751         {
 752           delete_insns_since (last);
 753           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 754         }
 755     }
 756   else
 757     insv_loses:
 758     /* Insv is not available; store using shifts and boolean ops.  */
 759     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 760   return value;
 761 }
 762 \f
 763 /* Use shifts and boolean operations to store VALUE
 764    into a bit field of width BITSIZE
 765    in a memory location specified by OP0 except offset by OFFSET bytes.
 766      (OFFSET must be 0 if OP0 is a register.)
 767    The field starts at position BITPOS within the byte.
 768     (If OP0 is a register, it may be a full word or a narrower mode,
 769      but BITPOS still counts within a full word,
 770      which is significant on bigendian machines.)  */
 771
 772 static void
 773 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 774                        unsigned HOST_WIDE_INT bitsize,
 775                        unsigned HOST_WIDE_INT bitpos, rtx value)
 776 {
 777   enum machine_mode mode;
 778   unsigned int total_bits = BITS_PER_WORD;
 779   rtx subtarget, temp;
 780   int all_zero = 0;
 781   int all_one = 0;
 782
 783   /* There is a case not handled here:
 784      a structure with a known alignment of just a halfword
 785      and a field split across two aligned halfwords within the structure.
 786      Or likewise a structure with a known alignment of just a byte
 787      and a field split across two bytes.
 788      Such cases are not supposed to be able to occur.  */
 789
 790   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 791     {
 792       if (offset != 0)
 793         abort ();
 794       /* Special treatment for a bit field split across two registers.  */
 795       if (bitsize + bitpos > BITS_PER_WORD)
 796         {
 797           store_split_bit_field (op0, bitsize, bitpos, value);
 798           return;
 799         }
 800     }
 801   else
 802     {
 803       /* Get the proper mode to use for this field.  We want a mode that
 804          includes the entire field.  If such a mode would be larger than
 805          a word, we won't be doing the extraction the normal way.
 806          We don't want a mode bigger than the destination.  */
 807
 808       mode = GET_MODE (op0);
 809       if (GET_MODE_BITSIZE (mode) == 0
 810           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 811         mode = word_mode;
 812       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 813                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 814
 815       if (mode == VOIDmode)
 816         {
 817           /* The only way this should occur is if the field spans word
 818              boundaries.  */
 819           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 820                                  value);
 821           return;
 822         }
 823
 824       total_bits = GET_MODE_BITSIZE (mode);
 825
 826       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 827          be in the range 0 to total_bits-1, and put any excess bytes in
 828          OFFSET.  */
 829       if (bitpos >= total_bits)
 830         {
 831           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 832           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 833                      * BITS_PER_UNIT);
 834         }
 835
 836       /* Get ref to an aligned byte, halfword, or word containing the field.
 837          Adjust BITPOS to be position within a word,
 838          and OFFSET to be the offset of that word.
 839          Then alter OP0 to refer to that word.  */
 840       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 841       offset -= (offset % (total_bits / BITS_PER_UNIT));
 842       op0 = adjust_address (op0, mode, offset);
 843     }
 844
 845   mode = GET_MODE (op0);
 846
 847   /* Now MODE is either some integral mode for a MEM as OP0,
 848      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 849      The bit field is contained entirely within OP0.
 850      BITPOS is the starting bit number within OP0.
 851      (OP0's mode may actually be narrower than MODE.)  */
 852
 853   if (BYTES_BIG_ENDIAN)
 854       /* BITPOS is the distance between our msb
 855          and that of the containing datum.
 856          Convert it to the distance from the lsb.  */
 857       bitpos = total_bits - bitsize - bitpos;
 858
 859   /* Now BITPOS is always the distance between our lsb
 860      and that of OP0.  */
 861
 862   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 863      we must first convert its mode to MODE.  */
 864
 865   if (GET_CODE (value) == CONST_INT)
 866     {
 867       HOST_WIDE_INT v = INTVAL (value);
 868
 869       if (bitsize < HOST_BITS_PER_WIDE_INT)
 870         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 871
 872       if (v == 0)
 873         all_zero = 1;
 874       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 875                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 876                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 877         all_one = 1;
 878
 879       value = lshift_value (mode, value, bitpos, bitsize);
 880     }
 881   else
 882     {
 883       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 884                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 885
 886       if (GET_MODE (value) != mode)
 887         {
 888           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 889               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 890             value = gen_lowpart (mode, value);
 891           else
 892             value = convert_to_mode (mode, value, 1);
 893         }
 894
 895       if (must_and)
 896         value = expand_binop (mode, and_optab, value,
 897                               mask_rtx (mode, 0, bitsize, 0),
 898                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 899       if (bitpos > 0)
 900         value = expand_shift (LSHIFT_EXPR, mode, value,
 901                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 902     }
 903
 904   /* Now clear the chosen bits in OP0,
 905      except that if VALUE is -1 we need not bother.  */
 906
 907   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 908
 909   if (! all_one)
 910     {
 911       temp = expand_binop (mode, and_optab, op0,
 912                            mask_rtx (mode, bitpos, bitsize, 1),
 913                            subtarget, 1, OPTAB_LIB_WIDEN);
 914       subtarget = temp;
 915     }
 916   else
 917     temp = op0;
 918
 919   /* Now logical-or VALUE into OP0, unless it is zero.  */
 920
 921   if (! all_zero)
 922     temp = expand_binop (mode, ior_optab, temp, value,
 923                          subtarget, 1, OPTAB_LIB_WIDEN);
 924   if (op0 != temp)
 925     emit_move_insn (op0, temp);
 926 }
 927 \f
 928 /* Store a bit field that is split across multiple accessible memory objects.
 929
 930    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 931    BITSIZE is the field width; BITPOS the position of its first bit
 932    (within the word).
 933    VALUE is the value to store.
 934
 935    This does not yet handle fields wider than BITS_PER_WORD.  */
 936
 937 static void
 938 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 939                        unsigned HOST_WIDE_INT bitpos, rtx value)
 940 {
 941   unsigned int unit;
 942   unsigned int bitsdone = 0;
 943
 944   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 945      much at a time.  */
 946   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 947     unit = BITS_PER_WORD;
 948   else
 949     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 950
 951   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 952      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 953      that VALUE might be a floating-point constant.  */
 954   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 955     {
 956       rtx word = gen_lowpart_common (word_mode, value);
 957
 958       if (word && (value != word))
 959         value = word;
 960       else
 961         value = gen_lowpart_common (word_mode,
 962                                     force_reg (GET_MODE (value) != VOIDmode
 963                                                ? GET_MODE (value)
 964                                                : word_mode, value));
 965     }
 966
 967   while (bitsdone < bitsize)
 968     {
 969       unsigned HOST_WIDE_INT thissize;
 970       rtx part, word;
 971       unsigned HOST_WIDE_INT thispos;
 972       unsigned HOST_WIDE_INT offset;
 973
 974       offset = (bitpos + bitsdone) / unit;
 975       thispos = (bitpos + bitsdone) % unit;
 976
 977       /* THISSIZE must not overrun a word boundary.  Otherwise,
 978          store_fixed_bit_field will call us again, and we will mutually
 979          recurse forever.  */
 980       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 981       thissize = MIN (thissize, unit - thispos);
 982
 983       if (BYTES_BIG_ENDIAN)
 984         {
 985           int total_bits;
 986
 987           /* We must do an endian conversion exactly the same way as it is
 988              done in extract_bit_field, so that the two calls to
 989              extract_fixed_bit_field will have comparable arguments.  */
 990           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 991             total_bits = BITS_PER_WORD;
 992           else
 993             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 994
 995           /* Fetch successively less significant portions.  */
 996           if (GET_CODE (value) == CONST_INT)
 997             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 998                              >> (bitsize - bitsdone - thissize))
 999                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1000           else
1001             /* The args are chosen so that the last part includes the
1002                lsb.  Give extract_bit_field the value it needs (with
1003                endianness compensation) to fetch the piece we want.  */
1004             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1005                                             total_bits - bitsize + bitsdone,
1006                                             NULL_RTX, 1);
1007         }
1008       else
1009         {
1010           /* Fetch successively more significant portions.  */
1011           if (GET_CODE (value) == CONST_INT)
1012             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1013                              >> bitsdone)
1014                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1015           else
1016             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1017                                             bitsdone, NULL_RTX, 1);
1018         }
1019
1020       /* If OP0 is a register, then handle OFFSET here.
1021
1022          When handling multiword bitfields, extract_bit_field may pass
1023          down a word_mode SUBREG of a larger REG for a bitfield that actually
1024          crosses a word boundary.  Thus, for a SUBREG, we must find
1025          the current word starting from the base register.  */
1026       if (GET_CODE (op0) == SUBREG)
1027         {
1028           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1029           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1030                                         GET_MODE (SUBREG_REG (op0)));
1031           offset = 0;
1032         }
1033       else if (REG_P (op0))
1034         {
1035           word = operand_subword_force (op0, offset, GET_MODE (op0));
1036           offset = 0;
1037         }
1038       else
1039         word = op0;
1040
1041       /* OFFSET is in UNITs, and UNIT is in bits.
1042          store_fixed_bit_field wants offset in bytes.  */
1043       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1044                              thispos, part);
1045       bitsdone += thissize;
1046     }
1047 }
1048 \f
1049 /* Generate code to extract a byte-field from STR_RTX
1050    containing BITSIZE bits, starting at BITNUM,
1051    and put it in TARGET if possible (if TARGET is nonzero).
1052    Regardless of TARGET, we return the rtx for where the value is placed.
1053
1054    STR_RTX is the structure containing the byte (a REG or MEM).
1055    UNSIGNEDP is nonzero if this is an unsigned bit field.
1056    MODE is the natural mode of the field value once extracted.
1057    TMODE is the mode the caller would like the value to have;
1058    but the value may be returned with type MODE instead.
1059
1060    TOTAL_SIZE is the size in bytes of the containing structure,
1061    or -1 if varying.
1062
1063    If a TARGET is specified and we can store in it at no extra cost,
1064    we do so, and return TARGET.
1065    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1066    if they are equally easy.  */
1067
1068 rtx
1069 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1070                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1071                    enum machine_mode mode, enum machine_mode tmode)
1072 {
1073   unsigned int unit
1074     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1075   unsigned HOST_WIDE_INT offset = bitnum / unit;
1076   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1077   rtx op0 = str_rtx;
1078   rtx spec_target = target;
1079   rtx spec_target_subreg = 0;
1080   enum machine_mode int_mode;
1081   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1082   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1083   enum machine_mode mode1;
1084   int byte_offset;
1085
1086   if (tmode == VOIDmode)
1087     tmode = mode;
1088
1089   while (GET_CODE (op0) == SUBREG)
1090     {
1091       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1092       if (bitpos > unit)
1093         {
1094           offset += (bitpos / unit);
1095           bitpos %= unit;
1096         }
1097       op0 = SUBREG_REG (op0);
1098     }
1099
1100   if (REG_P (op0)
1101       && mode == GET_MODE (op0)
1102       && bitnum == 0
1103       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1104     {
1105       /* We're trying to extract a full register from itself.  */
1106       return op0;
1107     }
1108
1109   /* Use vec_extract patterns for extracting parts of vectors whenever
1110      available.  */
1111   if (VECTOR_MODE_P (GET_MODE (op0))
1112       && !MEM_P (op0)
1113       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1114           != CODE_FOR_nothing)
1115       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1116           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1117     {
1118       enum machine_mode outermode = GET_MODE (op0);
1119       enum machine_mode innermode = GET_MODE_INNER (outermode);
1120       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1121       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1122       rtx rtxpos = GEN_INT (pos);
1123       rtx src = op0;
1124       rtx dest = NULL, pat, seq;
1125       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1126       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1127       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1128
1129       if (innermode == tmode || innermode == mode)
1130         dest = target;
1131
1132       if (!dest)
1133         dest = gen_reg_rtx (innermode);
1134
1135       start_sequence ();
1136
1137       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1138         dest = copy_to_mode_reg (mode0, dest);
1139
1140       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1141         src = copy_to_mode_reg (mode1, src);
1142
1143       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1144         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1145
1146       /* We could handle this, but we should always be called with a pseudo
1147          for our targets and all insns should take them as outputs.  */
1148       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1149           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1150           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1151         abort ();
1152
1153       pat = GEN_FCN (icode) (dest, src, rtxpos);
1154       seq = get_insns ();
1155       end_sequence ();
1156       if (pat)
1157         {
1158           emit_insn (seq);
1159           emit_insn (pat);
1160           return dest;
1161         }
1162     }
1163
1164   /* Make sure we are playing with integral modes.  Pun with subregs
1165      if we aren't.  */
1166   {
1167     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1168     if (imode != GET_MODE (op0))
1169       {
1170         if (MEM_P (op0))
1171           op0 = adjust_address (op0, imode, 0);
1172         else if (imode != BLKmode)
1173           op0 = gen_lowpart (imode, op0);
1174         else
1175           abort ();
1176       }
1177   }
1178
1179   /* We may be accessing data outside the field, which means
1180      we can alias adjacent data.  */
1181   if (MEM_P (op0))
1182     {
1183       op0 = shallow_copy_rtx (op0);
1184       set_mem_alias_set (op0, 0);
1185       set_mem_expr (op0, 0);
1186     }
1187
1188   /* Extraction of a full-word or multi-word value from a structure
1189      in a register or aligned memory can be done with just a SUBREG.
1190      A subword value in the least significant part of a register
1191      can also be extracted with a SUBREG.  For this, we need the
1192      byte offset of the value in op0.  */
1193
1194   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1195
1196   /* If OP0 is a register, BITPOS must count within a word.
1197      But as we have it, it counts within whatever size OP0 now has.
1198      On a bigendian machine, these are not the same, so convert.  */
1199   if (BYTES_BIG_ENDIAN
1200       && !MEM_P (op0)
1201       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1202     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1203
1204   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1205      If that's wrong, the solution is to test for it and set TARGET to 0
1206      if needed.  */
1207
1208   /* Only scalar integer modes can be converted via subregs.  There is an
1209      additional problem for FP modes here in that they can have a precision
1210      which is different from the size.  mode_for_size uses precision, but
1211      we want a mode based on the size, so we must avoid calling it for FP
1212      modes.  */
1213   mode1  = (SCALAR_INT_MODE_P (tmode)
1214             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1215             : mode);
1216
1217   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1218         && bitpos % BITS_PER_WORD == 0)
1219        || (mode1 != BLKmode
1220            /* ??? The big endian test here is wrong.  This is correct
1221               if the value is in a register, and if mode_for_size is not
1222               the same mode as op0.  This causes us to get unnecessarily
1223               inefficient code from the Thumb port when -mbig-endian.  */
1224            && (BYTES_BIG_ENDIAN
1225                ? bitpos + bitsize == BITS_PER_WORD
1226                : bitpos == 0)))
1227       && ((!MEM_P (op0)
1228            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1229                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1230            && GET_MODE_SIZE (mode1) != 0
1231            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1232           || (MEM_P (op0)
1233               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1234                   || (offset * BITS_PER_UNIT % bitsize == 0
1235                       && MEM_ALIGN (op0) % bitsize == 0)))))
1236     {
1237       if (mode1 != GET_MODE (op0))
1238         {
1239           if (GET_CODE (op0) == SUBREG)
1240             {
1241               if (GET_MODE (SUBREG_REG (op0)) == mode1
1242                   || GET_MODE_CLASS (mode1) == MODE_INT
1243                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1244                 op0 = SUBREG_REG (op0);
1245               else
1246                 /* Else we've got some float mode source being extracted into
1247                    a different float mode destination -- this combination of
1248                    subregs results in Severe Tire Damage.  */
1249                 goto no_subreg_mode_swap;
1250             }
1251           if (REG_P (op0))
1252             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1253           else
1254             op0 = adjust_address (op0, mode1, offset);
1255         }
1256       if (mode1 != mode)
1257         return convert_to_mode (tmode, op0, unsignedp);
1258       return op0;
1259     }
1260  no_subreg_mode_swap:
1261
1262   /* Handle fields bigger than a word.  */
1263
1264   if (bitsize > BITS_PER_WORD)
1265     {
1266       /* Here we transfer the words of the field
1267          in the order least significant first.
1268          This is because the most significant word is the one which may
1269          be less than full.  */
1270
1271       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1272       unsigned int i;
1273
1274       if (target == 0 || !REG_P (target))
1275         target = gen_reg_rtx (mode);
1276
1277       /* Indicate for flow that the entire target reg is being set.  */
1278       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1279
1280       for (i = 0; i < nwords; i++)
1281         {
1282           /* If I is 0, use the low-order word in both field and target;
1283              if I is 1, use the next to lowest word; and so on.  */
1284           /* Word number in TARGET to use.  */
1285           unsigned int wordnum
1286             = (WORDS_BIG_ENDIAN
1287                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1288                : i);
1289           /* Offset from start of field in OP0.  */
1290           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1291                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1292                                                 * (int) BITS_PER_WORD))
1293                                      : (int) i * BITS_PER_WORD);
1294           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1295           rtx result_part
1296             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1297                                            bitsize - i * BITS_PER_WORD),
1298                                  bitnum + bit_offset, 1, target_part, mode,
1299                                  word_mode);
1300
1301           if (target_part == 0)
1302             abort ();
1303
1304           if (result_part != target_part)
1305             emit_move_insn (target_part, result_part);
1306         }
1307
1308       if (unsignedp)
1309         {
1310           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1311              need to be zero'd out.  */
1312           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1313             {
1314               unsigned int i, total_words;
1315
1316               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1317               for (i = nwords; i < total_words; i++)
1318                 emit_move_insn
1319                   (operand_subword (target,
1320                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1321                                     1, VOIDmode),
1322                    const0_rtx);
1323             }
1324           return target;
1325         }
1326
1327       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1328       target = expand_shift (LSHIFT_EXPR, mode, target,
1329                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1330                              NULL_RTX, 0);
1331       return expand_shift (RSHIFT_EXPR, mode, target,
1332                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1333                            NULL_RTX, 0);
1334     }
1335
1336   /* From here on we know the desired field is smaller than a word.  */
1337
1338   /* Check if there is a correspondingly-sized integer field, so we can
1339      safely extract it as one size of integer, if necessary; then
1340      truncate or extend to the size that is wanted; then use SUBREGs or
1341      convert_to_mode to get one of the modes we really wanted.  */
1342
1343   int_mode = int_mode_for_mode (tmode);
1344   if (int_mode == BLKmode)
1345     int_mode = int_mode_for_mode (mode);
1346   if (int_mode == BLKmode)
1347     abort ();    /* Should probably push op0 out to memory and then
1348                     do a load.  */
1349
1350   /* OFFSET is the number of words or bytes (UNIT says which)
1351      from STR_RTX to the first word or byte containing part of the field.  */
1352
1353   if (!MEM_P (op0))
1354     {
1355       if (offset != 0
1356           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1357         {
1358           if (!REG_P (op0))
1359             op0 = copy_to_reg (op0);
1360           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1361                                 op0, (offset * UNITS_PER_WORD));
1362         }
1363       offset = 0;
1364     }
1365
1366   /* Now OFFSET is nonzero only for memory operands.  */
1367
1368   if (unsignedp)
1369     {
1370       if (HAVE_extzv
1371           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1372           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1373                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1374         {
1375           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1376           rtx bitsize_rtx, bitpos_rtx;
1377           rtx last = get_last_insn ();
1378           rtx xop0 = op0;
1379           rtx xtarget = target;
1380           rtx xspec_target = spec_target;
1381           rtx xspec_target_subreg = spec_target_subreg;
1382           rtx pat;
1383           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1384
1385           if (MEM_P (xop0))
1386             {
1387               int save_volatile_ok = volatile_ok;
1388               volatile_ok = 1;
1389
1390               /* Is the memory operand acceptable?  */
1391               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1392                      (xop0, GET_MODE (xop0))))
1393                 {
1394                   /* No, load into a reg and extract from there.  */
1395                   enum machine_mode bestmode;
1396
1397                   /* Get the mode to use for inserting into this field.  If
1398                      OP0 is BLKmode, get the smallest mode consistent with the
1399                      alignment. If OP0 is a non-BLKmode object that is no
1400                      wider than MAXMODE, use its mode. Otherwise, use the
1401                      smallest mode containing the field.  */
1402
1403                   if (GET_MODE (xop0) == BLKmode
1404                       || (GET_MODE_SIZE (GET_MODE (op0))
1405                           > GET_MODE_SIZE (maxmode)))
1406                     bestmode = get_best_mode (bitsize, bitnum,
1407                                               MEM_ALIGN (xop0), maxmode,
1408                                               MEM_VOLATILE_P (xop0));
1409                   else
1410                     bestmode = GET_MODE (xop0);
1411
1412                   if (bestmode == VOIDmode
1413                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1414                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1415                     goto extzv_loses;
1416
1417                   /* Compute offset as multiple of this unit,
1418                      counting in bytes.  */
1419                   unit = GET_MODE_BITSIZE (bestmode);
1420                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1421                   xbitpos = bitnum % unit;
1422                   xop0 = adjust_address (xop0, bestmode, xoffset);
1423
1424                   /* Fetch it to a register in that size.  */
1425                   xop0 = force_reg (bestmode, xop0);
1426
1427                   /* XBITPOS counts within UNIT, which is what is expected.  */
1428                 }
1429               else
1430                 /* Get ref to first byte containing part of the field.  */
1431                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1432
1433               volatile_ok = save_volatile_ok;
1434             }
1435
1436           /* If op0 is a register, we need it in MAXMODE (which is usually
1437              SImode). to make it acceptable to the format of extzv.  */
1438           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1439             goto extzv_loses;
1440           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1441             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1442
1443           /* On big-endian machines, we count bits from the most significant.
1444              If the bit field insn does not, we must invert.  */
1445           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1446             xbitpos = unit - bitsize - xbitpos;
1447
1448           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1449           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1450             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1451
1452           unit = GET_MODE_BITSIZE (maxmode);
1453
1454           if (xtarget == 0
1455               || (flag_force_mem && MEM_P (xtarget)))
1456             xtarget = xspec_target = gen_reg_rtx (tmode);
1457
1458           if (GET_MODE (xtarget) != maxmode)
1459             {
1460               if (REG_P (xtarget))
1461                 {
1462                   int wider = (GET_MODE_SIZE (maxmode)
1463                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1464                   xtarget = gen_lowpart (maxmode, xtarget);
1465                   if (wider)
1466                     xspec_target_subreg = xtarget;
1467                 }
1468               else
1469                 xtarget = gen_reg_rtx (maxmode);
1470             }
1471
1472           /* If this machine's extzv insists on a register target,
1473              make sure we have one.  */
1474           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1475                  (xtarget, maxmode)))
1476             xtarget = gen_reg_rtx (maxmode);
1477
1478           bitsize_rtx = GEN_INT (bitsize);
1479           bitpos_rtx = GEN_INT (xbitpos);
1480
1481           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1482           if (pat)
1483             {
1484               emit_insn (pat);
1485               target = xtarget;
1486               spec_target = xspec_target;
1487               spec_target_subreg = xspec_target_subreg;
1488             }
1489           else
1490             {
1491               delete_insns_since (last);
1492               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1493                                                 bitpos, target, 1);
1494             }
1495         }
1496       else
1497       extzv_loses:
1498         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1499                                           bitpos, target, 1);
1500     }
1501   else
1502     {
1503       if (HAVE_extv
1504           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1505           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1506                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1507         {
1508           int xbitpos = bitpos, xoffset = offset;
1509           rtx bitsize_rtx, bitpos_rtx;
1510           rtx last = get_last_insn ();
1511           rtx xop0 = op0, xtarget = target;
1512           rtx xspec_target = spec_target;
1513           rtx xspec_target_subreg = spec_target_subreg;
1514           rtx pat;
1515           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1516
1517           if (MEM_P (xop0))
1518             {
1519               /* Is the memory operand acceptable?  */
1520               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1521                      (xop0, GET_MODE (xop0))))
1522                 {
1523                   /* No, load into a reg and extract from there.  */
1524                   enum machine_mode bestmode;
1525
1526                   /* Get the mode to use for inserting into this field.  If
1527                      OP0 is BLKmode, get the smallest mode consistent with the
1528                      alignment. If OP0 is a non-BLKmode object that is no
1529                      wider than MAXMODE, use its mode. Otherwise, use the
1530                      smallest mode containing the field.  */
1531
1532                   if (GET_MODE (xop0) == BLKmode
1533                       || (GET_MODE_SIZE (GET_MODE (op0))
1534                           > GET_MODE_SIZE (maxmode)))
1535                     bestmode = get_best_mode (bitsize, bitnum,
1536                                               MEM_ALIGN (xop0), maxmode,
1537                                               MEM_VOLATILE_P (xop0));
1538                   else
1539                     bestmode = GET_MODE (xop0);
1540
1541                   if (bestmode == VOIDmode
1542                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1543                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1544                     goto extv_loses;
1545
1546                   /* Compute offset as multiple of this unit,
1547                      counting in bytes.  */
1548                   unit = GET_MODE_BITSIZE (bestmode);
1549                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1550                   xbitpos = bitnum % unit;
1551                   xop0 = adjust_address (xop0, bestmode, xoffset);
1552
1553                   /* Fetch it to a register in that size.  */
1554                   xop0 = force_reg (bestmode, xop0);
1555
1556                   /* XBITPOS counts within UNIT, which is what is expected.  */
1557                 }
1558               else
1559                 /* Get ref to first byte containing part of the field.  */
1560                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1561             }
1562
1563           /* If op0 is a register, we need it in MAXMODE (which is usually
1564              SImode) to make it acceptable to the format of extv.  */
1565           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1566             goto extv_loses;
1567           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1568             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1569
1570           /* On big-endian machines, we count bits from the most significant.
1571              If the bit field insn does not, we must invert.  */
1572           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1573             xbitpos = unit - bitsize - xbitpos;
1574
1575           /* XBITPOS counts within a size of UNIT.
1576              Adjust to count within a size of MAXMODE.  */
1577           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1578             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1579
1580           unit = GET_MODE_BITSIZE (maxmode);
1581
1582           if (xtarget == 0
1583               || (flag_force_mem && MEM_P (xtarget)))
1584             xtarget = xspec_target = gen_reg_rtx (tmode);
1585
1586           if (GET_MODE (xtarget) != maxmode)
1587             {
1588               if (REG_P (xtarget))
1589                 {
1590                   int wider = (GET_MODE_SIZE (maxmode)
1591                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1592                   xtarget = gen_lowpart (maxmode, xtarget);
1593                   if (wider)
1594                     xspec_target_subreg = xtarget;
1595                 }
1596               else
1597                 xtarget = gen_reg_rtx (maxmode);
1598             }
1599
1600           /* If this machine's extv insists on a register target,
1601              make sure we have one.  */
1602           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1603                  (xtarget, maxmode)))
1604             xtarget = gen_reg_rtx (maxmode);
1605
1606           bitsize_rtx = GEN_INT (bitsize);
1607           bitpos_rtx = GEN_INT (xbitpos);
1608
1609           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1610           if (pat)
1611             {
1612               emit_insn (pat);
1613               target = xtarget;
1614               spec_target = xspec_target;
1615               spec_target_subreg = xspec_target_subreg;
1616             }
1617           else
1618             {
1619               delete_insns_since (last);
1620               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1621                                                 bitpos, target, 0);
1622             }
1623         }
1624       else
1625       extv_loses:
1626         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1627                                           bitpos, target, 0);
1628     }
1629   if (target == spec_target)
1630     return target;
1631   if (target == spec_target_subreg)
1632     return spec_target;
1633   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1634     {
1635       /* If the target mode is floating-point, first convert to the
1636          integer mode of that size and then access it as a floating-point
1637          value via a SUBREG.  */
1638       if (GET_MODE_CLASS (tmode) != MODE_INT
1639           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1640         {
1641           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1642                                                    MODE_INT, 0),
1643                                     target, unsignedp);
1644           return gen_lowpart (tmode, target);
1645         }
1646       else
1647         return convert_to_mode (tmode, target, unsignedp);
1648     }
1649   return target;
1650 }
1651 \f
1652 /* Extract a bit field using shifts and boolean operations
1653    Returns an rtx to represent the value.
1654    OP0 addresses a register (word) or memory (byte).
1655    BITPOS says which bit within the word or byte the bit field starts in.
1656    OFFSET says how many bytes farther the bit field starts;
1657     it is 0 if OP0 is a register.
1658    BITSIZE says how many bits long the bit field is.
1659     (If OP0 is a register, it may be narrower than a full word,
1660      but BITPOS still counts within a full word,
1661      which is significant on bigendian machines.)
1662
1663    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1664    If TARGET is nonzero, attempts to store the value there
1665    and return TARGET, but this is not guaranteed.
1666    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1667
1668 static rtx
1669 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1670                          unsigned HOST_WIDE_INT offset,
1671                          unsigned HOST_WIDE_INT bitsize,
1672                          unsigned HOST_WIDE_INT bitpos, rtx target,
1673                          int unsignedp)
1674 {
1675   unsigned int total_bits = BITS_PER_WORD;
1676   enum machine_mode mode;
1677
1678   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1679     {
1680       /* Special treatment for a bit field split across two registers.  */
1681       if (bitsize + bitpos > BITS_PER_WORD)
1682         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1683     }
1684   else
1685     {
1686       /* Get the proper mode to use for this field.  We want a mode that
1687          includes the entire field.  If such a mode would be larger than
1688          a word, we won't be doing the extraction the normal way.  */
1689
1690       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1691                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1692
1693       if (mode == VOIDmode)
1694         /* The only way this should occur is if the field spans word
1695            boundaries.  */
1696         return extract_split_bit_field (op0, bitsize,
1697                                         bitpos + offset * BITS_PER_UNIT,
1698                                         unsignedp);
1699
1700       total_bits = GET_MODE_BITSIZE (mode);
1701
1702       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1703          be in the range 0 to total_bits-1, and put any excess bytes in
1704          OFFSET.  */
1705       if (bitpos >= total_bits)
1706         {
1707           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1708           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1709                      * BITS_PER_UNIT);
1710         }
1711
1712       /* Get ref to an aligned byte, halfword, or word containing the field.
1713          Adjust BITPOS to be position within a word,
1714          and OFFSET to be the offset of that word.
1715          Then alter OP0 to refer to that word.  */
1716       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1717       offset -= (offset % (total_bits / BITS_PER_UNIT));
1718       op0 = adjust_address (op0, mode, offset);
1719     }
1720
1721   mode = GET_MODE (op0);
1722
1723   if (BYTES_BIG_ENDIAN)
1724     /* BITPOS is the distance between our msb and that of OP0.
1725        Convert it to the distance from the lsb.  */
1726     bitpos = total_bits - bitsize - bitpos;
1727
1728   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1729      We have reduced the big-endian case to the little-endian case.  */
1730
1731   if (unsignedp)
1732     {
1733       if (bitpos)
1734         {
1735           /* If the field does not already start at the lsb,
1736              shift it so it does.  */
1737           tree amount = build_int_2 (bitpos, 0);
1738           /* Maybe propagate the target for the shift.  */
1739           /* But not if we will return it--could confuse integrate.c.  */
1740           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1741           if (tmode != mode) subtarget = 0;
1742           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1743         }
1744       /* Convert the value to the desired mode.  */
1745       if (mode != tmode)
1746         op0 = convert_to_mode (tmode, op0, 1);
1747
1748       /* Unless the msb of the field used to be the msb when we shifted,
1749          mask out the upper bits.  */
1750
1751       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1752         return expand_binop (GET_MODE (op0), and_optab, op0,
1753                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1754                              target, 1, OPTAB_LIB_WIDEN);
1755       return op0;
1756     }
1757
1758   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1759      then arithmetic-shift its lsb to the lsb of the word.  */
1760   op0 = force_reg (mode, op0);
1761   if (mode != tmode)
1762     target = 0;
1763
1764   /* Find the narrowest integer mode that contains the field.  */
1765
1766   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1767        mode = GET_MODE_WIDER_MODE (mode))
1768     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1769       {
1770         op0 = convert_to_mode (mode, op0, 0);
1771         break;
1772       }
1773
1774   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1775     {
1776       tree amount
1777         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1778       /* Maybe propagate the target for the shift.  */
1779       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1780       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1781     }
1782
1783   return expand_shift (RSHIFT_EXPR, mode, op0,
1784                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1785                        target, 0);
1786 }
1787 \f
1788 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1789    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1790    complement of that if COMPLEMENT.  The mask is truncated if
1791    necessary to the width of mode MODE.  The mask is zero-extended if
1792    BITSIZE+BITPOS is too small for MODE.  */
1793
1794 static rtx
1795 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1796 {
1797   HOST_WIDE_INT masklow, maskhigh;
1798
1799   if (bitsize == 0)
1800     masklow = 0;
1801   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1802     masklow = (HOST_WIDE_INT) -1 << bitpos;
1803   else
1804     masklow = 0;
1805
1806   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1807     masklow &= ((unsigned HOST_WIDE_INT) -1
1808                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1809
1810   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1811     maskhigh = -1;
1812   else
1813     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1814
1815   if (bitsize == 0)
1816     maskhigh = 0;
1817   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1818     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1819                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1820   else
1821     maskhigh = 0;
1822
1823   if (complement)
1824     {
1825       maskhigh = ~maskhigh;
1826       masklow = ~masklow;
1827     }
1828
1829   return immed_double_const (masklow, maskhigh, mode);
1830 }
1831
1832 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1833    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1834
1835 static rtx
1836 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1837 {
1838   unsigned HOST_WIDE_INT v = INTVAL (value);
1839   HOST_WIDE_INT low, high;
1840
1841   if (bitsize < HOST_BITS_PER_WIDE_INT)
1842     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1843
1844   if (bitpos < HOST_BITS_PER_WIDE_INT)
1845     {
1846       low = v << bitpos;
1847       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1848     }
1849   else
1850     {
1851       low = 0;
1852       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1853     }
1854
1855   return immed_double_const (low, high, mode);
1856 }
1857 \f
1858 /* Extract a bit field that is split across two words
1859    and return an RTX for the result.
1860
1861    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1862    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1863    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1864
1865 static rtx
1866 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1867                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1868 {
1869   unsigned int unit;
1870   unsigned int bitsdone = 0;
1871   rtx result = NULL_RTX;
1872   int first = 1;
1873
1874   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1875      much at a time.  */
1876   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1877     unit = BITS_PER_WORD;
1878   else
1879     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1880
1881   while (bitsdone < bitsize)
1882     {
1883       unsigned HOST_WIDE_INT thissize;
1884       rtx part, word;
1885       unsigned HOST_WIDE_INT thispos;
1886       unsigned HOST_WIDE_INT offset;
1887
1888       offset = (bitpos + bitsdone) / unit;
1889       thispos = (bitpos + bitsdone) % unit;
1890
1891       /* THISSIZE must not overrun a word boundary.  Otherwise,
1892          extract_fixed_bit_field will call us again, and we will mutually
1893          recurse forever.  */
1894       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1895       thissize = MIN (thissize, unit - thispos);
1896
1897       /* If OP0 is a register, then handle OFFSET here.
1898
1899          When handling multiword bitfields, extract_bit_field may pass
1900          down a word_mode SUBREG of a larger REG for a bitfield that actually
1901          crosses a word boundary.  Thus, for a SUBREG, we must find
1902          the current word starting from the base register.  */
1903       if (GET_CODE (op0) == SUBREG)
1904         {
1905           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1906           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1907                                         GET_MODE (SUBREG_REG (op0)));
1908           offset = 0;
1909         }
1910       else if (REG_P (op0))
1911         {
1912           word = operand_subword_force (op0, offset, GET_MODE (op0));
1913           offset = 0;
1914         }
1915       else
1916         word = op0;
1917
1918       /* Extract the parts in bit-counting order,
1919          whose meaning is determined by BYTES_PER_UNIT.
1920          OFFSET is in UNITs, and UNIT is in bits.
1921          extract_fixed_bit_field wants offset in bytes.  */
1922       part = extract_fixed_bit_field (word_mode, word,
1923                                       offset * unit / BITS_PER_UNIT,
1924                                       thissize, thispos, 0, 1);
1925       bitsdone += thissize;
1926
1927       /* Shift this part into place for the result.  */
1928       if (BYTES_BIG_ENDIAN)
1929         {
1930           if (bitsize != bitsdone)
1931             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1932                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1933         }
1934       else
1935         {
1936           if (bitsdone != thissize)
1937             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1938                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1939         }
1940
1941       if (first)
1942         result = part;
1943       else
1944         /* Combine the parts with bitwise or.  This works
1945            because we extracted each part as an unsigned bit field.  */
1946         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1947                                OPTAB_LIB_WIDEN);
1948
1949       first = 0;
1950     }
1951
1952   /* Unsigned bit field: we are done.  */
1953   if (unsignedp)
1954     return result;
1955   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1956   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1957                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1958                          NULL_RTX, 0);
1959   return expand_shift (RSHIFT_EXPR, word_mode, result,
1960                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1961 }
1962 \f
1963 /* Add INC into TARGET.  */
1964
1965 void
1966 expand_inc (rtx target, rtx inc)
1967 {
1968   rtx value = expand_binop (GET_MODE (target), add_optab,
1969                             target, inc,
1970                             target, 0, OPTAB_LIB_WIDEN);
1971   if (value != target)
1972     emit_move_insn (target, value);
1973 }
1974
1975 /* Subtract DEC from TARGET.  */
1976
1977 void
1978 expand_dec (rtx target, rtx dec)
1979 {
1980   rtx value = expand_binop (GET_MODE (target), sub_optab,
1981                             target, dec,
1982                             target, 0, OPTAB_LIB_WIDEN);
1983   if (value != target)
1984     emit_move_insn (target, value);
1985 }
1986 \f
1987 /* Output a shift instruction for expression code CODE,
1988    with SHIFTED being the rtx for the value to shift,
1989    and AMOUNT the tree for the amount to shift by.
1990    Store the result in the rtx TARGET, if that is convenient.
1991    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1992    Return the rtx for where the value is.  */
1993
1994 rtx
1995 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1996               tree amount, rtx target, int unsignedp)
1997 {
1998   rtx op1, temp = 0;
1999   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2000   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2001   int try;
2002
2003   /* Previously detected shift-counts computed by NEGATE_EXPR
2004      and shifted in the other direction; but that does not work
2005      on all machines.  */
2006
2007   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2008
2009   if (SHIFT_COUNT_TRUNCATED)
2010     {
2011       if (GET_CODE (op1) == CONST_INT
2012           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2013               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2014         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2015                        % GET_MODE_BITSIZE (mode));
2016       else if (GET_CODE (op1) == SUBREG
2017                && subreg_lowpart_p (op1))
2018         op1 = SUBREG_REG (op1);
2019     }
2020
2021   if (op1 == const0_rtx)
2022     return shifted;
2023
2024   /* Check whether its cheaper to implement a left shift by a constant
2025      bit count by a sequence of additions.  */
2026   if (code == LSHIFT_EXPR
2027       && GET_CODE (op1) == CONST_INT
2028       && INTVAL (op1) > 0
2029       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2030       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2031     {
2032       int i;
2033       for (i = 0; i < INTVAL (op1); i++)
2034         {
2035           temp = force_reg (mode, shifted);
2036           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2037                                   unsignedp, OPTAB_LIB_WIDEN);
2038         }
2039       return shifted;
2040     }
2041
2042   for (try = 0; temp == 0 && try < 3; try++)
2043     {
2044       enum optab_methods methods;
2045
2046       if (try == 0)
2047         methods = OPTAB_DIRECT;
2048       else if (try == 1)
2049         methods = OPTAB_WIDEN;
2050       else
2051         methods = OPTAB_LIB_WIDEN;
2052
2053       if (rotate)
2054         {
2055           /* Widening does not work for rotation.  */
2056           if (methods == OPTAB_WIDEN)
2057             continue;
2058           else if (methods == OPTAB_LIB_WIDEN)
2059             {
2060               /* If we have been unable to open-code this by a rotation,
2061                  do it as the IOR of two shifts.  I.e., to rotate A
2062                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2063                  where C is the bitsize of A.
2064
2065                  It is theoretically possible that the target machine might
2066                  not be able to perform either shift and hence we would
2067                  be making two libcalls rather than just the one for the
2068                  shift (similarly if IOR could not be done).  We will allow
2069                  this extremely unlikely lossage to avoid complicating the
2070                  code below.  */
2071
2072               rtx subtarget = target == shifted ? 0 : target;
2073               rtx temp1;
2074               tree type = TREE_TYPE (amount);
2075               tree new_amount = make_tree (type, op1);
2076               tree other_amount
2077                 = fold (build (MINUS_EXPR, type,
2078                                convert (type,
2079                                         build_int_2 (GET_MODE_BITSIZE (mode),
2080                                                      0)),
2081                                amount));
2082
2083               shifted = force_reg (mode, shifted);
2084
2085               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2086                                    mode, shifted, new_amount, subtarget, 1);
2087               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2088                                     mode, shifted, other_amount, 0, 1);
2089               return expand_binop (mode, ior_optab, temp, temp1, target,
2090                                    unsignedp, methods);
2091             }
2092
2093           temp = expand_binop (mode,
2094                                left ? rotl_optab : rotr_optab,
2095                                shifted, op1, target, unsignedp, methods);
2096
2097           /* If we don't have the rotate, but we are rotating by a constant
2098              that is in range, try a rotate in the opposite direction.  */
2099
2100           if (temp == 0 && GET_CODE (op1) == CONST_INT
2101               && INTVAL (op1) > 0
2102               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2103             temp = expand_binop (mode,
2104                                  left ? rotr_optab : rotl_optab,
2105                                  shifted,
2106                                  GEN_INT (GET_MODE_BITSIZE (mode)
2107                                           - INTVAL (op1)),
2108                                  target, unsignedp, methods);
2109         }
2110       else if (unsignedp)
2111         temp = expand_binop (mode,
2112                              left ? ashl_optab : lshr_optab,
2113                              shifted, op1, target, unsignedp, methods);
2114
2115       /* Do arithmetic shifts.
2116          Also, if we are going to widen the operand, we can just as well
2117          use an arithmetic right-shift instead of a logical one.  */
2118       if (temp == 0 && ! rotate
2119           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2120         {
2121           enum optab_methods methods1 = methods;
2122
2123           /* If trying to widen a log shift to an arithmetic shift,
2124              don't accept an arithmetic shift of the same size.  */
2125           if (unsignedp)
2126             methods1 = OPTAB_MUST_WIDEN;
2127
2128           /* Arithmetic shift */
2129
2130           temp = expand_binop (mode,
2131                                left ? ashl_optab : ashr_optab,
2132                                shifted, op1, target, unsignedp, methods1);
2133         }
2134
2135       /* We used to try extzv here for logical right shifts, but that was
2136          only useful for one machine, the VAX, and caused poor code
2137          generation there for lshrdi3, so the code was deleted and a
2138          define_expand for lshrsi3 was added to vax.md.  */
2139     }
2140
2141   if (temp == 0)
2142     abort ();
2143   return temp;
2144 }
2145 \f
2146 enum alg_code { alg_zero, alg_m, alg_shift,
2147                   alg_add_t_m2, alg_sub_t_m2,
2148                   alg_add_factor, alg_sub_factor,
2149                   alg_add_t2_m, alg_sub_t2_m,
2150                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2151
2152 /* This structure records a sequence of operations.
2153    `ops' is the number of operations recorded.
2154    `cost' is their total cost.
2155    The operations are stored in `op' and the corresponding
2156    logarithms of the integer coefficients in `log'.
2157
2158    These are the operations:
2159    alg_zero             total := 0;
2160    alg_m                total := multiplicand;
2161    alg_shift            total := total * coeff
2162    alg_add_t_m2         total := total + multiplicand * coeff;
2163    alg_sub_t_m2         total := total - multiplicand * coeff;
2164    alg_add_factor       total := total * coeff + total;
2165    alg_sub_factor       total := total * coeff - total;
2166    alg_add_t2_m         total := total * coeff + multiplicand;
2167    alg_sub_t2_m         total := total * coeff - multiplicand;
2168
2169    The first operand must be either alg_zero or alg_m.  */
2170
2171 struct algorithm
2172 {
2173   short cost;
2174   short ops;
2175   /* The size of the OP and LOG fields are not directly related to the
2176      word size, but the worst-case algorithms will be if we have few
2177      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2178      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2179      in total wordsize operations.  */
2180   enum alg_code op[MAX_BITS_PER_WORD];
2181   char log[MAX_BITS_PER_WORD];
2182 };
2183
2184 /* Indicates the type of fixup needed after a constant multiplication.
2185    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2186    the result should be negated, and ADD_VARIANT means that the
2187    multiplicand should be added to the result.  */
2188 enum mult_variant {basic_variant, negate_variant, add_variant};
2189
2190 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2191                         int, enum machine_mode mode);
2192 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2193                                  struct algorithm *, enum mult_variant *, int);
2194 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2195                               const struct algorithm *, enum mult_variant);
2196 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2197                                                  int, unsigned HOST_WIDE_INT *,
2198                                                  int *, int *);
2199 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2200 static rtx extract_high_half (enum machine_mode, rtx);
2201 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2202                                        int, int);
2203 /* Compute and return the best algorithm for multiplying by T.
2204    The algorithm must cost less than cost_limit
2205    If retval.cost >= COST_LIMIT, no algorithm was found and all
2206    other field of the returned struct are undefined.
2207    MODE is the machine mode of the multiplication.  */
2208
2209 static void
2210 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2211             int cost_limit, enum machine_mode mode)
2212 {
2213   int m;
2214   struct algorithm *alg_in, *best_alg;
2215   int cost;
2216   unsigned HOST_WIDE_INT q;
2217   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2218
2219   /* Indicate that no algorithm is yet found.  If no algorithm
2220      is found, this value will be returned and indicate failure.  */
2221   alg_out->cost = cost_limit;
2222
2223   if (cost_limit <= 0)
2224     return;
2225
2226   /* Restrict the bits of "t" to the multiplication's mode.  */
2227   t &= GET_MODE_MASK (mode);
2228
2229   /* t == 1 can be done in zero cost.  */
2230   if (t == 1)
2231     {
2232       alg_out->ops = 1;
2233       alg_out->cost = 0;
2234       alg_out->op[0] = alg_m;
2235       return;
2236     }
2237
2238   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2239      fail now.  */
2240   if (t == 0)
2241     {
2242       if (zero_cost >= cost_limit)
2243         return;
2244       else
2245         {
2246           alg_out->ops = 1;
2247           alg_out->cost = zero_cost;
2248           alg_out->op[0] = alg_zero;
2249           return;
2250         }
2251     }
2252
2253   /* We'll be needing a couple extra algorithm structures now.  */
2254
2255   alg_in = alloca (sizeof (struct algorithm));
2256   best_alg = alloca (sizeof (struct algorithm));
2257
2258   /* If we have a group of zero bits at the low-order part of T, try
2259      multiplying by the remaining bits and then doing a shift.  */
2260
2261   if ((t & 1) == 0)
2262     {
2263       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2264       if (m < maxm)
2265         {
2266           q = t >> m;
2267           /* The function expand_shift will choose between a shift and
2268              a sequence of additions, so the observed cost is given as
2269              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2270           cost = m * add_cost[mode];
2271           if (shift_cost[mode][m] < cost)
2272             cost = shift_cost[mode][m];
2273           synth_mult (alg_in, q, cost_limit - cost, mode);
2274
2275           cost += alg_in->cost;
2276           if (cost < cost_limit)
2277             {
2278               struct algorithm *x;
2279               x = alg_in, alg_in = best_alg, best_alg = x;
2280               best_alg->log[best_alg->ops] = m;
2281               best_alg->op[best_alg->ops] = alg_shift;
2282               cost_limit = cost;
2283             }
2284         }
2285     }
2286
2287   /* If we have an odd number, add or subtract one.  */
2288   if ((t & 1) != 0)
2289     {
2290       unsigned HOST_WIDE_INT w;
2291
2292       for (w = 1; (w & t) != 0; w <<= 1)
2293         ;
2294       /* If T was -1, then W will be zero after the loop.  This is another
2295          case where T ends with ...111.  Handling this with (T + 1) and
2296          subtract 1 produces slightly better code and results in algorithm
2297          selection much faster than treating it like the ...0111 case
2298          below.  */
2299       if (w == 0
2300           || (w > 2
2301               /* Reject the case where t is 3.
2302                  Thus we prefer addition in that case.  */
2303               && t != 3))
2304         {
2305           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2306
2307           cost = add_cost[mode];
2308           synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2309
2310           cost += alg_in->cost;
2311           if (cost < cost_limit)
2312             {
2313               struct algorithm *x;
2314               x = alg_in, alg_in = best_alg, best_alg = x;
2315               best_alg->log[best_alg->ops] = 0;
2316               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2317               cost_limit = cost;
2318             }
2319         }
2320       else
2321         {
2322           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2323
2324           cost = add_cost[mode];
2325           synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2326
2327           cost += alg_in->cost;
2328           if (cost < cost_limit)
2329             {
2330               struct algorithm *x;
2331               x = alg_in, alg_in = best_alg, best_alg = x;
2332               best_alg->log[best_alg->ops] = 0;
2333               best_alg->op[best_alg->ops] = alg_add_t_m2;
2334               cost_limit = cost;
2335             }
2336         }
2337     }
2338
2339   /* Look for factors of t of the form
2340      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2341      If we find such a factor, we can multiply by t using an algorithm that
2342      multiplies by q, shift the result by m and add/subtract it to itself.
2343
2344      We search for large factors first and loop down, even if large factors
2345      are less probable than small; if we find a large factor we will find a
2346      good sequence quickly, and therefore be able to prune (by decreasing
2347      COST_LIMIT) the search.  */
2348
2349   for (m = floor_log2 (t - 1); m >= 2; m--)
2350     {
2351       unsigned HOST_WIDE_INT d;
2352
2353       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2354       if (t % d == 0 && t > d && m < maxm)
2355         {
2356           cost = add_cost[mode] + shift_cost[mode][m];
2357           if (shiftadd_cost[mode][m] < cost)
2358             cost = shiftadd_cost[mode][m];
2359           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2360
2361           cost += alg_in->cost;
2362           if (cost < cost_limit)
2363             {
2364               struct algorithm *x;
2365               x = alg_in, alg_in = best_alg, best_alg = x;
2366               best_alg->log[best_alg->ops] = m;
2367               best_alg->op[best_alg->ops] = alg_add_factor;
2368               cost_limit = cost;
2369             }
2370           /* Other factors will have been taken care of in the recursion.  */
2371           break;
2372         }
2373
2374       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2375       if (t % d == 0 && t > d && m < maxm)
2376         {
2377           cost = add_cost[mode] + shift_cost[mode][m];
2378           if (shiftsub_cost[mode][m] < cost)
2379             cost = shiftsub_cost[mode][m];
2380           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2381
2382           cost += alg_in->cost;
2383           if (cost < cost_limit)
2384             {
2385               struct algorithm *x;
2386               x = alg_in, alg_in = best_alg, best_alg = x;
2387               best_alg->log[best_alg->ops] = m;
2388               best_alg->op[best_alg->ops] = alg_sub_factor;
2389               cost_limit = cost;
2390             }
2391           break;
2392         }
2393     }
2394
2395   /* Try shift-and-add (load effective address) instructions,
2396      i.e. do a*3, a*5, a*9.  */
2397   if ((t & 1) != 0)
2398     {
2399       q = t - 1;
2400       q = q & -q;
2401       m = exact_log2 (q);
2402       if (m >= 0 && m < maxm)
2403         {
2404           cost = shiftadd_cost[mode][m];
2405           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2406
2407           cost += alg_in->cost;
2408           if (cost < cost_limit)
2409             {
2410               struct algorithm *x;
2411               x = alg_in, alg_in = best_alg, best_alg = x;
2412               best_alg->log[best_alg->ops] = m;
2413               best_alg->op[best_alg->ops] = alg_add_t2_m;
2414               cost_limit = cost;
2415             }
2416         }
2417
2418       q = t + 1;
2419       q = q & -q;
2420       m = exact_log2 (q);
2421       if (m >= 0 && m < maxm)
2422         {
2423           cost = shiftsub_cost[mode][m];
2424           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2425
2426           cost += alg_in->cost;
2427           if (cost < cost_limit)
2428             {
2429               struct algorithm *x;
2430               x = alg_in, alg_in = best_alg, best_alg = x;
2431               best_alg->log[best_alg->ops] = m;
2432               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2433               cost_limit = cost;
2434             }
2435         }
2436     }
2437
2438   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2439      we have not found any algorithm.  */
2440   if (cost_limit == alg_out->cost)
2441     return;
2442
2443   /* If we are getting a too long sequence for `struct algorithm'
2444      to record, make this search fail.  */
2445   if (best_alg->ops == MAX_BITS_PER_WORD)
2446     return;
2447
2448   /* Copy the algorithm from temporary space to the space at alg_out.
2449      We avoid using structure assignment because the majority of
2450      best_alg is normally undefined, and this is a critical function.  */
2451   alg_out->ops = best_alg->ops + 1;
2452   alg_out->cost = cost_limit;
2453   memcpy (alg_out->op, best_alg->op,
2454           alg_out->ops * sizeof *alg_out->op);
2455   memcpy (alg_out->log, best_alg->log,
2456           alg_out->ops * sizeof *alg_out->log);
2457 }
2458 \f
2459 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2460    Try three variations:
2461
2462        - a shift/add sequence based on VAL itself
2463        - a shift/add sequence based on -VAL, followed by a negation
2464        - a shift/add sequence based on VAL - 1, followed by an addition.
2465
2466    Return true if the cheapest of these cost less than MULT_COST,
2467    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2468
2469 static bool
2470 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2471                      struct algorithm *alg, enum mult_variant *variant,
2472                      int mult_cost)
2473 {
2474   struct algorithm alg2;
2475
2476   *variant = basic_variant;
2477   synth_mult (alg, val, mult_cost, mode);
2478
2479   /* This works only if the inverted value actually fits in an
2480      `unsigned int' */
2481   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2482     {
2483       synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2484                   mode);
2485       alg2.cost += neg_cost[mode];
2486       if (alg2.cost < alg->cost)
2487         *alg = alg2, *variant = negate_variant;
2488     }
2489
2490   /* This proves very useful for division-by-constant.  */
2491   synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2492               mode);
2493   alg2.cost += add_cost[mode];
2494   if (alg2.cost < alg->cost)
2495     *alg = alg2, *variant = add_variant;
2496
2497   return alg->cost < mult_cost;
2498 }
2499
2500 /* A subroutine of expand_mult, used for constant multiplications.
2501    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2502    convenient.  Use the shift/add sequence described by ALG and apply
2503    the final fixup specified by VARIANT.  */
2504
2505 static rtx
2506 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2507                    rtx target, const struct algorithm *alg,
2508                    enum mult_variant variant)
2509 {
2510   HOST_WIDE_INT val_so_far;
2511   rtx insn, accum, tem;
2512   int opno;
2513   enum machine_mode nmode;
2514
2515   /* Avoid referencing memory over and over.
2516      For speed, but also for correctness when mem is volatile.  */
2517   if (MEM_P (op0))
2518     op0 = force_reg (mode, op0);
2519
2520   /* ACCUM starts out either as OP0 or as a zero, depending on
2521      the first operation.  */
2522
2523   if (alg->op[0] == alg_zero)
2524     {
2525       accum = copy_to_mode_reg (mode, const0_rtx);
2526       val_so_far = 0;
2527     }
2528   else if (alg->op[0] == alg_m)
2529     {
2530       accum = copy_to_mode_reg (mode, op0);
2531       val_so_far = 1;
2532     }
2533   else
2534     abort ();
2535
2536   for (opno = 1; opno < alg->ops; opno++)
2537     {
2538       int log = alg->log[opno];
2539       int preserve = preserve_subexpressions_p ();
2540       rtx shift_subtarget = preserve ? 0 : accum;
2541       rtx add_target
2542         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2543            && ! preserve)
2544           ? target : 0;
2545       rtx accum_target = preserve ? 0 : accum;
2546
2547       switch (alg->op[opno])
2548         {
2549         case alg_shift:
2550           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2551                                 build_int_2 (log, 0), NULL_RTX, 0);
2552           val_so_far <<= log;
2553           break;
2554
2555         case alg_add_t_m2:
2556           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2557                               build_int_2 (log, 0), NULL_RTX, 0);
2558           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2559                                  add_target ? add_target : accum_target);
2560           val_so_far += (HOST_WIDE_INT) 1 << log;
2561           break;
2562
2563         case alg_sub_t_m2:
2564           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2565                               build_int_2 (log, 0), NULL_RTX, 0);
2566           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2567                                  add_target ? add_target : accum_target);
2568           val_so_far -= (HOST_WIDE_INT) 1 << log;
2569           break;
2570
2571         case alg_add_t2_m:
2572           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2573                                 build_int_2 (log, 0), shift_subtarget,
2574                                 0);
2575           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2576                                  add_target ? add_target : accum_target);
2577           val_so_far = (val_so_far << log) + 1;
2578           break;
2579
2580         case alg_sub_t2_m:
2581           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2582                                 build_int_2 (log, 0), shift_subtarget, 0);
2583           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2584                                  add_target ? add_target : accum_target);
2585           val_so_far = (val_so_far << log) - 1;
2586           break;
2587
2588         case alg_add_factor:
2589           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2590                               build_int_2 (log, 0), NULL_RTX, 0);
2591           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2592                                  add_target ? add_target : accum_target);
2593           val_so_far += val_so_far << log;
2594           break;
2595
2596         case alg_sub_factor:
2597           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2598                               build_int_2 (log, 0), NULL_RTX, 0);
2599           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2600                                  (add_target ? add_target
2601                                   : preserve ? 0 : tem));
2602           val_so_far = (val_so_far << log) - val_so_far;
2603           break;
2604
2605         default:
2606           abort ();
2607         }
2608
2609       /* Write a REG_EQUAL note on the last insn so that we can cse
2610          multiplication sequences.  Note that if ACCUM is a SUBREG,
2611          we've set the inner register and must properly indicate
2612          that.  */
2613
2614       tem = op0, nmode = mode;
2615       if (GET_CODE (accum) == SUBREG)
2616         {
2617           nmode = GET_MODE (SUBREG_REG (accum));
2618           tem = gen_lowpart (nmode, op0);
2619         }
2620
2621       insn = get_last_insn ();
2622       set_unique_reg_note (insn, REG_EQUAL,
2623                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2624     }
2625
2626   if (variant == negate_variant)
2627     {
2628       val_so_far = -val_so_far;
2629       accum = expand_unop (mode, neg_optab, accum, target, 0);
2630     }
2631   else if (variant == add_variant)
2632     {
2633       val_so_far = val_so_far + 1;
2634       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2635     }
2636
2637   /* Compare only the bits of val and val_so_far that are significant
2638      in the result mode, to avoid sign-/zero-extension confusion.  */
2639   val &= GET_MODE_MASK (mode);
2640   val_so_far &= GET_MODE_MASK (mode);
2641   if (val != val_so_far)
2642     abort ();
2643
2644   return accum;
2645 }
2646
2647 /* Perform a multiplication and return an rtx for the result.
2648    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2649    TARGET is a suggestion for where to store the result (an rtx).
2650
2651    We check specially for a constant integer as OP1.
2652    If you want this check for OP0 as well, then before calling
2653    you should swap the two operands if OP0 would be constant.  */
2654
2655 rtx
2656 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2657              int unsignedp)
2658 {
2659   rtx const_op1 = op1;
2660   enum mult_variant variant;
2661   struct algorithm algorithm;
2662
2663   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2664      less than or equal in size to `unsigned int' this doesn't matter.
2665      If the mode is larger than `unsigned int', then synth_mult works only
2666      if the constant value exactly fits in an `unsigned int' without any
2667      truncation.  This means that multiplying by negative values does
2668      not work; results are off by 2^32 on a 32 bit machine.  */
2669
2670   /* If we are multiplying in DImode, it may still be a win
2671      to try to work with shifts and adds.  */
2672   if (GET_CODE (op1) == CONST_DOUBLE
2673       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2674       && HOST_BITS_PER_INT >= BITS_PER_WORD
2675       && CONST_DOUBLE_HIGH (op1) == 0)
2676     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2677   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2678            && GET_CODE (op1) == CONST_INT
2679            && INTVAL (op1) < 0)
2680     const_op1 = 0;
2681
2682   /* We used to test optimize here, on the grounds that it's better to
2683      produce a smaller program when -O is not used.
2684      But this causes such a terrible slowdown sometimes
2685      that it seems better to use synth_mult always.  */
2686
2687   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2688       && (unsignedp || !flag_trapv))
2689     {
2690       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2691
2692       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2693                                mult_cost))
2694         return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2695                                   &algorithm, variant);
2696     }
2697
2698   if (GET_CODE (op0) == CONST_DOUBLE)
2699     {
2700       rtx temp = op0;
2701       op0 = op1;
2702       op1 = temp;
2703     }
2704
2705   /* Expand x*2.0 as x+x.  */
2706   if (GET_CODE (op1) == CONST_DOUBLE
2707       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2708     {
2709       REAL_VALUE_TYPE d;
2710       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2711
2712       if (REAL_VALUES_EQUAL (d, dconst2))
2713         {
2714           op0 = force_reg (GET_MODE (op0), op0);
2715           return expand_binop (mode, add_optab, op0, op0,
2716                                target, unsignedp, OPTAB_LIB_WIDEN);
2717         }
2718     }
2719
2720   /* This used to use umul_optab if unsigned, but for non-widening multiply
2721      there is no difference between signed and unsigned.  */
2722   op0 = expand_binop (mode,
2723                       ! unsignedp
2724                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2725                       ? smulv_optab : smul_optab,
2726                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2727   if (op0 == 0)
2728     abort ();
2729   return op0;
2730 }
2731 \f
2732 /* Return the smallest n such that 2**n >= X.  */
2733
2734 int
2735 ceil_log2 (unsigned HOST_WIDE_INT x)
2736 {
2737   return floor_log2 (x - 1) + 1;
2738 }
2739
2740 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2741    replace division by D, and put the least significant N bits of the result
2742    in *MULTIPLIER_PTR and return the most significant bit.
2743
2744    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2745    needed precision is in PRECISION (should be <= N).
2746
2747    PRECISION should be as small as possible so this function can choose
2748    multiplier more freely.
2749
2750    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2751    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2752
2753    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2754    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2755
2756 static
2757 unsigned HOST_WIDE_INT
2758 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2759                    unsigned HOST_WIDE_INT *multiplier_ptr,
2760                    int *post_shift_ptr, int *lgup_ptr)
2761 {
2762   HOST_WIDE_INT mhigh_hi, mlow_hi;
2763   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2764   int lgup, post_shift;
2765   int pow, pow2;
2766   unsigned HOST_WIDE_INT nl, dummy1;
2767   HOST_WIDE_INT nh, dummy2;
2768
2769   /* lgup = ceil(log2(divisor)); */
2770   lgup = ceil_log2 (d);
2771
2772   if (lgup > n)
2773     abort ();
2774
2775   pow = n + lgup;
2776   pow2 = n + lgup - precision;
2777
2778   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2779     {
2780       /* We could handle this with some effort, but this case is much better
2781          handled directly with a scc insn, so rely on caller using that.  */
2782       abort ();
2783     }
2784
2785   /* mlow = 2^(N + lgup)/d */
2786  if (pow >= HOST_BITS_PER_WIDE_INT)
2787     {
2788       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2789       nl = 0;
2790     }
2791   else
2792     {
2793       nh = 0;
2794       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2795     }
2796   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2797                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2798
2799   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2800   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2801     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2802   else
2803     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2804   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2805                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2806
2807   if (mhigh_hi && nh - d >= d)
2808     abort ();
2809   if (mhigh_hi > 1 || mlow_hi > 1)
2810     abort ();
2811   /* Assert that mlow < mhigh.  */
2812   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2813     abort ();
2814
2815   /* If precision == N, then mlow, mhigh exceed 2^N
2816      (but they do not exceed 2^(N+1)).  */
2817
2818   /* Reduce to lowest terms.  */
2819   for (post_shift = lgup; post_shift > 0; post_shift--)
2820     {
2821       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2822       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2823       if (ml_lo >= mh_lo)
2824         break;
2825
2826       mlow_hi = 0;
2827       mlow_lo = ml_lo;
2828       mhigh_hi = 0;
2829       mhigh_lo = mh_lo;
2830     }
2831
2832   *post_shift_ptr = post_shift;
2833   *lgup_ptr = lgup;
2834   if (n < HOST_BITS_PER_WIDE_INT)
2835     {
2836       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2837       *multiplier_ptr = mhigh_lo & mask;
2838       return mhigh_lo >= mask;
2839     }
2840   else
2841     {
2842       *multiplier_ptr = mhigh_lo;
2843       return mhigh_hi;
2844     }
2845 }
2846
2847 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2848    congruent to 1 (mod 2**N).  */
2849
2850 static unsigned HOST_WIDE_INT
2851 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2852 {
2853   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2854
2855   /* The algorithm notes that the choice y = x satisfies
2856      x*y == 1 mod 2^3, since x is assumed odd.
2857      Each iteration doubles the number of bits of significance in y.  */
2858
2859   unsigned HOST_WIDE_INT mask;
2860   unsigned HOST_WIDE_INT y = x;
2861   int nbit = 3;
2862
2863   mask = (n == HOST_BITS_PER_WIDE_INT
2864           ? ~(unsigned HOST_WIDE_INT) 0
2865           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2866
2867   while (nbit < n)
2868     {
2869       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2870       nbit *= 2;
2871     }
2872   return y;
2873 }
2874
2875 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2876    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2877    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2878    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2879    become signed.
2880
2881    The result is put in TARGET if that is convenient.
2882
2883    MODE is the mode of operation.  */
2884
2885 rtx
2886 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2887                              rtx op1, rtx target, int unsignedp)
2888 {
2889   rtx tem;
2890   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2891
2892   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2893                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2894                       NULL_RTX, 0);
2895   tem = expand_and (mode, tem, op1, NULL_RTX);
2896   adj_operand
2897     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2898                      adj_operand);
2899
2900   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2901                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2902                       NULL_RTX, 0);
2903   tem = expand_and (mode, tem, op0, NULL_RTX);
2904   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2905                           target);
2906
2907   return target;
2908 }
2909
2910 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
2911
2912 static rtx
2913 extract_high_half (enum machine_mode mode, rtx op)
2914 {
2915   enum machine_mode wider_mode;
2916
2917   if (mode == word_mode)
2918     return gen_highpart (mode, op);
2919
2920   wider_mode = GET_MODE_WIDER_MODE (mode);
2921   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2922                      build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
2923   return convert_modes (mode, wider_mode, op, 0);
2924 }
2925
2926 /* Like expand_mult_highpart, but only consider using a multiplication
2927    optab.  OP1 is an rtx for the constant operand.  */
2928
2929 static rtx
2930 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2931                             rtx target, int unsignedp, int max_cost)
2932 {
2933   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2934   enum machine_mode wider_mode;
2935   optab moptab;
2936   rtx tem;
2937   int size;
2938
2939   wider_mode = GET_MODE_WIDER_MODE (mode);
2940   size = GET_MODE_BITSIZE (mode);
2941
2942   /* Firstly, try using a multiplication insn that only generates the needed
2943      high part of the product, and in the sign flavor of unsignedp.  */
2944   if (mul_highpart_cost[mode] < max_cost)
2945     {
2946       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2947       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2948                           unsignedp, OPTAB_DIRECT);
2949       if (tem)
2950         return tem;
2951     }
2952
2953   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2954      Need to adjust the result after the multiplication.  */
2955   if (size - 1 < BITS_PER_WORD
2956       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2957           + 4 * add_cost[mode] < max_cost))
2958     {
2959       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2960       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2961                           unsignedp, OPTAB_DIRECT);
2962       if (tem)
2963         /* We used the wrong signedness.  Adjust the result.  */
2964         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2965                                             tem, unsignedp);
2966     }
2967
2968   /* Try widening multiplication.  */
2969   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2970   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2971       && mul_widen_cost[wider_mode] < max_cost)
2972     {
2973       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2974                           unsignedp, OPTAB_WIDEN);
2975       if (tem)
2976         return extract_high_half (mode, tem);
2977     }
2978
2979   /* Try widening the mode and perform a non-widening multiplication.  */
2980   moptab = smul_optab;
2981   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2982       && size - 1 < BITS_PER_WORD
2983       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
2984     {
2985       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
2986                           unsignedp, OPTAB_WIDEN);
2987       if (tem)
2988         return extract_high_half (mode, tem);
2989     }
2990
2991   /* Try widening multiplication of opposite signedness, and adjust.  */
2992   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2993   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2994       && size - 1 < BITS_PER_WORD
2995       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
2996           + 4 * add_cost[mode] < max_cost))
2997     {
2998       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
2999                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3000       if (tem != 0)
3001         {
3002           tem = extract_high_half (mode, tem);
3003           /* We used the wrong signedness.  Adjust the result.  */
3004           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3005                                               target, unsignedp);
3006         }
3007     }
3008
3009   return 0;
3010 }
3011
3012 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3013    in TARGET if that is convenient, and return where the result is.  If the
3014    operation can not be performed, 0 is returned.
3015
3016    MODE is the mode of operation and result.
3017
3018    UNSIGNEDP nonzero means unsigned multiply.
3019
3020    MAX_COST is the total allowed cost for the expanded RTL.  */
3021
3022 rtx
3023 expand_mult_highpart (enum machine_mode mode, rtx op0,
3024                       unsigned HOST_WIDE_INT cnst1, rtx target,
3025                       int unsignedp, int max_cost)
3026 {
3027   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3028   int extra_cost;
3029   bool sign_adjust = false;
3030   enum mult_variant variant;
3031   struct algorithm alg;
3032   rtx op1, tem;
3033
3034   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3035   if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3036     abort ();
3037
3038   op1 = gen_int_mode (cnst1, wider_mode);
3039   cnst1 &= GET_MODE_MASK (mode);
3040
3041   /* We can't optimize modes wider than BITS_PER_WORD.
3042      ??? We might be able to perform double-word arithmetic if
3043      mode == word_mode, however all the cost calculations in
3044      synth_mult etc. assume single-word operations.  */
3045   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3046     return expand_mult_highpart_optab (mode, op0, op1, target,
3047                                        unsignedp, max_cost);
3048
3049   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3050
3051   /* Check whether we try to multiply by a negative constant.  */
3052   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3053     {
3054       sign_adjust = true;
3055       extra_cost += add_cost[mode];
3056     }
3057
3058   /* See whether shift/add multiplication is cheap enough.  */
3059   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3060                            max_cost - extra_cost))
3061     {
3062       /* See whether the specialized multiplication optabs are
3063          cheaper than the shift/add version.  */
3064       tem = expand_mult_highpart_optab (mode, op0, op1, target,
3065                                         unsignedp, alg.cost + extra_cost);
3066       if (tem)
3067         return tem;
3068
3069       tem = convert_to_mode (wider_mode, op0, unsignedp);
3070       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3071       tem = extract_high_half (mode, tem);
3072
3073       /* Adjust result for signedness.  */
3074       if (sign_adjust)
3075         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3076
3077       return tem;
3078     }
3079   return expand_mult_highpart_optab (mode, op0, op1, target,
3080                                      unsignedp, max_cost);
3081 }
3082
3083
3084 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3085
3086 static rtx
3087 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3088 {
3089   unsigned HOST_WIDE_INT mask;
3090   rtx result, temp, shift, label;
3091   int logd;
3092
3093   logd = floor_log2 (d);
3094   result = gen_reg_rtx (mode);
3095
3096   /* Avoid conditional branches when they're expensive.  */
3097   if (BRANCH_COST >= 2
3098       && !optimize_size)
3099     {
3100       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3101                                       mode, 0, -1);
3102       if (signmask)
3103         {
3104           signmask = force_reg (mode, signmask);
3105           mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3106           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3107
3108           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3109              which instruction sequence to use.  If logical right shifts
3110              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3111              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3112
3113           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3114           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3115               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3116             {
3117               temp = expand_binop (mode, xor_optab, op0, signmask,
3118                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3119               temp = expand_binop (mode, sub_optab, temp, signmask,
3120                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3121               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3122                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3123               temp = expand_binop (mode, xor_optab, temp, signmask,
3124                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3125               temp = expand_binop (mode, sub_optab, temp, signmask,
3126                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3127             }
3128           else
3129             {
3130               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3131                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3132               signmask = force_reg (mode, signmask);
3133
3134               temp = expand_binop (mode, add_optab, op0, signmask,
3135                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3136               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3137                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3138               temp = expand_binop (mode, sub_optab, temp, signmask,
3139                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3140             }
3141           return temp;
3142         }
3143     }
3144
3145   /* Mask contains the mode's signbit and the significant bits of the
3146      modulus.  By including the signbit in the operation, many targets
3147      can avoid an explicit compare operation in the following comparison
3148      against zero.  */
3149
3150   mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3151          | (((HOST_WIDE_INT) 1 << logd) - 1);
3152
3153   temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3154                        1, OPTAB_LIB_WIDEN);
3155   if (temp != result)
3156     emit_move_insn (result, temp);
3157
3158   label = gen_label_rtx ();
3159   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3160
3161   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3162                        0, OPTAB_LIB_WIDEN);
3163   mask = (HOST_WIDE_INT) -1 << logd;
3164   temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3165                        1, OPTAB_LIB_WIDEN);
3166   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3167                        0, OPTAB_LIB_WIDEN);
3168   if (temp != result)
3169     emit_move_insn (result, temp);
3170   emit_label (label);
3171   return result;
3172 }
3173 \f
3174 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3175    if that is convenient, and returning where the result is.
3176    You may request either the quotient or the remainder as the result;
3177    specify REM_FLAG nonzero to get the remainder.
3178
3179    CODE is the expression code for which kind of division this is;
3180    it controls how rounding is done.  MODE is the machine mode to use.
3181    UNSIGNEDP nonzero means do unsigned division.  */
3182
3183 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3184    and then correct it by or'ing in missing high bits
3185    if result of ANDI is nonzero.
3186    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3187    This could optimize to a bfexts instruction.
3188    But C doesn't use these operations, so their optimizations are
3189    left for later.  */
3190 /* ??? For modulo, we don't actually need the highpart of the first product,
3191    the low part will do nicely.  And for small divisors, the second multiply
3192    can also be a low-part only multiply or even be completely left out.
3193    E.g. to calculate the remainder of a division by 3 with a 32 bit
3194    multiply, multiply with 0x55555556 and extract the upper two bits;
3195    the result is exact for inputs up to 0x1fffffff.
3196    The input range can be reduced by using cross-sum rules.
3197    For odd divisors >= 3, the following table gives right shift counts
3198    so that if a number is shifted by an integer multiple of the given
3199    amount, the remainder stays the same:
3200    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3201    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3202    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3203    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3204    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3205
3206    Cross-sum rules for even numbers can be derived by leaving as many bits
3207    to the right alone as the divisor has zeros to the right.
3208    E.g. if x is an unsigned 32 bit number:
3209    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3210    */
3211
3212 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3213
3214 rtx
3215 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3216                rtx op0, rtx op1, rtx target, int unsignedp)
3217 {
3218   enum machine_mode compute_mode;
3219   rtx tquotient;
3220   rtx quotient = 0, remainder = 0;
3221   rtx last;
3222   int size;
3223   rtx insn, set;
3224   optab optab1, optab2;
3225   int op1_is_constant, op1_is_pow2 = 0;
3226   int max_cost, extra_cost;
3227   static HOST_WIDE_INT last_div_const = 0;
3228   static HOST_WIDE_INT ext_op1;
3229
3230   op1_is_constant = GET_CODE (op1) == CONST_INT;
3231   if (op1_is_constant)
3232     {
3233       ext_op1 = INTVAL (op1);
3234       if (unsignedp)
3235         ext_op1 &= GET_MODE_MASK (mode);
3236       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3237                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3238     }
3239
3240   /*
3241      This is the structure of expand_divmod:
3242
3243      First comes code to fix up the operands so we can perform the operations
3244      correctly and efficiently.
3245
3246      Second comes a switch statement with code specific for each rounding mode.
3247      For some special operands this code emits all RTL for the desired
3248      operation, for other cases, it generates only a quotient and stores it in
3249      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3250      to indicate that it has not done anything.
3251
3252      Last comes code that finishes the operation.  If QUOTIENT is set and
3253      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3254      QUOTIENT is not set, it is computed using trunc rounding.
3255
3256      We try to generate special code for division and remainder when OP1 is a
3257      constant.  If |OP1| = 2**n we can use shifts and some other fast
3258      operations.  For other values of OP1, we compute a carefully selected
3259      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3260      by m.
3261
3262      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3263      half of the product.  Different strategies for generating the product are
3264      implemented in expand_mult_highpart.
3265
3266      If what we actually want is the remainder, we generate that by another
3267      by-constant multiplication and a subtraction.  */
3268
3269   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3270      code below will malfunction if we are, so check here and handle
3271      the special case if so.  */
3272   if (op1 == const1_rtx)
3273     return rem_flag ? const0_rtx : op0;
3274
3275     /* When dividing by -1, we could get an overflow.
3276      negv_optab can handle overflows.  */
3277   if (! unsignedp && op1 == constm1_rtx)
3278     {
3279       if (rem_flag)
3280         return const0_rtx;
3281       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3282                           ? negv_optab : neg_optab, op0, target, 0);
3283     }
3284
3285   if (target
3286       /* Don't use the function value register as a target
3287          since we have to read it as well as write it,
3288          and function-inlining gets confused by this.  */
3289       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3290           /* Don't clobber an operand while doing a multi-step calculation.  */
3291           || ((rem_flag || op1_is_constant)
3292               && (reg_mentioned_p (target, op0)
3293                   || (MEM_P (op0) && MEM_P (target))))
3294           || reg_mentioned_p (target, op1)
3295           || (MEM_P (op1) && MEM_P (target))))
3296     target = 0;
3297
3298   /* Get the mode in which to perform this computation.  Normally it will
3299      be MODE, but sometimes we can't do the desired operation in MODE.
3300      If so, pick a wider mode in which we can do the operation.  Convert
3301      to that mode at the start to avoid repeated conversions.
3302
3303      First see what operations we need.  These depend on the expression
3304      we are evaluating.  (We assume that divxx3 insns exist under the
3305      same conditions that modxx3 insns and that these insns don't normally
3306      fail.  If these assumptions are not correct, we may generate less
3307      efficient code in some cases.)
3308
3309      Then see if we find a mode in which we can open-code that operation
3310      (either a division, modulus, or shift).  Finally, check for the smallest
3311      mode for which we can do the operation with a library call.  */
3312
3313   /* We might want to refine this now that we have division-by-constant
3314      optimization.  Since expand_mult_highpart tries so many variants, it is
3315      not straightforward to generalize this.  Maybe we should make an array
3316      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3317
3318   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3319             ? (unsignedp ? lshr_optab : ashr_optab)
3320             : (unsignedp ? udiv_optab : sdiv_optab));
3321   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3322             ? optab1
3323             : (unsignedp ? udivmod_optab : sdivmod_optab));
3324
3325   for (compute_mode = mode; compute_mode != VOIDmode;
3326        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3327     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3328         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3329       break;
3330
3331   if (compute_mode == VOIDmode)
3332     for (compute_mode = mode; compute_mode != VOIDmode;
3333          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3334       if (optab1->handlers[compute_mode].libfunc
3335           || optab2->handlers[compute_mode].libfunc)
3336         break;
3337
3338   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3339      in expand_binop.  */
3340   if (compute_mode == VOIDmode)
3341     compute_mode = mode;
3342
3343   if (target && GET_MODE (target) == compute_mode)
3344     tquotient = target;
3345   else
3346     tquotient = gen_reg_rtx (compute_mode);
3347
3348   size = GET_MODE_BITSIZE (compute_mode);
3349 #if 0
3350   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3351      (mode), and thereby get better code when OP1 is a constant.  Do that
3352      later.  It will require going over all usages of SIZE below.  */
3353   size = GET_MODE_BITSIZE (mode);
3354 #endif
3355
3356   /* Only deduct something for a REM if the last divide done was
3357      for a different constant.   Then set the constant of the last
3358      divide.  */
3359   max_cost = div_cost[compute_mode]
3360     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3361                       && INTVAL (op1) == last_div_const)
3362        ? mul_cost[compute_mode] + add_cost[compute_mode]
3363        : 0);
3364
3365   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3366
3367   /* Now convert to the best mode to use.  */
3368   if (compute_mode != mode)
3369     {
3370       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3371       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3372
3373       /* convert_modes may have placed op1 into a register, so we
3374          must recompute the following.  */
3375       op1_is_constant = GET_CODE (op1) == CONST_INT;
3376       op1_is_pow2 = (op1_is_constant
3377                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3378                           || (! unsignedp
3379                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3380     }
3381
3382   /* If one of the operands is a volatile MEM, copy it into a register.  */
3383
3384   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3385     op0 = force_reg (compute_mode, op0);
3386   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3387     op1 = force_reg (compute_mode, op1);
3388
3389   /* If we need the remainder or if OP1 is constant, we need to
3390      put OP0 in a register in case it has any queued subexpressions.  */
3391   if (rem_flag || op1_is_constant)
3392     op0 = force_reg (compute_mode, op0);
3393
3394   last = get_last_insn ();
3395
3396   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3397   if (unsignedp)
3398     {
3399       if (code == FLOOR_DIV_EXPR)
3400         code = TRUNC_DIV_EXPR;
3401       if (code == FLOOR_MOD_EXPR)
3402         code = TRUNC_MOD_EXPR;
3403       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3404         code = TRUNC_DIV_EXPR;
3405     }
3406
3407   if (op1 != const0_rtx)
3408     switch (code)
3409       {
3410       case TRUNC_MOD_EXPR:
3411       case TRUNC_DIV_EXPR:
3412         if (op1_is_constant)
3413           {
3414             if (unsignedp)
3415               {
3416                 unsigned HOST_WIDE_INT mh, ml;
3417                 int pre_shift, post_shift;
3418                 int dummy;
3419                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3420                                             & GET_MODE_MASK (compute_mode));
3421
3422                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3423                   {
3424                     pre_shift = floor_log2 (d);
3425                     if (rem_flag)
3426                       {
3427                         remainder
3428                           = expand_binop (compute_mode, and_optab, op0,
3429                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3430                                           remainder, 1,
3431                                           OPTAB_LIB_WIDEN);
3432                         if (remainder)
3433                           return gen_lowpart (mode, remainder);
3434                       }
3435                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3436                                              build_int_2 (pre_shift, 0),
3437                                              tquotient, 1);
3438                   }
3439                 else if (size <= HOST_BITS_PER_WIDE_INT)
3440                   {
3441                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3442                       {
3443                         /* Most significant bit of divisor is set; emit an scc
3444                            insn.  */
3445                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3446                                                     compute_mode, 1, 1);
3447                         if (quotient == 0)
3448                           goto fail1;
3449                       }
3450                     else
3451                       {
3452                         /* Find a suitable multiplier and right shift count
3453                            instead of multiplying with D.  */
3454
3455                         mh = choose_multiplier (d, size, size,
3456                                                 &ml, &post_shift, &dummy);
3457
3458                         /* If the suggested multiplier is more than SIZE bits,
3459                            we can do better for even divisors, using an
3460                            initial right shift.  */
3461                         if (mh != 0 && (d & 1) == 0)
3462                           {
3463                             pre_shift = floor_log2 (d & -d);
3464                             mh = choose_multiplier (d >> pre_shift, size,
3465                                                     size - pre_shift,
3466                                                     &ml, &post_shift, &dummy);
3467                             if (mh)
3468                               abort ();
3469                           }
3470                         else
3471                           pre_shift = 0;
3472
3473                         if (mh != 0)
3474                           {
3475                             rtx t1, t2, t3, t4;
3476
3477                             if (post_shift - 1 >= BITS_PER_WORD)
3478                               goto fail1;
3479
3480                             extra_cost
3481                               = (shift_cost[compute_mode][post_shift - 1]
3482                                  + shift_cost[compute_mode][1]
3483                                  + 2 * add_cost[compute_mode]);
3484                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3485                                                        NULL_RTX, 1,
3486                                                        max_cost - extra_cost);
3487                             if (t1 == 0)
3488                               goto fail1;
3489                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3490                                                                op0, t1),
3491                                                 NULL_RTX);
3492                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3493                                                build_int_2 (1, 0), NULL_RTX,1);
3494                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3495                                                               t1, t3),
3496                                                 NULL_RTX);
3497                             quotient
3498                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3499                                               build_int_2 (post_shift - 1, 0),
3500                                               tquotient, 1);
3501                           }
3502                         else
3503                           {
3504                             rtx t1, t2;
3505
3506                             if (pre_shift >= BITS_PER_WORD
3507                                 || post_shift >= BITS_PER_WORD)
3508                               goto fail1;
3509
3510                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3511                                                build_int_2 (pre_shift, 0),
3512                                                NULL_RTX, 1);
3513                             extra_cost
3514                               = (shift_cost[compute_mode][pre_shift]
3515                                  + shift_cost[compute_mode][post_shift]);
3516                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3517                                                        NULL_RTX, 1,
3518                                                        max_cost - extra_cost);
3519                             if (t2 == 0)
3520                               goto fail1;
3521                             quotient
3522                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3523                                               build_int_2 (post_shift, 0),
3524                                               tquotient, 1);
3525                           }
3526                       }
3527                   }
3528                 else            /* Too wide mode to use tricky code */
3529                   break;
3530
3531                 insn = get_last_insn ();
3532                 if (insn != last
3533                     && (set = single_set (insn)) != 0
3534                     && SET_DEST (set) == quotient)
3535                   set_unique_reg_note (insn,
3536                                        REG_EQUAL,
3537                                        gen_rtx_UDIV (compute_mode, op0, op1));
3538               }
3539             else                /* TRUNC_DIV, signed */
3540               {
3541                 unsigned HOST_WIDE_INT ml;
3542                 int lgup, post_shift;
3543                 HOST_WIDE_INT d = INTVAL (op1);
3544                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3545
3546                 /* n rem d = n rem -d */
3547                 if (rem_flag && d < 0)
3548                   {
3549                     d = abs_d;
3550                     op1 = gen_int_mode (abs_d, compute_mode);
3551                   }
3552
3553                 if (d == 1)
3554                   quotient = op0;
3555                 else if (d == -1)
3556                   quotient = expand_unop (compute_mode, neg_optab, op0,
3557                                           tquotient, 0);
3558                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3559                   {
3560                     /* This case is not handled correctly below.  */
3561                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3562                                                 compute_mode, 1, 1);
3563                     if (quotient == 0)
3564                       goto fail1;
3565                   }
3566                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3567                          && (rem_flag ? smod_pow2_cheap[compute_mode]
3568                                       : sdiv_pow2_cheap[compute_mode])
3569                          /* We assume that cheap metric is true if the
3570                             optab has an expander for this mode.  */
3571                          && (((rem_flag ? smod_optab : sdiv_optab)
3572                               ->handlers[compute_mode].insn_code
3573                               != CODE_FOR_nothing)
3574                              || (sdivmod_optab->handlers[compute_mode]
3575                                  .insn_code != CODE_FOR_nothing)))
3576                   ;
3577                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3578                   {
3579                     if (rem_flag)
3580                       {
3581                         remainder = expand_smod_pow2 (compute_mode, op0, d);
3582                         if (remainder)
3583                           return gen_lowpart (mode, remainder);
3584                       }
3585                     lgup = floor_log2 (abs_d);
3586                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3587                       {
3588                         rtx label = gen_label_rtx ();
3589                         rtx t1;
3590
3591                         t1 = copy_to_mode_reg (compute_mode, op0);
3592                         do_cmp_and_jump (t1, const0_rtx, GE,
3593                                          compute_mode, label);
3594                         expand_inc (t1, gen_int_mode (abs_d - 1,
3595                                                       compute_mode));
3596                         emit_label (label);
3597                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3598                                                  build_int_2 (lgup, 0),
3599                                                  tquotient, 0);
3600                       }
3601                     else
3602                       {
3603                         rtx t1, t2, t3;
3604                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3605                                            build_int_2 (size - 1, 0),
3606                                            NULL_RTX, 0);
3607                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3608                                            build_int_2 (size - lgup, 0),
3609                                            NULL_RTX, 1);
3610                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3611                                                           op0, t2),
3612                                             NULL_RTX);
3613                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3614                                                  build_int_2 (lgup, 0),
3615                                                  tquotient, 0);
3616                       }
3617
3618                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
3619                        negate the quotient.  */
3620                     if (d < 0)
3621                       {
3622                         insn = get_last_insn ();
3623                         if (insn != last
3624                             && (set = single_set (insn)) != 0
3625                             && SET_DEST (set) == quotient
3626                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3627                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3628                           set_unique_reg_note (insn,
3629                                                REG_EQUAL,
3630                                                gen_rtx_DIV (compute_mode,
3631                                                             op0,
3632                                                             GEN_INT
3633                                                             (trunc_int_for_mode
3634                                                              (abs_d,
3635                                                               compute_mode))));
3636
3637                         quotient = expand_unop (compute_mode, neg_optab,
3638                                                 quotient, quotient, 0);
3639                       }
3640                   }
3641                 else if (size <= HOST_BITS_PER_WIDE_INT)
3642                   {
3643                     choose_multiplier (abs_d, size, size - 1,
3644                                        &ml, &post_shift, &lgup);
3645                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3646                       {
3647                         rtx t1, t2, t3;
3648
3649                         if (post_shift >= BITS_PER_WORD
3650                             || size - 1 >= BITS_PER_WORD)
3651                           goto fail1;
3652
3653                         extra_cost = (shift_cost[compute_mode][post_shift]
3654                                       + shift_cost[compute_mode][size - 1]
3655                                       + add_cost[compute_mode]);
3656                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3657                                                    NULL_RTX, 0,
3658                                                    max_cost - extra_cost);
3659                         if (t1 == 0)
3660                           goto fail1;
3661                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3662                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3663                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3664                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3665                         if (d < 0)
3666                           quotient
3667                             = force_operand (gen_rtx_MINUS (compute_mode,
3668                                                             t3, t2),
3669                                              tquotient);
3670                         else
3671                           quotient
3672                             = force_operand (gen_rtx_MINUS (compute_mode,
3673                                                             t2, t3),
3674                                              tquotient);
3675                       }
3676                     else
3677                       {
3678                         rtx t1, t2, t3, t4;
3679
3680                         if (post_shift >= BITS_PER_WORD
3681                             || size - 1 >= BITS_PER_WORD)
3682                           goto fail1;
3683
3684                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3685                         extra_cost = (shift_cost[compute_mode][post_shift]
3686                                       + shift_cost[compute_mode][size - 1]
3687                                       + 2 * add_cost[compute_mode]);
3688                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3689                                                    NULL_RTX, 0,
3690                                                    max_cost - extra_cost);
3691                         if (t1 == 0)
3692                           goto fail1;
3693                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3694                                                           t1, op0),
3695                                             NULL_RTX);
3696                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3697                                            build_int_2 (post_shift, 0),
3698                                            NULL_RTX, 0);
3699                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3700                                            build_int_2 (size - 1, 0),
3701                                            NULL_RTX, 0);
3702                         if (d < 0)
3703                           quotient
3704                             = force_operand (gen_rtx_MINUS (compute_mode,
3705                                                             t4, t3),
3706                                              tquotient);
3707                         else
3708                           quotient
3709                             = force_operand (gen_rtx_MINUS (compute_mode,
3710                                                             t3, t4),
3711                                              tquotient);
3712                       }
3713                   }
3714                 else            /* Too wide mode to use tricky code */
3715                   break;
3716
3717                 insn = get_last_insn ();
3718                 if (insn != last
3719                     && (set = single_set (insn)) != 0
3720                     && SET_DEST (set) == quotient)
3721                   set_unique_reg_note (insn,
3722                                        REG_EQUAL,
3723                                        gen_rtx_DIV (compute_mode, op0, op1));
3724               }
3725             break;
3726           }
3727       fail1:
3728         delete_insns_since (last);
3729         break;
3730
3731       case FLOOR_DIV_EXPR:
3732       case FLOOR_MOD_EXPR:
3733       /* We will come here only for signed operations.  */
3734         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3735           {
3736             unsigned HOST_WIDE_INT mh, ml;
3737             int pre_shift, lgup, post_shift;
3738             HOST_WIDE_INT d = INTVAL (op1);
3739
3740             if (d > 0)
3741               {
3742                 /* We could just as easily deal with negative constants here,
3743                    but it does not seem worth the trouble for GCC 2.6.  */
3744                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3745                   {
3746                     pre_shift = floor_log2 (d);
3747                     if (rem_flag)
3748                       {
3749                         remainder = expand_binop (compute_mode, and_optab, op0,
3750                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3751                                                   remainder, 0, OPTAB_LIB_WIDEN);
3752                         if (remainder)
3753                           return gen_lowpart (mode, remainder);
3754                       }
3755                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3756                                              build_int_2 (pre_shift, 0),
3757                                              tquotient, 0);
3758                   }
3759                 else
3760                   {
3761                     rtx t1, t2, t3, t4;
3762
3763                     mh = choose_multiplier (d, size, size - 1,
3764                                             &ml, &post_shift, &lgup);
3765                     if (mh)
3766                       abort ();
3767
3768                     if (post_shift < BITS_PER_WORD
3769                         && size - 1 < BITS_PER_WORD)
3770                       {
3771                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3772                                            build_int_2 (size - 1, 0),
3773                                            NULL_RTX, 0);
3774                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3775                                            NULL_RTX, 0, OPTAB_WIDEN);
3776                         extra_cost = (shift_cost[compute_mode][post_shift]
3777                                       + shift_cost[compute_mode][size - 1]
3778                                       + 2 * add_cost[compute_mode]);
3779                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3780                                                    NULL_RTX, 1,
3781                                                    max_cost - extra_cost);
3782                         if (t3 != 0)
3783                           {
3784                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3785                                                build_int_2 (post_shift, 0),
3786                                                NULL_RTX, 1);
3787                             quotient = expand_binop (compute_mode, xor_optab,
3788                                                      t4, t1, tquotient, 0,
3789                                                      OPTAB_WIDEN);
3790                           }
3791                       }
3792                   }
3793               }
3794             else
3795               {
3796                 rtx nsign, t1, t2, t3, t4;
3797                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3798                                                   op0, constm1_rtx), NULL_RTX);
3799                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3800                                    0, OPTAB_WIDEN);
3801                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3802                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3803                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3804                                     NULL_RTX);
3805                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3806                                     NULL_RTX, 0);
3807                 if (t4)
3808                   {
3809                     rtx t5;
3810                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3811                                       NULL_RTX, 0);
3812                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3813                                                             t4, t5),
3814                                               tquotient);
3815                   }
3816               }
3817           }
3818
3819         if (quotient != 0)
3820           break;
3821         delete_insns_since (last);
3822
3823         /* Try using an instruction that produces both the quotient and
3824            remainder, using truncation.  We can easily compensate the quotient
3825            or remainder to get floor rounding, once we have the remainder.
3826            Notice that we compute also the final remainder value here,
3827            and return the result right away.  */
3828         if (target == 0 || GET_MODE (target) != compute_mode)
3829           target = gen_reg_rtx (compute_mode);
3830
3831         if (rem_flag)
3832           {
3833             remainder
3834               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3835             quotient = gen_reg_rtx (compute_mode);
3836           }
3837         else
3838           {
3839             quotient
3840               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3841             remainder = gen_reg_rtx (compute_mode);
3842           }
3843
3844         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3845                                  quotient, remainder, 0))
3846           {
3847             /* This could be computed with a branch-less sequence.
3848                Save that for later.  */
3849             rtx tem;
3850             rtx label = gen_label_rtx ();
3851             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3852             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3853                                 NULL_RTX, 0, OPTAB_WIDEN);
3854             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3855             expand_dec (quotient, const1_rtx);
3856             expand_inc (remainder, op1);
3857             emit_label (label);
3858             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3859           }
3860
3861         /* No luck with division elimination or divmod.  Have to do it
3862            by conditionally adjusting op0 *and* the result.  */
3863         {
3864           rtx label1, label2, label3, label4, label5;
3865           rtx adjusted_op0;
3866           rtx tem;
3867
3868           quotient = gen_reg_rtx (compute_mode);
3869           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3870           label1 = gen_label_rtx ();
3871           label2 = gen_label_rtx ();
3872           label3 = gen_label_rtx ();
3873           label4 = gen_label_rtx ();
3874           label5 = gen_label_rtx ();
3875           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3876           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3877           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3878                               quotient, 0, OPTAB_LIB_WIDEN);
3879           if (tem != quotient)
3880             emit_move_insn (quotient, tem);
3881           emit_jump_insn (gen_jump (label5));
3882           emit_barrier ();
3883           emit_label (label1);
3884           expand_inc (adjusted_op0, const1_rtx);
3885           emit_jump_insn (gen_jump (label4));
3886           emit_barrier ();
3887           emit_label (label2);
3888           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3889           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3890                               quotient, 0, OPTAB_LIB_WIDEN);
3891           if (tem != quotient)
3892             emit_move_insn (quotient, tem);
3893           emit_jump_insn (gen_jump (label5));
3894           emit_barrier ();
3895           emit_label (label3);
3896           expand_dec (adjusted_op0, const1_rtx);
3897           emit_label (label4);
3898           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3899                               quotient, 0, OPTAB_LIB_WIDEN);
3900           if (tem != quotient)
3901             emit_move_insn (quotient, tem);
3902           expand_dec (quotient, const1_rtx);
3903           emit_label (label5);
3904         }
3905         break;
3906
3907       case CEIL_DIV_EXPR:
3908       case CEIL_MOD_EXPR:
3909         if (unsignedp)
3910           {
3911             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3912               {
3913                 rtx t1, t2, t3;
3914                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3915                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3916                                    build_int_2 (floor_log2 (d), 0),
3917                                    tquotient, 1);
3918                 t2 = expand_binop (compute_mode, and_optab, op0,
3919                                    GEN_INT (d - 1),
3920                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3921                 t3 = gen_reg_rtx (compute_mode);
3922                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3923                                       compute_mode, 1, 1);
3924                 if (t3 == 0)
3925                   {
3926                     rtx lab;
3927                     lab = gen_label_rtx ();
3928                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3929                     expand_inc (t1, const1_rtx);
3930                     emit_label (lab);
3931                     quotient = t1;
3932                   }
3933                 else
3934                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3935                                                           t1, t3),
3936                                             tquotient);
3937                 break;
3938               }
3939
3940             /* Try using an instruction that produces both the quotient and
3941                remainder, using truncation.  We can easily compensate the
3942                quotient or remainder to get ceiling rounding, once we have the
3943                remainder.  Notice that we compute also the final remainder
3944                value here, and return the result right away.  */
3945             if (target == 0 || GET_MODE (target) != compute_mode)
3946               target = gen_reg_rtx (compute_mode);
3947
3948             if (rem_flag)
3949               {
3950                 remainder = (REG_P (target)
3951                              ? target : gen_reg_rtx (compute_mode));
3952                 quotient = gen_reg_rtx (compute_mode);
3953               }
3954             else
3955               {
3956                 quotient = (REG_P (target)
3957                             ? target : gen_reg_rtx (compute_mode));
3958                 remainder = gen_reg_rtx (compute_mode);
3959               }
3960
3961             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3962                                      remainder, 1))
3963               {
3964                 /* This could be computed with a branch-less sequence.
3965                    Save that for later.  */
3966                 rtx label = gen_label_rtx ();
3967                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3968                                  compute_mode, label);
3969                 expand_inc (quotient, const1_rtx);
3970                 expand_dec (remainder, op1);
3971                 emit_label (label);
3972                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3973               }
3974
3975             /* No luck with division elimination or divmod.  Have to do it
3976                by conditionally adjusting op0 *and* the result.  */
3977             {
3978               rtx label1, label2;
3979               rtx adjusted_op0, tem;
3980
3981               quotient = gen_reg_rtx (compute_mode);
3982               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3983               label1 = gen_label_rtx ();
3984               label2 = gen_label_rtx ();
3985               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3986                                compute_mode, label1);
3987               emit_move_insn  (quotient, const0_rtx);
3988               emit_jump_insn (gen_jump (label2));
3989               emit_barrier ();
3990               emit_label (label1);
3991               expand_dec (adjusted_op0, const1_rtx);
3992               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3993                                   quotient, 1, OPTAB_LIB_WIDEN);
3994               if (tem != quotient)
3995                 emit_move_insn (quotient, tem);
3996               expand_inc (quotient, const1_rtx);
3997               emit_label (label2);
3998             }
3999           }
4000         else /* signed */
4001           {
4002             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4003                 && INTVAL (op1) >= 0)
4004               {
4005                 /* This is extremely similar to the code for the unsigned case
4006                    above.  For 2.7 we should merge these variants, but for
4007                    2.6.1 I don't want to touch the code for unsigned since that
4008                    get used in C.  The signed case will only be used by other
4009                    languages (Ada).  */
4010
4011                 rtx t1, t2, t3;
4012                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4013                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4014                                    build_int_2 (floor_log2 (d), 0),
4015                                    tquotient, 0);
4016                 t2 = expand_binop (compute_mode, and_optab, op0,
4017                                    GEN_INT (d - 1),
4018                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019                 t3 = gen_reg_rtx (compute_mode);
4020                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4021                                       compute_mode, 1, 1);
4022                 if (t3 == 0)
4023                   {
4024                     rtx lab;
4025                     lab = gen_label_rtx ();
4026                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4027                     expand_inc (t1, const1_rtx);
4028                     emit_label (lab);
4029                     quotient = t1;
4030                   }
4031                 else
4032                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4033                                                           t1, t3),
4034                                             tquotient);
4035                 break;
4036               }
4037
4038             /* Try using an instruction that produces both the quotient and
4039                remainder, using truncation.  We can easily compensate the
4040                quotient or remainder to get ceiling rounding, once we have the
4041                remainder.  Notice that we compute also the final remainder
4042                value here, and return the result right away.  */
4043             if (target == 0 || GET_MODE (target) != compute_mode)
4044               target = gen_reg_rtx (compute_mode);
4045             if (rem_flag)
4046               {
4047                 remainder= (REG_P (target)
4048                             ? target : gen_reg_rtx (compute_mode));
4049                 quotient = gen_reg_rtx (compute_mode);
4050               }
4051             else
4052               {
4053                 quotient = (REG_P (target)
4054                             ? target : gen_reg_rtx (compute_mode));
4055                 remainder = gen_reg_rtx (compute_mode);
4056               }
4057
4058             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4059                                      remainder, 0))
4060               {
4061                 /* This could be computed with a branch-less sequence.
4062                    Save that for later.  */
4063                 rtx tem;
4064                 rtx label = gen_label_rtx ();
4065                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4066                                  compute_mode, label);
4067                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4068                                     NULL_RTX, 0, OPTAB_WIDEN);
4069                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4070                 expand_inc (quotient, const1_rtx);
4071                 expand_dec (remainder, op1);
4072                 emit_label (label);
4073                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4074               }
4075
4076             /* No luck with division elimination or divmod.  Have to do it
4077                by conditionally adjusting op0 *and* the result.  */
4078             {
4079               rtx label1, label2, label3, label4, label5;
4080               rtx adjusted_op0;
4081               rtx tem;
4082
4083               quotient = gen_reg_rtx (compute_mode);
4084               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4085               label1 = gen_label_rtx ();
4086               label2 = gen_label_rtx ();
4087               label3 = gen_label_rtx ();
4088               label4 = gen_label_rtx ();
4089               label5 = gen_label_rtx ();
4090               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4091               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4092                                compute_mode, label1);
4093               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4094                                   quotient, 0, OPTAB_LIB_WIDEN);
4095               if (tem != quotient)
4096                 emit_move_insn (quotient, tem);
4097               emit_jump_insn (gen_jump (label5));
4098               emit_barrier ();
4099               emit_label (label1);
4100               expand_dec (adjusted_op0, const1_rtx);
4101               emit_jump_insn (gen_jump (label4));
4102               emit_barrier ();
4103               emit_label (label2);
4104               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4105                                compute_mode, label3);
4106               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4107                                   quotient, 0, OPTAB_LIB_WIDEN);
4108               if (tem != quotient)
4109                 emit_move_insn (quotient, tem);
4110               emit_jump_insn (gen_jump (label5));
4111               emit_barrier ();
4112               emit_label (label3);
4113               expand_inc (adjusted_op0, const1_rtx);
4114               emit_label (label4);
4115               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4116                                   quotient, 0, OPTAB_LIB_WIDEN);
4117               if (tem != quotient)
4118                 emit_move_insn (quotient, tem);
4119               expand_inc (quotient, const1_rtx);
4120               emit_label (label5);
4121             }
4122           }
4123         break;
4124
4125       case EXACT_DIV_EXPR:
4126         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4127           {
4128             HOST_WIDE_INT d = INTVAL (op1);
4129             unsigned HOST_WIDE_INT ml;
4130             int pre_shift;
4131             rtx t1;
4132
4133             pre_shift = floor_log2 (d & -d);
4134             ml = invert_mod2n (d >> pre_shift, size);
4135             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4136                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
4137             quotient = expand_mult (compute_mode, t1,
4138                                     gen_int_mode (ml, compute_mode),
4139                                     NULL_RTX, 1);
4140
4141             insn = get_last_insn ();
4142             set_unique_reg_note (insn,
4143                                  REG_EQUAL,
4144                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4145                                                  compute_mode,
4146                                                  op0, op1));
4147           }
4148         break;
4149
4150       case ROUND_DIV_EXPR:
4151       case ROUND_MOD_EXPR:
4152         if (unsignedp)
4153           {
4154             rtx tem;
4155             rtx label;
4156             label = gen_label_rtx ();
4157             quotient = gen_reg_rtx (compute_mode);
4158             remainder = gen_reg_rtx (compute_mode);
4159             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4160               {
4161                 rtx tem;
4162                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4163                                          quotient, 1, OPTAB_LIB_WIDEN);
4164                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4165                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4166                                           remainder, 1, OPTAB_LIB_WIDEN);
4167               }
4168             tem = plus_constant (op1, -1);
4169             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4170                                 build_int_2 (1, 0), NULL_RTX, 1);
4171             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4172             expand_inc (quotient, const1_rtx);
4173             expand_dec (remainder, op1);
4174             emit_label (label);
4175           }
4176         else
4177           {
4178             rtx abs_rem, abs_op1, tem, mask;
4179             rtx label;
4180             label = gen_label_rtx ();
4181             quotient = gen_reg_rtx (compute_mode);
4182             remainder = gen_reg_rtx (compute_mode);
4183             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4184               {
4185                 rtx tem;
4186                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4187                                          quotient, 0, OPTAB_LIB_WIDEN);
4188                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4189                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4190                                           remainder, 0, OPTAB_LIB_WIDEN);
4191               }
4192             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4193             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4194             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4195                                 build_int_2 (1, 0), NULL_RTX, 1);
4196             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4197             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4198                                 NULL_RTX, 0, OPTAB_WIDEN);
4199             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4200                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
4201             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4202                                 NULL_RTX, 0, OPTAB_WIDEN);
4203             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4204                                 NULL_RTX, 0, OPTAB_WIDEN);
4205             expand_inc (quotient, tem);
4206             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4207                                 NULL_RTX, 0, OPTAB_WIDEN);
4208             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4209                                 NULL_RTX, 0, OPTAB_WIDEN);
4210             expand_dec (remainder, tem);
4211             emit_label (label);
4212           }
4213         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4214
4215       default:
4216         abort ();
4217       }
4218
4219   if (quotient == 0)
4220     {
4221       if (target && GET_MODE (target) != compute_mode)
4222         target = 0;
4223
4224       if (rem_flag)
4225         {
4226           /* Try to produce the remainder without producing the quotient.
4227              If we seem to have a divmod pattern that does not require widening,
4228              don't try widening here.  We should really have a WIDEN argument
4229              to expand_twoval_binop, since what we'd really like to do here is
4230              1) try a mod insn in compute_mode
4231              2) try a divmod insn in compute_mode
4232              3) try a div insn in compute_mode and multiply-subtract to get
4233                 remainder
4234              4) try the same things with widening allowed.  */
4235           remainder
4236             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4237                                  op0, op1, target,
4238                                  unsignedp,
4239                                  ((optab2->handlers[compute_mode].insn_code
4240                                    != CODE_FOR_nothing)
4241                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4242           if (remainder == 0)
4243             {
4244               /* No luck there.  Can we do remainder and divide at once
4245                  without a library call?  */
4246               remainder = gen_reg_rtx (compute_mode);
4247               if (! expand_twoval_binop ((unsignedp
4248                                           ? udivmod_optab
4249                                           : sdivmod_optab),
4250                                          op0, op1,
4251                                          NULL_RTX, remainder, unsignedp))
4252                 remainder = 0;
4253             }
4254
4255           if (remainder)
4256             return gen_lowpart (mode, remainder);
4257         }
4258
4259       /* Produce the quotient.  Try a quotient insn, but not a library call.
4260          If we have a divmod in this mode, use it in preference to widening
4261          the div (for this test we assume it will not fail). Note that optab2
4262          is set to the one of the two optabs that the call below will use.  */
4263       quotient
4264         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4265                              op0, op1, rem_flag ? NULL_RTX : target,
4266                              unsignedp,
4267                              ((optab2->handlers[compute_mode].insn_code
4268                                != CODE_FOR_nothing)
4269                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4270
4271       if (quotient == 0)
4272         {
4273           /* No luck there.  Try a quotient-and-remainder insn,
4274              keeping the quotient alone.  */
4275           quotient = gen_reg_rtx (compute_mode);
4276           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4277                                      op0, op1,
4278                                      quotient, NULL_RTX, unsignedp))
4279             {
4280               quotient = 0;
4281               if (! rem_flag)
4282                 /* Still no luck.  If we are not computing the remainder,
4283                    use a library call for the quotient.  */
4284                 quotient = sign_expand_binop (compute_mode,
4285                                               udiv_optab, sdiv_optab,
4286                                               op0, op1, target,
4287                                               unsignedp, OPTAB_LIB_WIDEN);
4288             }
4289         }
4290     }
4291
4292   if (rem_flag)
4293     {
4294       if (target && GET_MODE (target) != compute_mode)
4295         target = 0;
4296
4297       if (quotient == 0)
4298         /* No divide instruction either.  Use library for remainder.  */
4299         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4300                                        op0, op1, target,
4301                                        unsignedp, OPTAB_LIB_WIDEN);
4302       else
4303         {
4304           /* We divided.  Now finish doing X - Y * (X / Y).  */
4305           remainder = expand_mult (compute_mode, quotient, op1,
4306                                    NULL_RTX, unsignedp);
4307           remainder = expand_binop (compute_mode, sub_optab, op0,
4308                                     remainder, target, unsignedp,
4309                                     OPTAB_LIB_WIDEN);
4310         }
4311     }
4312
4313   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4314 }
4315 \f
4316 /* Return a tree node with data type TYPE, describing the value of X.
4317    Usually this is an VAR_DECL, if there is no obvious better choice.
4318    X may be an expression, however we only support those expressions
4319    generated by loop.c.  */
4320
4321 tree
4322 make_tree (tree type, rtx x)
4323 {
4324   tree t;
4325
4326   switch (GET_CODE (x))
4327     {
4328     case CONST_INT:
4329       t = build_int_2 (INTVAL (x),
4330                        (TYPE_UNSIGNED (type)
4331                         && (GET_MODE_BITSIZE (TYPE_MODE (type))
4332                             < HOST_BITS_PER_WIDE_INT))
4333                        || INTVAL (x) >= 0 ? 0 : -1);
4334       TREE_TYPE (t) = type;
4335       return t;
4336
4337     case CONST_DOUBLE:
4338       if (GET_MODE (x) == VOIDmode)
4339         {
4340           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4341           TREE_TYPE (t) = type;
4342         }
4343       else
4344         {
4345           REAL_VALUE_TYPE d;
4346
4347           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4348           t = build_real (type, d);
4349         }
4350
4351       return t;
4352
4353     case CONST_VECTOR:
4354       {
4355         int i, units;
4356         rtx elt;
4357         tree t = NULL_TREE;
4358
4359         units = CONST_VECTOR_NUNITS (x);
4360
4361         /* Build a tree with vector elements.  */
4362         for (i = units - 1; i >= 0; --i)
4363           {
4364             elt = CONST_VECTOR_ELT (x, i);
4365             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4366           }
4367
4368         return build_vector (type, t);
4369       }
4370
4371     case PLUS:
4372       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4373                           make_tree (type, XEXP (x, 1))));
4374
4375     case MINUS:
4376       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4377                           make_tree (type, XEXP (x, 1))));
4378
4379     case NEG:
4380       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4381
4382     case MULT:
4383       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4384                           make_tree (type, XEXP (x, 1))));
4385
4386     case ASHIFT:
4387       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4388                           make_tree (type, XEXP (x, 1))));
4389
4390     case LSHIFTRT:
4391       t = lang_hooks.types.unsigned_type (type);
4392       return fold (convert (type,
4393                             build (RSHIFT_EXPR, t,
4394                                    make_tree (t, XEXP (x, 0)),
4395                                    make_tree (type, XEXP (x, 1)))));
4396
4397     case ASHIFTRT:
4398       t = lang_hooks.types.signed_type (type);
4399       return fold (convert (type,
4400                             build (RSHIFT_EXPR, t,
4401                                    make_tree (t, XEXP (x, 0)),
4402                                    make_tree (type, XEXP (x, 1)))));
4403
4404     case DIV:
4405       if (TREE_CODE (type) != REAL_TYPE)
4406         t = lang_hooks.types.signed_type (type);
4407       else
4408         t = type;
4409
4410       return fold (convert (type,
4411                             build (TRUNC_DIV_EXPR, t,
4412                                    make_tree (t, XEXP (x, 0)),
4413                                    make_tree (t, XEXP (x, 1)))));
4414     case UDIV:
4415       t = lang_hooks.types.unsigned_type (type);
4416       return fold (convert (type,
4417                             build (TRUNC_DIV_EXPR, t,
4418                                    make_tree (t, XEXP (x, 0)),
4419                                    make_tree (t, XEXP (x, 1)))));
4420
4421     case SIGN_EXTEND:
4422     case ZERO_EXTEND:
4423       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4424                                           GET_CODE (x) == ZERO_EXTEND);
4425       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4426
4427     default:
4428       t = build_decl (VAR_DECL, NULL_TREE, type);
4429
4430       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4431          ptr_mode.  So convert.  */
4432       if (POINTER_TYPE_P (type))
4433         x = convert_memory_address (TYPE_MODE (type), x);
4434
4435       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4436          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4437       t->decl.rtl = x;
4438
4439       return t;
4440     }
4441 }
4442
4443 /* Check whether the multiplication X * MULT + ADD overflows.
4444    X, MULT and ADD must be CONST_*.
4445    MODE is the machine mode for the computation.
4446    X and MULT must have mode MODE.  ADD may have a different mode.
4447    So can X (defaults to same as MODE).
4448    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4449
4450 bool
4451 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4452 {
4453   tree type, mult_type, add_type, result;
4454
4455   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4456
4457   /* In order to get a proper overflow indication from an unsigned
4458      type, we have to pretend that it's a sizetype.  */
4459   mult_type = type;
4460   if (unsignedp)
4461     {
4462       mult_type = copy_node (type);
4463       TYPE_IS_SIZETYPE (mult_type) = 1;
4464     }
4465
4466   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4467               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4468
4469   result = fold (build (PLUS_EXPR, mult_type,
4470                         fold (build (MULT_EXPR, mult_type,
4471                                      make_tree (mult_type, x),
4472                                      make_tree (mult_type, mult))),
4473                         make_tree (add_type, add)));
4474
4475   return TREE_CONSTANT_OVERFLOW (result);
4476 }
4477
4478 /* Return an rtx representing the value of X * MULT + ADD.
4479    TARGET is a suggestion for where to store the result (an rtx).
4480    MODE is the machine mode for the computation.
4481    X and MULT must have mode MODE.  ADD may have a different mode.
4482    So can X (defaults to same as MODE).
4483    UNSIGNEDP is nonzero to do unsigned multiplication.
4484    This may emit insns.  */
4485
4486 rtx
4487 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4488                  int unsignedp)
4489 {
4490   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4491   tree add_type = (GET_MODE (add) == VOIDmode
4492                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4493                                                            unsignedp));
4494   tree result =  fold (build (PLUS_EXPR, type,
4495                               fold (build (MULT_EXPR, type,
4496                                            make_tree (type, x),
4497                                            make_tree (type, mult))),
4498                               make_tree (add_type, add)));
4499
4500   return expand_expr (result, target, VOIDmode, 0);
4501 }
4502 \f
4503 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4504    and returning TARGET.
4505
4506    If TARGET is 0, a pseudo-register or constant is returned.  */
4507
4508 rtx
4509 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4510 {
4511   rtx tem = 0;
4512
4513   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4514     tem = simplify_binary_operation (AND, mode, op0, op1);
4515   if (tem == 0)
4516     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4517
4518   if (target == 0)
4519     target = tem;
4520   else if (tem != target)
4521     emit_move_insn (target, tem);
4522   return target;
4523 }
4524 \f
4525 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4526    and storing in TARGET.  Normally return TARGET.
4527    Return 0 if that cannot be done.
4528
4529    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4530    it is VOIDmode, they cannot both be CONST_INT.
4531
4532    UNSIGNEDP is for the case where we have to widen the operands
4533    to perform the operation.  It says to use zero-extension.
4534
4535    NORMALIZEP is 1 if we should convert the result to be either zero
4536    or one.  Normalize is -1 if we should convert the result to be
4537    either zero or -1.  If NORMALIZEP is zero, the result will be left
4538    "raw" out of the scc insn.  */
4539
4540 rtx
4541 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4542                  enum machine_mode mode, int unsignedp, int normalizep)
4543 {
4544   rtx subtarget;
4545   enum insn_code icode;
4546   enum machine_mode compare_mode;
4547   enum machine_mode target_mode = GET_MODE (target);
4548   rtx tem;
4549   rtx last = get_last_insn ();
4550   rtx pattern, comparison;
4551
4552   if (unsignedp)
4553     code = unsigned_condition (code);
4554
4555   /* If one operand is constant, make it the second one.  Only do this
4556      if the other operand is not constant as well.  */
4557
4558   if (swap_commutative_operands_p (op0, op1))
4559     {
4560       tem = op0;
4561       op0 = op1;
4562       op1 = tem;
4563       code = swap_condition (code);
4564     }
4565
4566   if (mode == VOIDmode)
4567     mode = GET_MODE (op0);
4568
4569   /* For some comparisons with 1 and -1, we can convert this to
4570      comparisons with zero.  This will often produce more opportunities for
4571      store-flag insns.  */
4572
4573   switch (code)
4574     {
4575     case LT:
4576       if (op1 == const1_rtx)
4577         op1 = const0_rtx, code = LE;
4578       break;
4579     case LE:
4580       if (op1 == constm1_rtx)
4581         op1 = const0_rtx, code = LT;
4582       break;
4583     case GE:
4584       if (op1 == const1_rtx)
4585         op1 = const0_rtx, code = GT;
4586       break;
4587     case GT:
4588       if (op1 == constm1_rtx)
4589         op1 = const0_rtx, code = GE;
4590       break;
4591     case GEU:
4592       if (op1 == const1_rtx)
4593         op1 = const0_rtx, code = NE;
4594       break;
4595     case LTU:
4596       if (op1 == const1_rtx)
4597         op1 = const0_rtx, code = EQ;
4598       break;
4599     default:
4600       break;
4601     }
4602
4603   /* If we are comparing a double-word integer with zero or -1, we can
4604      convert the comparison into one involving a single word.  */
4605   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4606       && GET_MODE_CLASS (mode) == MODE_INT
4607       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4608     {
4609       if ((code == EQ || code == NE)
4610           && (op1 == const0_rtx || op1 == constm1_rtx))
4611         {
4612           rtx op00, op01, op0both;
4613
4614           /* Do a logical OR or AND of the two words and compare the result.  */
4615           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4616           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4617           op0both = expand_binop (word_mode,
4618                                   op1 == const0_rtx ? ior_optab : and_optab,
4619                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4620
4621           if (op0both != 0)
4622             return emit_store_flag (target, code, op0both, op1, word_mode,
4623                                     unsignedp, normalizep);
4624         }
4625       else if ((code == LT || code == GE) && op1 == const0_rtx)
4626         {
4627           rtx op0h;
4628
4629           /* If testing the sign bit, can just test on high word.  */
4630           op0h = simplify_gen_subreg (word_mode, op0, mode,
4631                                       subreg_highpart_offset (word_mode, mode));
4632           return emit_store_flag (target, code, op0h, op1, word_mode,
4633                                   unsignedp, normalizep);
4634         }
4635     }
4636
4637   /* From now on, we won't change CODE, so set ICODE now.  */
4638   icode = setcc_gen_code[(int) code];
4639
4640   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4641      complement of A (for GE) and shifting the sign bit to the low bit.  */
4642   if (op1 == const0_rtx && (code == LT || code == GE)
4643       && GET_MODE_CLASS (mode) == MODE_INT
4644       && (normalizep || STORE_FLAG_VALUE == 1
4645           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4646               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4647                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4648     {
4649       subtarget = target;
4650
4651       /* If the result is to be wider than OP0, it is best to convert it
4652          first.  If it is to be narrower, it is *incorrect* to convert it
4653          first.  */
4654       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4655         {
4656           op0 = convert_modes (target_mode, mode, op0, 0);
4657           mode = target_mode;
4658         }
4659
4660       if (target_mode != mode)
4661         subtarget = 0;
4662
4663       if (code == GE)
4664         op0 = expand_unop (mode, one_cmpl_optab, op0,
4665                            ((STORE_FLAG_VALUE == 1 || normalizep)
4666                             ? 0 : subtarget), 0);
4667
4668       if (STORE_FLAG_VALUE == 1 || normalizep)
4669         /* If we are supposed to produce a 0/1 value, we want to do
4670            a logical shift from the sign bit to the low-order bit; for
4671            a -1/0 value, we do an arithmetic shift.  */
4672         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4673                             size_int (GET_MODE_BITSIZE (mode) - 1),
4674                             subtarget, normalizep != -1);
4675
4676       if (mode != target_mode)
4677         op0 = convert_modes (target_mode, mode, op0, 0);
4678
4679       return op0;
4680     }
4681
4682   if (icode != CODE_FOR_nothing)
4683     {
4684       insn_operand_predicate_fn pred;
4685
4686       /* We think we may be able to do this with a scc insn.  Emit the
4687          comparison and then the scc insn.  */
4688
4689       do_pending_stack_adjust ();
4690       last = get_last_insn ();
4691
4692       comparison
4693         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4694       if (CONSTANT_P (comparison))
4695         {
4696           if (GET_CODE (comparison) == CONST_INT)
4697             {
4698               if (comparison == const0_rtx)
4699                 return const0_rtx;
4700             }
4701 #ifdef FLOAT_STORE_FLAG_VALUE
4702           else if (GET_CODE (comparison) == CONST_DOUBLE)
4703             {
4704               if (comparison == CONST0_RTX (GET_MODE (comparison)))
4705                 return const0_rtx;
4706             }
4707 #endif
4708           else
4709             abort ();
4710           if (normalizep == 1)
4711             return const1_rtx;
4712           if (normalizep == -1)
4713             return constm1_rtx;
4714           return const_true_rtx;
4715         }
4716
4717       /* The code of COMPARISON may not match CODE if compare_from_rtx
4718          decided to swap its operands and reverse the original code.
4719
4720          We know that compare_from_rtx returns either a CONST_INT or
4721          a new comparison code, so it is safe to just extract the
4722          code from COMPARISON.  */
4723       code = GET_CODE (comparison);
4724
4725       /* Get a reference to the target in the proper mode for this insn.  */
4726       compare_mode = insn_data[(int) icode].operand[0].mode;
4727       subtarget = target;
4728       pred = insn_data[(int) icode].operand[0].predicate;
4729       if (preserve_subexpressions_p ()
4730           || ! (*pred) (subtarget, compare_mode))
4731         subtarget = gen_reg_rtx (compare_mode);
4732
4733       pattern = GEN_FCN (icode) (subtarget);
4734       if (pattern)
4735         {
4736           emit_insn (pattern);
4737
4738           /* If we are converting to a wider mode, first convert to
4739              TARGET_MODE, then normalize.  This produces better combining
4740              opportunities on machines that have a SIGN_EXTRACT when we are
4741              testing a single bit.  This mostly benefits the 68k.
4742
4743              If STORE_FLAG_VALUE does not have the sign bit set when
4744              interpreted in COMPARE_MODE, we can do this conversion as
4745              unsigned, which is usually more efficient.  */
4746           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4747             {
4748               convert_move (target, subtarget,
4749                             (GET_MODE_BITSIZE (compare_mode)
4750                              <= HOST_BITS_PER_WIDE_INT)
4751                             && 0 == (STORE_FLAG_VALUE
4752                                      & ((HOST_WIDE_INT) 1
4753                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4754               op0 = target;
4755               compare_mode = target_mode;
4756             }
4757           else
4758             op0 = subtarget;
4759
4760           /* If we want to keep subexpressions around, don't reuse our
4761              last target.  */
4762
4763           if (preserve_subexpressions_p ())
4764             subtarget = 0;
4765
4766           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4767              we don't have to do anything.  */
4768           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4769             ;
4770           /* STORE_FLAG_VALUE might be the most negative number, so write
4771              the comparison this way to avoid a compiler-time warning.  */
4772           else if (- normalizep == STORE_FLAG_VALUE)
4773             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4774
4775           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4776              makes it hard to use a value of just the sign bit due to
4777              ANSI integer constant typing rules.  */
4778           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4779                    && (STORE_FLAG_VALUE
4780                        & ((HOST_WIDE_INT) 1
4781                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4782             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4783                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4784                                 subtarget, normalizep == 1);
4785           else if (STORE_FLAG_VALUE & 1)
4786             {
4787               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4788               if (normalizep == -1)
4789                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4790             }
4791           else
4792             abort ();
4793
4794           /* If we were converting to a smaller mode, do the
4795              conversion now.  */
4796           if (target_mode != compare_mode)
4797             {
4798               convert_move (target, op0, 0);
4799               return target;
4800             }
4801           else
4802             return op0;
4803         }
4804     }
4805
4806   delete_insns_since (last);
4807
4808   /* If expensive optimizations, use different pseudo registers for each
4809      insn, instead of reusing the same pseudo.  This leads to better CSE,
4810      but slows down the compiler, since there are more pseudos */
4811   subtarget = (!flag_expensive_optimizations
4812                && (target_mode == mode)) ? target : NULL_RTX;
4813
4814   /* If we reached here, we can't do this with a scc insn.  However, there
4815      are some comparisons that can be done directly.  For example, if
4816      this is an equality comparison of integers, we can try to exclusive-or
4817      (or subtract) the two operands and use a recursive call to try the
4818      comparison with zero.  Don't do any of these cases if branches are
4819      very cheap.  */
4820
4821   if (BRANCH_COST > 0
4822       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4823       && op1 != const0_rtx)
4824     {
4825       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4826                           OPTAB_WIDEN);
4827
4828       if (tem == 0)
4829         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4830                             OPTAB_WIDEN);
4831       if (tem != 0)
4832         tem = emit_store_flag (target, code, tem, const0_rtx,
4833                                mode, unsignedp, normalizep);
4834       if (tem == 0)
4835         delete_insns_since (last);
4836       return tem;
4837     }
4838
4839   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4840      the constant zero.  Reject all other comparisons at this point.  Only
4841      do LE and GT if branches are expensive since they are expensive on
4842      2-operand machines.  */
4843
4844   if (BRANCH_COST == 0
4845       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4846       || (code != EQ && code != NE
4847           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4848     return 0;
4849
4850   /* See what we need to return.  We can only return a 1, -1, or the
4851      sign bit.  */
4852
4853   if (normalizep == 0)
4854     {
4855       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4856         normalizep = STORE_FLAG_VALUE;
4857
4858       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4859                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4860                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4861         ;
4862       else
4863         return 0;
4864     }
4865
4866   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4867      do the necessary operation below.  */
4868
4869   tem = 0;
4870
4871   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4872      the sign bit set.  */
4873
4874   if (code == LE)
4875     {
4876       /* This is destructive, so SUBTARGET can't be OP0.  */
4877       if (rtx_equal_p (subtarget, op0))
4878         subtarget = 0;
4879
4880       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4881                           OPTAB_WIDEN);
4882       if (tem)
4883         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4884                             OPTAB_WIDEN);
4885     }
4886
4887   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4888      number of bits in the mode of OP0, minus one.  */
4889
4890   if (code == GT)
4891     {
4892       if (rtx_equal_p (subtarget, op0))
4893         subtarget = 0;
4894
4895       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4896                           size_int (GET_MODE_BITSIZE (mode) - 1),
4897                           subtarget, 0);
4898       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4899                           OPTAB_WIDEN);
4900     }
4901
4902   if (code == EQ || code == NE)
4903     {
4904       /* For EQ or NE, one way to do the comparison is to apply an operation
4905          that converts the operand into a positive number if it is nonzero
4906          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4907          for NE we negate.  This puts the result in the sign bit.  Then we
4908          normalize with a shift, if needed.
4909
4910          Two operations that can do the above actions are ABS and FFS, so try
4911          them.  If that doesn't work, and MODE is smaller than a full word,
4912          we can use zero-extension to the wider mode (an unsigned conversion)
4913          as the operation.  */
4914
4915       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4916          that is compensated by the subsequent overflow when subtracting
4917          one / negating.  */
4918
4919       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4920         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4921       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4922         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4923       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4924         {
4925           tem = convert_modes (word_mode, mode, op0, 1);
4926           mode = word_mode;
4927         }
4928
4929       if (tem != 0)
4930         {
4931           if (code == EQ)
4932             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4933                                 0, OPTAB_WIDEN);
4934           else
4935             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4936         }
4937
4938       /* If we couldn't do it that way, for NE we can "or" the two's complement
4939          of the value with itself.  For EQ, we take the one's complement of
4940          that "or", which is an extra insn, so we only handle EQ if branches
4941          are expensive.  */
4942
4943       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4944         {
4945           if (rtx_equal_p (subtarget, op0))
4946             subtarget = 0;
4947
4948           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4949           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4950                               OPTAB_WIDEN);
4951
4952           if (tem && code == EQ)
4953             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4954         }
4955     }
4956
4957   if (tem && normalizep)
4958     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4959                         size_int (GET_MODE_BITSIZE (mode) - 1),
4960                         subtarget, normalizep == 1);
4961
4962   if (tem)
4963     {
4964       if (GET_MODE (tem) != target_mode)
4965         {
4966           convert_move (target, tem, 0);
4967           tem = target;
4968         }
4969       else if (!subtarget)
4970         {
4971           emit_move_insn (target, tem);
4972           tem = target;
4973         }
4974     }
4975   else
4976     delete_insns_since (last);
4977
4978   return tem;
4979 }
4980
4981 /* Like emit_store_flag, but always succeeds.  */
4982
4983 rtx
4984 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4985                        enum machine_mode mode, int unsignedp, int normalizep)
4986 {
4987   rtx tem, label;
4988
4989   /* First see if emit_store_flag can do the job.  */
4990   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4991   if (tem != 0)
4992     return tem;
4993
4994   if (normalizep == 0)
4995     normalizep = 1;
4996
4997   /* If this failed, we have to do this with set/compare/jump/set code.  */
4998
4999   if (!REG_P (target)
5000       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5001     target = gen_reg_rtx (GET_MODE (target));
5002
5003   emit_move_insn (target, const1_rtx);
5004   label = gen_label_rtx ();
5005   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5006                            NULL_RTX, label);
5007
5008   emit_move_insn (target, const0_rtx);
5009   emit_label (label);
5010
5011   return target;
5012 }
5013 \f
5014 /* Perform possibly multi-word comparison and conditional jump to LABEL
5015    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5016
5017    The algorithm is based on the code in expr.c:do_jump.
5018
5019    Note that this does not perform a general comparison.  Only variants
5020    generated within expmed.c are correctly handled, others abort (but could
5021    be handled if needed).  */
5022
5023 static void
5024 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5025                  rtx label)
5026 {
5027   /* If this mode is an integer too wide to compare properly,
5028      compare word by word.  Rely on cse to optimize constant cases.  */
5029
5030   if (GET_MODE_CLASS (mode) == MODE_INT
5031       && ! can_compare_p (op, mode, ccp_jump))
5032     {
5033       rtx label2 = gen_label_rtx ();
5034
5035       switch (op)
5036         {
5037         case LTU:
5038           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5039           break;
5040
5041         case LEU:
5042           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5043           break;
5044
5045         case LT:
5046           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5047           break;
5048
5049         case GT:
5050           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5051           break;
5052
5053         case GE:
5054           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5055           break;
5056
5057           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5058              that's the only equality operations we do */
5059         case EQ:
5060           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5061             abort ();
5062           do_jump_by_parts_equality_rtx (arg1, label2, label);
5063           break;
5064
5065         case NE:
5066           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5067             abort ();
5068           do_jump_by_parts_equality_rtx (arg1, label, label2);
5069           break;
5070
5071         default:
5072           abort ();
5073         }
5074
5075       emit_label (label2);
5076     }
5077   else
5078     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5079 }