/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs *ix86_cost = &pentium_cost;
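
/* Illustrative note (not from the original source; the field names are
   assumptions -- the real struct definition lives in i386.h): RTX cost
   hooks read these tables in units of an add.  A sketch of the kind of
   comparison they enable:

	int div_cost = ix86_cost->divide;
	int mul_cost = ix86_cost->mult_init + ix86_cost->mult_bit * nbits;

   On the 386 table above a divide costs 23 adds while a multiply costs
   6 plus 1 per set bit, which is why division by a suitable constant is
   often rewritten as a multiplication.  */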
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
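
/* Illustrative note (an assumption, not from the original source):
   i386.h wraps each of these masks in a TARGET_* predicate that tests
   the bit for the CPU currently being scheduled for, roughly

	#define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))

   so with -mcpu=k6 the epilogue may use `leave' (m_K6 is set in
   x86_use_leave above) while -mcpu=pentiumpro will not.  */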
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)
/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order.  */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx ix86_expand_compare PARAMS ((enum rtx_code));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS ((HOST_WIDE_INT,
						      int *, int *, int *));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };
  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;

      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }
  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }
  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }
  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by default.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */
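
/* Illustrative example (an assumption about the -mreg-alloc= syntax
   handled below, not text from the original source): -mreg-alloc=dacb
   makes local allocation try %edx first, then %eax, %ecx and %ebx,
   with all remaining registers following in their natural order.  */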
void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
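
/* Illustrative example (not from the original source): given

	void __attribute__ ((stdcall)) f (int a, int b);

   a call site pushes 8 bytes of arguments and f pops them itself with
   `ret $8', so this function returns 8.  For a variadic or cdecl
   function it returns 0 and the caller adjusts %esp instead.  */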
int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */
void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
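
/* Illustrative example (not from the original source): with
   -mregparm=2 and `void f (int a, int b, int c);', a and b come back
   from function_arg as registers (cum->regno counts up from 0, i.e.
   %eax then %edx, with %ecx next for the default REGPARM_MAX of 3),
   while c gets a zero return and is pushed on the stack.  */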
/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
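
/* Illustrative example (not from the original source): both
   (symbol_ref "foo") and (const (plus (symbol_ref "foo") (const_int 4)))
   satisfy symbolic_operand, while a bare (const_int 4) or a sum of two
   symbols does not.  */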
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == MEM
	  && CONSTANT_ADDRESS_P (XEXP (op, 0))
	  && GET_CODE (XEXP (op, 0)) != CONST_INT);
}
/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
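
/* Illustrative example (not from the original source): the SIB byte
   only encodes scale factors 1, 2, 4 and 8, so

	leal (%ebx,%eax,4), %ecx	# %ecx = %ebx + %eax*4

   is valid, while a scale of 3 has to be expressed as base+index,
   e.g. %eax + %eax*2.  */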
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}
/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED.  */

int
uno_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LE: case LT: case GE: case GT:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}
int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32K pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
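
/* Illustrative note (an assumption, not from the original source):
   with TARGET_DEEP_BRANCH_PREDICTION the code above, together with
   asm_output_function_prefix, emits roughly

	call	.LPR0
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	...
   .LPR0:
	movl	(%esp), %ebx
	ret

   keeping the CPU's return-address stack balanced; without it the
   simple `call 1f; 1: popl %ebx' form is used instead.  */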
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
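
/* Illustrative example (not from the original source): for
   arg = (reg:SI 0), gen_push returns

	(set (mem:SI (pre_dec:SI (reg:SI 7)))
	     (reg:SI 0))

   i.e. the RTL that the i386.md patterns emit as `pushl %eax'
   (register 7 being %esp in this port's numbering).  */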
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
						<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]
						<- FRAME_POINTER
     [frame]

     [padding2]
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before the stack frame and PADDING2 returns
   padding after the stack frame.  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1;
  int padding2;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
     since the i386 port is the only one using those features that may break
     easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  if (ACCUMULATE_OUTGOING_ARGS)
    total_size += current_function_outgoing_args_size;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (ACCUMULATE_OUTGOING_ARGS)
    padding2 += current_function_outgoing_args_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  int regno;
  int limit;
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0,
						 (int *) 0, (int *) 0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */
      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* If a frame pointer is present, we must be sure to tie the sp
     to the fp so that we don't mis-schedule.  */
  if (frame_pointer_needed)
    emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
					      stack_pointer_rtx,
					      GEN_INT (tsize),
					      hard_frame_pointer_rtx));
  else
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (tsize)));
}
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *) 0, (int *) 0);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */
      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2151 /* Extract the parts of an RTL expression that is a valid memory address
2152 for an instruction. Return false if the structure of the address is
2156 ix86_decompose_address (addr, out)
2158 struct ix86_address *out;
2160 rtx base = NULL_RTX;
2161 rtx index = NULL_RTX;
2162 rtx disp = NULL_RTX;
2163 HOST_WIDE_INT scale = 1;
2164 rtx scale_rtx = NULL_RTX;
2166 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2168 else if (GET_CODE (addr) == PLUS)
2170 rtx op0 = XEXP (addr, 0);
2171 rtx op1 = XEXP (addr, 1);
2172 enum rtx_code code0 = GET_CODE (op0);
2173 enum rtx_code code1 = GET_CODE (op1);
2175 if (code0 == REG || code0 == SUBREG)
2177 if (code1 == REG || code1 == SUBREG)
2178 index = op0, base = op1; /* index + base */
2180 base = op0, disp = op1; /* base + displacement */
2182 else if (code0 == MULT)
2184 index = XEXP (op0, 0);
2185 scale_rtx = XEXP (op0, 1);
2186 if (code1 == REG || code1 == SUBREG)
2187 base = op1; /* index*scale + base */
2189 disp = op1; /* index*scale + disp */
2191 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2193 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2194 scale_rtx = XEXP (XEXP (op0, 0), 1);
2195 base = XEXP (op0, 1);
2198 else if (code0 == PLUS)
2200 index = XEXP (op0, 0); /* index + base + disp */
2201 base = XEXP (op0, 1);
2207 else if (GET_CODE (addr) == MULT)
2209 index = XEXP (addr, 0); /* index*scale */
2210 scale_rtx = XEXP (addr, 1);
2212 else if (GET_CODE (addr) == ASHIFT)
2216 /* We're called for lea too, which implements ashift on occasion. */
2217 index = XEXP (addr, 0);
2218 tmp = XEXP (addr, 1);
2219 if (GET_CODE (tmp) != CONST_INT)
2221 scale = INTVAL (tmp);
2222 if ((unsigned HOST_WIDE_INT) scale > 3)
2227 disp = addr; /* displacement */
2229 /* Extract the integral value of scale. */
2232 if (GET_CODE (scale_rtx) != CONST_INT)
2234 scale = INTVAL (scale_rtx);
2237 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
2238 if (base && index && scale == 1
2239 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2240 || index == stack_pointer_rtx))
2247 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2248 if ((base == hard_frame_pointer_rtx
2249 || base == frame_pointer_rtx
2250 || base == arg_pointer_rtx) && !disp)
2253 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
2254 Avoid this by transforming to [%esi+0]. */
2255 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2256 && base && !index && !disp
2258 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2261 /* Special case: encode reg+reg instead of reg*2. */
2262 if (!base && index && scale && scale == 2)
2263 base = index, scale = 1;
2265 /* Special case: scaling cannot be encoded without base or displacement. */
2266 if (!base && !disp && index && scale != 1)
2277 /* Return the cost of the memory address x.
2278 For the i386, it is better to use a complex address than to let gcc copy
2279 the address into a reg and make a new pseudo. But not if the address
2280 requires two regs - that would mean more pseudos with longer lifetimes. */
2283 ix86_address_cost (x)
2286 struct ix86_address parts;
2289 if (!ix86_decompose_address (x, &parts))
2292 /* More complex memory references are better. */
2293 if (parts.disp && parts.disp != const0_rtx)
2296 /* Attempt to minimize number of registers in the address. */
2298 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2300 && (!REG_P (parts.index)
2301 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2305 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2307 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2308 && parts.base != parts.index)
2311 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
2312 since its predecode logic can't detect the length of such instructions
2313 and decoding degenerates to vector decoding. Increase the cost of such
2314 addresses here. The penalty is a minimum of 2 cycles. It may be worthwhile
2315 to split such addresses or even to refuse such addresses at all.
2317 The following addressing modes are affected:
2322 The first and last cases may be avoidable by explicitly coding the zero into
2323 the memory address, but I don't have an AMD-K6 machine handy to check this theory.
2327 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2328 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2329 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
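/* A minimal sketch (example_address_cost_usage is a hypothetical
   helper, not part of the compiler proper): build the address
   %eax + %ebx*4 + 12 in the canonical form recognized by
   ix86_decompose_address and run it through the routines above.  */

static void
example_address_cost_usage ()
{
  struct ix86_address parts;
  rtx base = gen_rtx_REG (Pmode, 0);   /* %eax */
  rtx index = gen_rtx_REG (Pmode, 3);  /* %ebx */
  rtx addr
    = gen_rtx_PLUS (Pmode,
                    gen_rtx_PLUS (Pmode,
                                  gen_rtx_MULT (Pmode, index, GEN_INT (4)),
                                  base),
                    GEN_INT (12));

  if (ix86_decompose_address (addr, &parts))
    {
      /* parts.base is %eax, parts.index is %ebx, parts.scale is 4,
         and parts.disp is (const_int 12).  The cost routine above can
         then rank this address against simpler alternatives; the
         result is unused here since this is only a sketch.  */
      (void) ix86_address_cost (addr);
    }
}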
2335 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
2339 legitimate_pic_address_disp_p (disp)
2342 if (GET_CODE (disp) != CONST)
2344 disp = XEXP (disp, 0);
2346 if (GET_CODE (disp) == PLUS)
2348 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2350 disp = XEXP (disp, 0);
2353 if (GET_CODE (disp) != UNSPEC
2354 || XVECLEN (disp, 0) != 1)
2357 /* Must be @GOT or @GOTOFF. */
2358 if (XINT (disp, 1) != 6
2359 && XINT (disp, 1) != 7)
2362 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2363 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
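/* A minimal sketch (example_gotoff_disp is a hypothetical helper, for
   illustration only): construct the @GOTOFF displacement shape the
   predicate above accepts -- (const (unspec [(symbol_ref "x")] 7)) --
   and verify that it passes.  */

static void
example_gotoff_disp ()
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "x");
  rtx disp
    = gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), 7));

  if (! legitimate_pic_address_disp_p (disp))
    abort ();
}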
2369 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2370 memory address for an instruction. The MODE argument is the machine mode
2371 for the MEM expression that wants to use this address.
2373 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2374 convert common non-canonical forms to canonical form so that they will be recognized. */
2378 legitimate_address_p (mode, addr, strict)
2379 enum machine_mode mode;
2383 struct ix86_address parts;
2384 rtx base, index, disp;
2385 HOST_WIDE_INT scale;
2386 const char *reason = NULL;
2387 rtx reason_rtx = NULL_RTX;
2389 if (TARGET_DEBUG_ADDR)
2392 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2393 GET_MODE_NAME (mode), strict);
2397 if (! ix86_decompose_address (addr, &parts))
2399 reason = "decomposition failed";
2404 index = parts.index;
2406 scale = parts.scale;
2408 /* Validate base register.
2410 Don't allow SUBREGs here; they can lead to spill failures when the base
2411 is one word out of a two-word structure, which is represented internally as a DImode int. */
2418 if (GET_CODE (base) != REG)
2420 reason = "base is not a register";
2424 if (GET_MODE (base) != Pmode)
2426 reason = "base is not in Pmode";
2430 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2431 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2433 reason = "base is not valid";
2438 /* Validate index register.
2440 Don't allow SUBREGs here; they can lead to spill failures when the index
2441 is one word out of a two-word structure, which is represented internally as a DImode int. */
2448 if (GET_CODE (index) != REG)
2450 reason = "index is not a register";
2454 if (GET_MODE (index) != Pmode)
2456 reason = "index is not in Pmode";
2460 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2461 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2463 reason = "index is not valid";
2468 /* Validate scale factor. */
2471 reason_rtx = GEN_INT (scale);
2474 reason = "scale without index";
2478 if (scale != 2 && scale != 4 && scale != 8)
2480 reason = "scale is not a valid multiplier";
2485 /* Validate displacement. */
2490 if (!CONSTANT_ADDRESS_P (disp))
2492 reason = "displacement is not constant";
2496 if (GET_CODE (disp) == CONST_DOUBLE)
2498 reason = "displacement is a const_double";
2502 if (flag_pic && SYMBOLIC_CONST (disp))
2504 if (! legitimate_pic_address_disp_p (disp))
2506 reason = "displacement is an invalid pic construct";
2510 /* This code used to verify that a symbolic pic displacement
2511 includes the pic_offset_table_rtx register.
2513 While this is a good idea, unfortunately these constructs may
2514 be created by the "adds using lea" optimization for incorrect
2523 This code is nonsensical, but results in addressing the
2524 GOT table with a pic_offset_table_rtx base. We can't
2525 just refuse it easily, since it gets matched by the
2526 "addsi3" pattern, which later gets split to lea when the
2527 output register differs from the input. While this
2528 could be handled by a separate addsi pattern for this case
2529 that never results in lea, disabling this test seems to be the
2530 easier and correct fix for the crash. */
2532 else if (HALF_PIC_P ())
2534 if (! HALF_PIC_ADDRESS_P (disp)
2535 || (base != NULL_RTX || index != NULL_RTX))
2537 reason = "displacement is an invalid half-pic reference";
2543 /* Everything looks valid. */
2544 if (TARGET_DEBUG_ADDR)
2545 fprintf (stderr, "Success.\n");
2549 if (TARGET_DEBUG_ADDR)
2551 fprintf (stderr, "Error: %s\n", reason);
2552 debug_rtx (reason_rtx);
2557 /* Return a unique alias set for the GOT. */
2559 static HOST_WIDE_INT
2560 ix86_GOT_alias_set ()
2562 static HOST_WIDE_INT set = -1;
2564 set = new_alias_set ();
2568 /* Return a legitimate reference for ORIG (an address) using the
2569 register REG. If REG is 0, a new pseudo is generated.
2571 There are two types of references that must be handled:
2573 1. Global data references must load the address from the GOT, via
2574 the PIC reg. An insn is emitted to do this load, and the reg is
2577 2. Static data references, constant pool addresses, and code labels
2578 compute the address as an offset from the GOT, whose base is in
2579 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2580 differentiate them from global data objects. The returned
2581 address is the PIC reg + an unspec constant.
2583 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2584 reg also appears in the address. */
2587 legitimize_pic_address (orig, reg)
2595 if (GET_CODE (addr) == LABEL_REF
2596 || (GET_CODE (addr) == SYMBOL_REF
2597 && (CONSTANT_POOL_ADDRESS_P (addr)
2598 || SYMBOL_REF_FLAG (addr))))
2600 /* This symbol may be referenced via a displacement from the PIC
2601 base address (@GOTOFF). */
2603 current_function_uses_pic_offset_table = 1;
2604 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2605 new = gen_rtx_CONST (Pmode, new);
2606 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2610 emit_move_insn (reg, new);
2614 else if (GET_CODE (addr) == SYMBOL_REF)
2616 /* This symbol must be referenced via a load from the
2617 Global Offset Table (@GOT). */
2619 current_function_uses_pic_offset_table = 1;
2620 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2621 new = gen_rtx_CONST (Pmode, new);
2622 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2623 new = gen_rtx_MEM (Pmode, new);
2624 RTX_UNCHANGING_P (new) = 1;
2625 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2628 reg = gen_reg_rtx (Pmode);
2629 emit_move_insn (reg, new);
2634 if (GET_CODE (addr) == CONST)
2636 addr = XEXP (addr, 0);
2637 if (GET_CODE (addr) == UNSPEC)
2639 /* Check that the unspec is one of the ones we generate? */
2641 else if (GET_CODE (addr) != PLUS)
2644 if (GET_CODE (addr) == PLUS)
2646 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2648 /* Check first to see if this is a constant offset from a @GOTOFF
2649 symbol reference. */
2650 if ((GET_CODE (op0) == LABEL_REF
2651 || (GET_CODE (op0) == SYMBOL_REF
2652 && (CONSTANT_POOL_ADDRESS_P (op0)
2653 || SYMBOL_REF_FLAG (op0))))
2654 && GET_CODE (op1) == CONST_INT)
2656 current_function_uses_pic_offset_table = 1;
2657 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2658 new = gen_rtx_PLUS (Pmode, new, op1);
2659 new = gen_rtx_CONST (Pmode, new);
2660 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2664 emit_move_insn (reg, new);
2670 base = legitimize_pic_address (XEXP (addr, 0), reg);
2671 new = legitimize_pic_address (XEXP (addr, 1),
2672 base == reg ? NULL_RTX : reg);
2674 if (GET_CODE (new) == CONST_INT)
2675 new = plus_constant (base, INTVAL (new));
2678 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2680 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2681 new = XEXP (new, 1);
2683 new = gen_rtx_PLUS (Pmode, base, new);
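/* For reference, the two main cases above produce (with Pmode == SImode):

     global `g':  (mem:SI (plus:SI pic_offset_table_rtx
                                   (const:SI (unspec [(symbol_ref "g")] 6))))
     static `s':  (plus:SI pic_offset_table_rtx
                           (const:SI (unspec [(symbol_ref "s")] 7)))

   i.e. a load through the GOT for the global, and a GOT-relative offset
   added to the PIC register for the static.  */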
2691 /* Try machine-dependent ways of modifying an illegitimate address
2692 to be legitimate. If we find one, return the new, valid address.
2693 This macro is used in only one place: `memory_address' in explow.c.
2695 OLDX is the address as it was before break_out_memory_refs was called.
2696 In some cases it is useful to look at this to decide what needs to be done.
2698 MODE and WIN are passed so that this macro can use
2699 GO_IF_LEGITIMATE_ADDRESS.
2701 It is always safe for this macro to do nothing. It exists to recognize
2702 opportunities to optimize the output.
2704 For the 80386, we handle X+REG by loading X into a register R and
2705 using R+REG. R will go in a general reg and indexing will be used.
2706 However, if REG is a broken-out memory address or multiplication,
2707 nothing needs to be done because REG can certainly go in a general reg.
2709 When -fpic is used, special handling is needed for symbolic references.
2710 See comments by legitimize_pic_address in i386.c for details. */
2713 legitimize_address (x, oldx, mode)
2715 register rtx oldx ATTRIBUTE_UNUSED;
2716 enum machine_mode mode;
2721 if (TARGET_DEBUG_ADDR)
2723 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2724 GET_MODE_NAME (mode));
2728 if (flag_pic && SYMBOLIC_CONST (x))
2729 return legitimize_pic_address (x, 0);
2731 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2732 if (GET_CODE (x) == ASHIFT
2733 && GET_CODE (XEXP (x, 1)) == CONST_INT
2734 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2737 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2738 GEN_INT (1 << log));
2741 if (GET_CODE (x) == PLUS)
2743 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2745 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2746 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2747 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2750 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2751 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2752 GEN_INT (1 << log));
2755 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2756 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2757 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2760 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2761 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2762 GEN_INT (1 << log));
2765 /* Put multiply first if it isn't already. */
2766 if (GET_CODE (XEXP (x, 1)) == MULT)
2768 rtx tmp = XEXP (x, 0);
2769 XEXP (x, 0) = XEXP (x, 1);
2774 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2775 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2776 created by virtual register instantiation, register elimination, and
2777 similar optimizations. */
2778 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2781 x = gen_rtx_PLUS (Pmode,
2782 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2783 XEXP (XEXP (x, 1), 0)),
2784 XEXP (XEXP (x, 1), 1));
2788 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2789 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2790 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2791 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2792 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2793 && CONSTANT_P (XEXP (x, 1)))
2796 rtx other = NULL_RTX;
2798 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2800 constant = XEXP (x, 1);
2801 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2803 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2805 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2806 other = XEXP (x, 1);
2814 x = gen_rtx_PLUS (Pmode,
2815 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2816 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2817 plus_constant (other, INTVAL (constant)));
2821 if (changed && legitimate_address_p (mode, x, FALSE))
2824 if (GET_CODE (XEXP (x, 0)) == MULT)
2827 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2830 if (GET_CODE (XEXP (x, 1)) == MULT)
2833 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2837 && GET_CODE (XEXP (x, 1)) == REG
2838 && GET_CODE (XEXP (x, 0)) == REG)
2841 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2844 x = legitimize_pic_address (x, 0);
2847 if (changed && legitimate_address_p (mode, x, FALSE))
2850 if (GET_CODE (XEXP (x, 0)) == REG)
2852 register rtx temp = gen_reg_rtx (Pmode);
2853 register rtx val = force_operand (XEXP (x, 1), temp);
2855 emit_move_insn (temp, val);
2861 else if (GET_CODE (XEXP (x, 1)) == REG)
2863 register rtx temp = gen_reg_rtx (Pmode);
2864 register rtx val = force_operand (XEXP (x, 0), temp);
2866 emit_move_insn (temp, val);
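/* For example, given (plus (reg A) X) where X is not itself a valid
   address part, the code above computes X into a fresh pseudo T via
   force_operand and rewrites the address as (plus (reg A) T), a valid
   base+index form.  */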
2876 /* Print an integer constant expression in assembler syntax. Addition
2877 and subtraction are the only arithmetic that may appear in these
2878 expressions. FILE is the stdio stream to write to, X is the rtx, and
2879 CODE is the operand print code from the output string. */
2882 output_pic_addr_const (file, x, code)
2889 switch (GET_CODE (x))
2899 assemble_name (file, XSTR (x, 0));
2900 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2901 fputs ("@PLT", file);
2908 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2909 assemble_name (asm_out_file, buf);
2913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2917 /* This used to output parentheses around the expression,
2918 but that does not work on the 386 (either ATT or BSD assembler). */
2919 output_pic_addr_const (file, XEXP (x, 0), code);
2923 if (GET_MODE (x) == VOIDmode)
2925 /* We can use %d if the number is <32 bits and positive. */
2926 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2927 fprintf (file, "0x%lx%08lx",
2928 (unsigned long) CONST_DOUBLE_HIGH (x),
2929 (unsigned long) CONST_DOUBLE_LOW (x));
2931 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2934 /* We can't handle floating point constants;
2935 PRINT_OPERAND must handle them. */
2936 output_operand_lossage ("floating constant misused");
2940 /* Some assemblers need integer constants to appear first. */
2941 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2943 output_pic_addr_const (file, XEXP (x, 0), code);
2945 output_pic_addr_const (file, XEXP (x, 1), code);
2947 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2949 output_pic_addr_const (file, XEXP (x, 1), code);
2951 output_pic_addr_const (file, XEXP (x, 0), code);
2958 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2959 output_pic_addr_const (file, XEXP (x, 0), code);
2961 output_pic_addr_const (file, XEXP (x, 1), code);
2962 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2966 if (XVECLEN (x, 0) != 1)
2968 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2969 switch (XINT (x, 1))
2972 fputs ("@GOT", file);
2975 fputs ("@GOTOFF", file);
2978 fputs ("@PLT", file);
2981 output_operand_lossage ("invalid UNSPEC as operand");
2987 output_operand_lossage ("invalid expression as operand");
2991 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2992 We need to handle our special PIC relocations. */
2995 i386_dwarf_output_addr_const (file, x)
2999 fprintf (file, "\t%s\t", INT_ASM_OP);
3001 output_pic_addr_const (file, x, '\0');
3003 output_addr_const (file, x);
3007 /* In the name of slightly smaller debug output, and to cater to
3008 general assembler lossage, recognize PIC+GOTOFF and turn it back
3009 into a direct symbol reference. */
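/* For example, (plus (reg) (const (unspec [(symbol_ref "x")] 7)))
   simplifies back to plain (symbol_ref "x"); a constant term riding
   along with the unspec is re-attached with gen_rtx_PLUS below.  */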
3012 i386_simplify_dwarf_addr (orig_x)
3017 if (GET_CODE (x) != PLUS
3018 || GET_CODE (XEXP (x, 0)) != REG
3019 || GET_CODE (XEXP (x, 1)) != CONST)
3022 x = XEXP (XEXP (x, 1), 0);
3023 if (GET_CODE (x) == UNSPEC
3024 && XINT (x, 1) == 7)
3025 return XVECEXP (x, 0, 0);
3027 if (GET_CODE (x) == PLUS
3028 && GET_CODE (XEXP (x, 0)) == UNSPEC
3029 && GET_CODE (XEXP (x, 1)) == CONST_INT
3030 && XINT (XEXP (x, 0), 1) == 7)
3031 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3037 put_condition_code (code, mode, reverse, fp, file)
3039 enum machine_mode mode;
3046 code = reverse_condition (code);
3057 if (mode == CCNOmode)
3062 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3063 Those same assemblers have the same but opposite lossage on cmov. */
3064 suffix = fp ? "nbe" : "a";
3067 if (mode == CCNOmode)
3076 if (mode == CCNOmode)
3083 suffix = fp ? "nb" : "ae";
3086 if (mode == CCNOmode)
3102 fputs (suffix, file);
3106 print_reg (x, code, file)
3111 if (REGNO (x) == ARG_POINTER_REGNUM
3112 || REGNO (x) == FRAME_POINTER_REGNUM
3113 || REGNO (x) == FLAGS_REG
3114 || REGNO (x) == FPSR_REG)
3117 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3122 else if (code == 'b')
3124 else if (code == 'k')
3126 else if (code == 'y')
3128 else if (code == 'h')
3130 else if (code == 'm' || MMX_REG_P (x))
3133 code = GET_MODE_SIZE (GET_MODE (x));
3138 fputs (hi_reg_name[REGNO (x)], file);
3141 if (STACK_TOP_P (x))
3143 fputs ("st(0)", file);
3155 fputs (hi_reg_name[REGNO (x)], file);
3158 fputs (qi_reg_name[REGNO (x)], file);
3161 fputs (qi_high_reg_name[REGNO (x)], file);
3169 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3170 C -- print opcode suffix for set/cmov insn.
3171 c -- like C, but print reversed condition
3172 R -- print the prefix for register names.
3173 z -- print the opcode suffix for the size of the current operand.
3174 * -- print a star (in certain assembler syntax)
3175 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3176 s -- print a shift double count, followed by the assembler's argument delimiter.
3178 b -- print the QImode name of the register for the indicated operand.
3179 %b0 would print %al if operands[0] is reg 0.
3180 w -- likewise, print the HImode name of the register.
3181 k -- likewise, print the SImode name of the register.
3182 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3183 y -- print "st(0)" instead of "st" as a register.
3184 m -- print "st(n)" as an mmx register. */
3187 print_operand (file, x, code)
3197 if (ASSEMBLER_DIALECT == 0)
3202 if (ASSEMBLER_DIALECT == 0)
3207 if (ASSEMBLER_DIALECT == 0)
3212 if (ASSEMBLER_DIALECT == 0)
3217 if (ASSEMBLER_DIALECT == 0)
3222 if (ASSEMBLER_DIALECT == 0)
3227 if (ASSEMBLER_DIALECT == 0)
3232 /* 387 opcodes don't get size suffixes if the operands are
3235 if (STACK_REG_P (x))
3238 /* Intel syntax has no truck with instruction suffixes. */
3239 if (ASSEMBLER_DIALECT != 0)
3242 /* Derive the opcode suffix from the size of the operand. */
3243 switch (GET_MODE_SIZE (GET_MODE (x)))
3246 #ifdef HAVE_GAS_FILDS_FISTS
3252 if (GET_MODE (x) == SFmode)
3266 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3268 #ifdef GAS_MNEMONICS
3294 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3296 PRINT_OPERAND (file, x, 0);
3302 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3305 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3308 /* Like above, but reverse condition */
3310 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3313 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3319 sprintf (str, "invalid operand code `%c'", code);
3320 output_operand_lossage (str);
3325 if (GET_CODE (x) == REG)
3327 PRINT_REG (x, code, file);
3330 else if (GET_CODE (x) == MEM)
3332 /* No `byte ptr' prefix for call instructions. */
3333 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3336 switch (GET_MODE_SIZE (GET_MODE (x)))
3338 case 1: size = "BYTE"; break;
3339 case 2: size = "WORD"; break;
3340 case 4: size = "DWORD"; break;
3341 case 8: size = "QWORD"; break;
3342 case 12: size = "XWORD"; break;
3343 case 16: size = "XMMWORD"; break;
3348 fputs (" PTR ", file);
3352 if (flag_pic && CONSTANT_ADDRESS_P (x))
3353 output_pic_addr_const (file, x, code);
3358 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3363 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3364 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3366 if (ASSEMBLER_DIALECT == 0)
3368 fprintf (file, "0x%lx", l);
3371 /* These float cases don't actually occur as immediate operands. */
3372 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3377 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3378 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3379 fprintf (file, "%s", dstr);
3382 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3387 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3388 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3389 fprintf (file, "%s", dstr);
3395 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3397 if (ASSEMBLER_DIALECT == 0)
3400 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3401 || GET_CODE (x) == LABEL_REF)
3403 if (ASSEMBLER_DIALECT == 0)
3406 fputs ("OFFSET FLAT:", file);
3409 if (GET_CODE (x) == CONST_INT)
3410 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3412 output_pic_addr_const (file, x, code);
3414 output_addr_const (file, x);
3418 /* Print a memory operand whose address is ADDR. */
3421 print_operand_address (file, addr)
3425 struct ix86_address parts;
3426 rtx base, index, disp;
3429 if (! ix86_decompose_address (addr, &parts))
3433 index = parts.index;
3435 scale = parts.scale;
3437 if (!base && !index)
3439 /* A displacement-only address requires special attention. */
3441 if (GET_CODE (disp) == CONST_INT)
3443 if (ASSEMBLER_DIALECT != 0)
3444 fputs ("ds:", file);
3445 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3448 output_pic_addr_const (file, addr, 0);
3450 output_addr_const (file, addr);
3454 if (ASSEMBLER_DIALECT == 0)
3459 output_pic_addr_const (file, disp, 0);
3460 else if (GET_CODE (disp) == LABEL_REF)
3461 output_asm_label (disp);
3463 output_addr_const (file, disp);
3468 PRINT_REG (base, 0, file);
3472 PRINT_REG (index, 0, file);
3474 fprintf (file, ",%d", scale);
3480 rtx offset = NULL_RTX;
3484 /* Pull out the offset of a symbol; print any symbol itself. */
3485 if (GET_CODE (disp) == CONST
3486 && GET_CODE (XEXP (disp, 0)) == PLUS
3487 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3489 offset = XEXP (XEXP (disp, 0), 1);
3490 disp = gen_rtx_CONST (VOIDmode,
3491 XEXP (XEXP (disp, 0), 0));
3495 output_pic_addr_const (file, disp, 0);
3496 else if (GET_CODE (disp) == LABEL_REF)
3497 output_asm_label (disp);
3498 else if (GET_CODE (disp) == CONST_INT)
3501 output_addr_const (file, disp);
3507 PRINT_REG (base, 0, file);
3510 if (INTVAL (offset) >= 0)
3512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3516 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3523 PRINT_REG (index, 0, file);
3525 fprintf (file, "*%d", scale);
3532 /* Split one or more DImode RTL references into pairs of SImode
3533 references. The RTL can be REG, offsettable MEM, integer constant, or
3534 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3535 split and "num" is its length. lo_half and hi_half are output arrays
3536 that parallel "operands". */
3539 split_di (operands, num, lo_half, hi_half)
3542 rtx lo_half[], hi_half[];
3546 rtx op = operands[num];
3547 if (CONSTANT_P (op))
3548 split_double (op, &lo_half[num], &hi_half[num]);
3549 else if (! reload_completed)
3551 lo_half[num] = gen_lowpart (SImode, op);
3552 hi_half[num] = gen_highpart (SImode, op);
3554 else if (GET_CODE (op) == REG)
3556 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3557 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3559 else if (offsettable_memref_p (op))
3561 rtx lo_addr = XEXP (op, 0);
3562 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3563 lo_half[num] = change_address (op, SImode, lo_addr);
3564 hi_half[num] = change_address (op, SImode, hi_addr);
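/* A minimal usage sketch (example_split_di_usage is a hypothetical
   helper, illustration only): before reload, splitting a DImode pseudo
   simply takes its SImode low and high parts.  */

static void
example_split_di_usage ()
{
  rtx op = gen_reg_rtx (DImode);
  rtx lo[1], hi[1];

  split_di (&op, 1, lo, hi);
  /* lo[0] now holds the least significant word and hi[0] the most
     significant one, matching the little-endian layout assumed by the
     REG and MEM cases above.  */
}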
3571 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3572 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3573 is the expression of the binary operation. The output may either be
3574 emitted here, or returned to the caller, like all output_* functions.
3576 There is no guarantee that the operands are the same mode, as they
3577 might be within FLOAT or FLOAT_EXTEND expressions. */
3579 #ifndef SYSV386_COMPAT
3580 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3581 wants to fix the assemblers because that causes incompatibility
3582 with gcc. No-one wants to fix gcc because that causes
3583 incompatibility with assemblers... You can use the option of
3584 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3585 #define SYSV386_COMPAT 1
3589 output_387_binary_op (insn, operands)
3593 static char buf[30];
3596 #ifdef ENABLE_CHECKING
3597 /* Even if we do not want to check the inputs, this documents the input
3598 constraints, which helps in understanding the following code. */
3599 if (STACK_REG_P (operands[0])
3600 && ((REG_P (operands[1])
3601 && REGNO (operands[0]) == REGNO (operands[1])
3602 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3603 || (REG_P (operands[2])
3604 && REGNO (operands[0]) == REGNO (operands[2])
3605 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3606 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3612 switch (GET_CODE (operands[3]))
3615 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3616 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3623 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3624 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3631 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3632 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3639 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3640 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3652 switch (GET_CODE (operands[3]))
3656 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3658 rtx temp = operands[2];
3659 operands[2] = operands[1];
3663 /* We know operands[0] == operands[1]. */
3665 if (GET_CODE (operands[2]) == MEM)
3671 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3673 if (STACK_TOP_P (operands[0]))
3674 /* How is it that we are storing to a dead operand[2]?
3675 Well, presumably operands[1] is dead too. We can't
3676 store the result to st(0) as st(0) gets popped on this
3677 instruction. Instead store to operands[2] (which I
3678 think has to be st(1)). st(1) will be popped later.
3679 gcc <= 2.8.1 didn't have this check and generated
3680 assembly code that the Unixware assembler rejected. */
3681 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3683 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3687 if (STACK_TOP_P (operands[0]))
3688 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3690 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3695 if (GET_CODE (operands[1]) == MEM)
3701 if (GET_CODE (operands[2]) == MEM)
3707 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3710 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3711 derived assemblers, confusingly reverse the direction of
3712 the operation for fsub{r} and fdiv{r} when the
3713 destination register is not st(0). The Intel assembler
3714 doesn't have this brain damage. Read !SYSV386_COMPAT to
3715 figure out what the hardware really does. */
3716 if (STACK_TOP_P (operands[0]))
3717 p = "{p\t%0, %2|rp\t%2, %0}";
3719 p = "{rp\t%2, %0|p\t%0, %2}";
3721 if (STACK_TOP_P (operands[0]))
3722 /* As above for fmul/fadd, we can't store to st(0). */
3723 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3725 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3730 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3733 if (STACK_TOP_P (operands[0]))
3734 p = "{rp\t%0, %1|p\t%1, %0}";
3736 p = "{p\t%1, %0|rp\t%0, %1}";
3738 if (STACK_TOP_P (operands[0]))
3739 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3741 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3746 if (STACK_TOP_P (operands[0]))
3748 if (STACK_TOP_P (operands[1]))
3749 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3751 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3754 else if (STACK_TOP_P (operands[1]))
3757 p = "{\t%1, %0|r\t%0, %1}";
3759 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3765 p = "{r\t%2, %0|\t%0, %2}";
3767 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
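/* Note on the templates above: strings of the form "p\t{%2, %0|%0, %2}"
   contain both dialects; the text before the `|' is used for AT&T
   syntax and the text after it for Intel syntax, selected by
   ASSEMBLER_DIALECT when the insn is output.  */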
3780 /* Output code for INSN to convert a float to a signed int. OPERANDS
3781 are the insn operands. The output may be [HSD]Imode and the input
3782 operand may be [SDX]Fmode. */
3785 output_fix_trunc (insn, operands)
3789 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3790 int dimode_p = GET_MODE (operands[0]) == DImode;
3793 /* Jump through a hoop or two for DImode, since the hardware has no
3794 non-popping instruction. We used to do this a different way, but
3795 that was somewhat fragile and broke with post-reload splitters. */
3796 if (dimode_p && !stack_top_dies)
3797 output_asm_insn ("fld\t%y1", operands);
3799 if (! STACK_TOP_P (operands[1]))
3802 xops[0] = GEN_INT (12);
3803 xops[1] = adj_offsettable_operand (operands[2], 1);
3804 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3806 xops[2] = operands[0];
3807 if (GET_CODE (operands[0]) != MEM)
3808 xops[2] = operands[3];
3810 output_asm_insn ("fnstcw\t%2", operands);
3811 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3812 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3813 output_asm_insn ("fldcw\t%2", operands);
3814 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
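/* At this point the sequence above has: stored the FP control word to
   %2 (fnstcw), saved a copy of it in %4, overwritten byte 1 of the
   slot with 12 (0x0c), which sets the rounding-control bits 10-11 to
   round-toward-zero as C conversion semantics require, loaded the
   truncating control word (fldcw), and put the original word back in
   %2 so the final fldcw below can restore it.  */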
3816 if (stack_top_dies || dimode_p)
3817 output_asm_insn ("fistp%z2\t%2", xops);
3819 output_asm_insn ("fist%z2\t%2", xops);
3821 output_asm_insn ("fldcw\t%2", operands);
3823 if (GET_CODE (operands[0]) != MEM)
3827 split_di (operands+0, 1, xops+0, xops+1);
3828 split_di (operands+3, 1, xops+2, xops+3);
3829 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3830 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3832 else if (GET_MODE (operands[0]) == SImode)
3833 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3835 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3841 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3842 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3843 when fucom should be used. */
3846 output_fp_compare (insn, operands, eflags_p, unordered_p)
3849 int eflags_p, unordered_p;
3852 rtx cmp_op0 = operands[0];
3853 rtx cmp_op1 = operands[1];
3858 cmp_op1 = operands[2];
3861 if (! STACK_TOP_P (cmp_op0))
3864 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3866 if (STACK_REG_P (cmp_op1)
3868 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3869 && REGNO (cmp_op1) != FIRST_STACK_REG)
3871 /* If the top of the 387 stack dies, and the other operand
3872 is also a stack register that dies, then this must be an
3873 `fcompp' float compare. */
3877 /* There is no double popping fcomi variant. Fortunately,
3878 eflags is immune from the fstp's cc clobbering. */
3880 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3882 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3890 return "fucompp\n\tfnstsw\t%0";
3892 return "fcompp\n\tfnstsw\t%0";
3905 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3907 static const char * const alt[24] =
3919 "fcomi\t{%y1, %0|%0, %y1}",
3920 "fcomip\t{%y1, %0|%0, %y1}",
3921 "fucomi\t{%y1, %0|%0, %y1}",
3922 "fucomip\t{%y1, %0|%0, %y1}",
3929 "fcom%z2\t%y2\n\tfnstsw\t%0",
3930 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3931 "fucom%z2\t%y2\n\tfnstsw\t%0",
3932 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3934 "ficom%z2\t%y2\n\tfnstsw\t%0",
3935 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3943 mask = eflags_p << 3;
3944 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3945 mask |= unordered_p << 1;
3946 mask |= stack_top_dies;
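/* Worked example of the encoding: fcomi (eflags_p == 1) on FP
   operands, an ordered compare, with the top of stack dying, gives
   mask == (1 << 3) | 1 == 9, which selects the popping "fcomip"
   template above.  */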
3958 /* Output assembler code to FILE to initialize basic-block profiling.
3960 If profile_block_flag == 2
3962 Output code to call the subroutine `__bb_init_trace_func'
3963 and pass two parameters to it. The first parameter is
3964 the address of a block allocated in the object module.
3965 The second parameter is the number of the first basic block
3968 The name of the block is a local symbol made with this statement:
3970 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3972 Of course, since you are writing the definition of
3973 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3974 can take a short cut in the definition of this macro and use the
3975 name that you know will result.
3977 The number of the first basic block of the function is
3978 passed to the macro in BLOCK_OR_LABEL.
3980 If described in a virtual assembler language, the code to be output looks like:
3984 parameter2 <- BLOCK_OR_LABEL
3985 call __bb_init_trace_func
3987 else if profile_block_flag != 0
3989 Output code to call the subroutine `__bb_init_func'
3990 and pass one single parameter to it, which is the same
3991 as the first parameter to `__bb_init_trace_func'.
3993 The first word of this parameter is a flag which will be nonzero if
3994 the object module has already been initialized. So test this word
3995 first, and do not call `__bb_init_func' if the flag is nonzero.
3996 Note: When profile_block_flag == 2 the test need not be done
3997 but `__bb_init_trace_func' *must* be called.
3999 BLOCK_OR_LABEL may be used to generate a label number as a
4000 branch destination in case `__bb_init_func' will not be called.
4002 If described in a virtual assembler language, the code to be output looks like:
4013 ix86_output_function_block_profiler (file, block_or_label)
4017 static int num_func = 0;
4019 char block_table[80], false_label[80];
4021 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4023 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4024 xops[5] = stack_pointer_rtx;
4025 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4027 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4029 switch (profile_block_flag)
4032 xops[2] = GEN_INT (block_or_label);
4033 xops[3] = gen_rtx_MEM (Pmode,
4034 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4035 xops[6] = GEN_INT (8);
4037 output_asm_insn ("push{l}\t%2", xops);
4039 output_asm_insn ("push{l}\t%1", xops);
4042 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4043 output_asm_insn ("push{l}\t%7", xops);
4045 output_asm_insn ("call\t%P3", xops);
4046 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4050 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4052 xops[0] = const0_rtx;
4053 xops[2] = gen_rtx_MEM (Pmode,
4054 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4055 xops[3] = gen_rtx_MEM (Pmode,
4056 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4057 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4058 xops[6] = GEN_INT (4);
4060 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4062 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4063 output_asm_insn ("jne\t%2", xops);
4066 output_asm_insn ("push{l}\t%1", xops);
4069 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4070 output_asm_insn ("push{l}\t%7", xops);
4072 output_asm_insn ("call\t%P3", xops);
4073 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4074 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4080 /* Output assembler code to FILE to increment a counter associated
4081 with basic block number BLOCKNO.
4083 If profile_block_flag == 2
4085 Output code to initialize the global structure `__bb' and
4086 call the function `__bb_trace_func' which will increment the
4089 `__bb' consists of two words. In the first word the number
4090 of the basic block has to be stored. In the second word
4091 the address of a block allocated in the object module
4094 The basic block number is given by BLOCKNO.
4096 The address of the block is given by the label created with
4098 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4100 by FUNCTION_BLOCK_PROFILER.
4102 Of course, since you are writing the definition of
4103 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4104 can take a short cut in the definition of this macro and use the
4105 name that you know will result.
4107 If described in a virtual assembler language, the code to be output looks like:
4110 move BLOCKNO -> (__bb)
4111 move LPBX0 -> (__bb+4)
4112 call __bb_trace_func
4114 Note that function `__bb_trace_func' must not change the
4115 machine state, especially the flag register. To guarantee
4116 this, you must output code to save and restore registers
4117 either in this macro or in the macros MACHINE_STATE_SAVE
4118 and MACHINE_STATE_RESTORE. The last two macros will be
4119 used in the function `__bb_trace_func', so you must make
4120 sure that the function prologue does not change any
4121 register prior to saving it with MACHINE_STATE_SAVE.
4123 else if profile_block_flag != 0
4125 Output code to increment the counter directly.
4126 Basic blocks are numbered separately from zero within each
4127 compiled object module. The count associated with block number
4128 BLOCKNO is at index BLOCKNO in an array of words; the name of
4129 this array is a local symbol made with this statement:
4131 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4133 Of course, since you are writing the definition of
4134 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4135 can take a short cut in the definition of this macro and use the
4136 name that you know will result.
4138 If described in a virtual assembler language, the code to be output looks like:
4141 inc (LPBX2+4*BLOCKNO)
4145 ix86_output_block_profiler (file, blockno)
4146 FILE *file ATTRIBUTE_UNUSED;
4149 rtx xops[8], cnt_rtx;
4151 char *block_table = counts;
4153 switch (profile_block_flag)
4156 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4158 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4159 xops[2] = GEN_INT (blockno);
4160 xops[3] = gen_rtx_MEM (Pmode,
4161 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4162 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4163 xops[5] = plus_constant (xops[4], 4);
4164 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4165 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4167 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4169 output_asm_insn ("pushf", xops);
4170 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4173 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4174 output_asm_insn ("push{l}\t%7", xops);
4175 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4176 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4177 output_asm_insn ("pop{l}\t%7", xops);
4180 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4181 output_asm_insn ("call\t%P3", xops);
4182 output_asm_insn ("popf", xops);
4187 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4188 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4189 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4192 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4195 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4197 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4198 output_asm_insn ("inc{l}\t%0", xops);
4205 ix86_expand_move (mode, operands)
4206 enum machine_mode mode;
4209 int strict = (reload_in_progress || reload_completed);
4212 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4214 /* Emit insns to move operands[1] into operands[0]. */
4216 if (GET_CODE (operands[0]) == MEM)
4217 operands[1] = force_reg (Pmode, operands[1]);
4220 rtx temp = operands[0];
4221 if (GET_CODE (temp) != REG)
4222 temp = gen_reg_rtx (Pmode);
4223 temp = legitimize_pic_address (operands[1], temp);
4224 if (temp == operands[0])
4231 if (GET_CODE (operands[0]) == MEM
4232 && (GET_MODE (operands[0]) == QImode
4233 || !push_operand (operands[0], mode))
4234 && GET_CODE (operands[1]) == MEM)
4235 operands[1] = force_reg (mode, operands[1]);
4237 if (push_operand (operands[0], mode)
4238 && ! general_no_elim_operand (operands[1], mode))
4239 operands[1] = copy_to_mode_reg (mode, operands[1]);
4241 if (FLOAT_MODE_P (mode))
4243 /* If we are loading a floating point constant to a register,
4244 force the value to memory now, since we'll get better code
4245 out of the back end. */
4249 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4250 && register_operand (operands[0], mode))
4251 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4255 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4260 /* Attempt to expand a binary operator. Make the expansion closer to the
4261 actual machine than just general_operand, which would allow 3 separate
4262 memory references (one output, two input) in a single insn. */
4265 ix86_expand_binary_operator (code, mode, operands)
4267 enum machine_mode mode;
4270 int matching_memory;
4271 rtx src1, src2, dst, op, clob;
4277 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4278 if (GET_RTX_CLASS (code) == 'c'
4279 && (rtx_equal_p (dst, src2)
4280 || immediate_operand (src1, mode)))
4287 /* If the destination is memory, and we do not have matching source
4288 operands, do things in registers. */
4289 matching_memory = 0;
4290 if (GET_CODE (dst) == MEM)
4292 if (rtx_equal_p (dst, src1))
4293 matching_memory = 1;
4294 else if (GET_RTX_CLASS (code) == 'c'
4295 && rtx_equal_p (dst, src2))
4296 matching_memory = 2;
4298 dst = gen_reg_rtx (mode);
4301 /* Both source operands cannot be in memory. */
4302 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4304 if (matching_memory != 2)
4305 src2 = force_reg (mode, src2);
4307 src1 = force_reg (mode, src1);
4310 /* If the operation is not commutable, source 1 cannot be a constant
4311 or non-matching memory. */
4312 if ((CONSTANT_P (src1)
4313 || (!matching_memory && GET_CODE (src1) == MEM))
4314 && GET_RTX_CLASS (code) != 'c')
4315 src1 = force_reg (mode, src1);
4317 /* If optimizing, copy to regs to improve CSE */
4318 if (optimize && ! no_new_pseudos)
4320 if (GET_CODE (dst) == MEM)
4321 dst = gen_reg_rtx (mode);
4322 if (GET_CODE (src1) == MEM)
4323 src1 = force_reg (mode, src1);
4324 if (GET_CODE (src2) == MEM)
4325 src2 = force_reg (mode, src2);
4328 /* Emit the instruction. */
4330 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4331 if (reload_in_progress)
4333 /* Reload doesn't know about the flags register, and doesn't know that
4334 it doesn't want to clobber it. We can only do this with PLUS. */
4341 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4342 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4345 /* Fix up the destination if needed. */
4346 if (dst != operands[0])
4347 emit_move_insn (operands[0], dst);
4350 /* Return TRUE or FALSE depending on whether the binary operator meets the
4351 appropriate constraints. */
4354 ix86_binary_operator_ok (code, mode, operands)
4356 enum machine_mode mode ATTRIBUTE_UNUSED;
4359 /* Both source operands cannot be in memory. */
4360 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4362 /* If the operation is not commutable, source 1 cannot be a constant. */
4363 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4365 /* If the destination is memory, we must have a matching source operand. */
4366 if (GET_CODE (operands[0]) == MEM
4367 && ! (rtx_equal_p (operands[0], operands[1])
4368 || (GET_RTX_CLASS (code) == 'c'
4369 && rtx_equal_p (operands[0], operands[2]))))
4371 /* If the operation is not commutable and source 1 is memory, we must
4372 have a matching destination. */
4373 if (GET_CODE (operands[1]) == MEM
4374 && GET_RTX_CLASS (code) != 'c'
4375 && ! rtx_equal_p (operands[0], operands[1]))
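/* A minimal sketch (example_expand_addsi is a hypothetical helper,
   illustration only) of how an expander uses the routines above,
   e.g. for an addsi3-style pattern computing dst = src + 42.  */

static void
example_expand_addsi ()
{
  rtx operands[3];

  operands[0] = gen_reg_rtx (SImode);
  operands[1] = gen_reg_rtx (SImode);
  operands[2] = GEN_INT (42);

  /* Outside of reload this emits a single
     (parallel [(set ...) (clobber (reg:CC flags))]) insn, matching
     the constraints checked by ix86_binary_operator_ok.  */
  ix86_expand_binary_operator (PLUS, SImode, operands);
}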
4380 /* Attempt to expand a unary operator. Make the expansion closer to the
4381 actual machine than just general_operand, which would allow 2 separate
4382 memory references (one output, one input) in a single insn. */
4385 ix86_expand_unary_operator (code, mode, operands)
4387 enum machine_mode mode;
4390 int matching_memory;
4391 rtx src, dst, op, clob;
4396 /* If the destination is memory, and we do not have matching source
4397 operands, do things in registers. */
4398 matching_memory = 0;
4399 if (GET_CODE (dst) == MEM)
4401 if (rtx_equal_p (dst, src))
4402 matching_memory = 1;
4404 dst = gen_reg_rtx (mode);
4407 /* When source operand is memory, destination must match. */
4408 if (!matching_memory && GET_CODE (src) == MEM)
4409 src = force_reg (mode, src);
4411 /* If optimizing, copy to regs to improve CSE */
4412 if (optimize && ! no_new_pseudos)
4414 if (GET_CODE (dst) == MEM)
4415 dst = gen_reg_rtx (mode);
4416 if (GET_CODE (src) == MEM)
4417 src = force_reg (mode, src);
4420 /* Emit the instruction. */
4422 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4423 if (reload_in_progress || code == NOT)
4425 /* Reload doesn't know about the flags register, and doesn't know that
4426 it doesn't want to clobber it. */
4433 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4434 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4437 /* Fix up the destination if needed. */
4438 if (dst != operands[0])
4439 emit_move_insn (operands[0], dst);
4442 /* Return TRUE or FALSE depending on whether the unary operator meets the
4443 appropriate constraints. */
4446 ix86_unary_operator_ok (code, mode, operands)
4447 enum rtx_code code ATTRIBUTE_UNUSED;
4448 enum machine_mode mode ATTRIBUTE_UNUSED;
4449 rtx operands[2] ATTRIBUTE_UNUSED;
4451 /* If one of operands is memory, source and destination must match. */
4452 if ((GET_CODE (operands[0]) == MEM
4453 || GET_CODE (operands[1]) == MEM)
4454 && ! rtx_equal_p (operands[0], operands[1]))
4459 /* Return TRUE or FALSE depending on whether the first SET in INSN
4460 has source and destination with matching CC modes, and that the
4461 CC mode is at least as constrained as REQ_MODE. */
4464 ix86_match_ccmode (insn, req_mode)
4466 enum machine_mode req_mode;
4469 enum machine_mode set_mode;
4471 set = PATTERN (insn);
4472 if (GET_CODE (set) == PARALLEL)
4473 set = XVECEXP (set, 0, 0);
4474 if (GET_CODE (set) != SET)
4477 set_mode = GET_MODE (SET_DEST (set));
4481 if (req_mode == CCNOmode)
4485 if (req_mode == CCZmode)
4495 return (GET_MODE (SET_SRC (set)) == set_mode);
4498 /* Produce an unsigned comparison for a given signed comparison. */
4500 static enum rtx_code
4501 unsigned_comparison (code)
4533 /* Generate insn patterns to do an integer compare of OPERANDS. */
4536 ix86_expand_int_compare (code, op0, op1)
4540 enum machine_mode cmpmode;
4543 cmpmode = SELECT_CC_MODE (code, op0, op1);
4544 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4546 /* This is very simple, but making the interface the same as in the
4547 FP case makes the rest of the code easier. */
4548 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4549 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4551 /* Return the test that should be put into the flags user, i.e.
4552 the bcc, scc, or cmov instruction. */
4553 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4556 /* Figure out whether to use ordered or unordered fp comparisons.
4557 Return the appropriate mode to use. */
4559 static enum machine_mode
4560 ix86_fp_compare_mode (code)
4568 /* When not doing IEEE compliant compares, fault on NaNs. */
4569 unordered = (TARGET_IEEE_FP != 0);
4572 case LT: case LE: case GT: case GE:
4576 case UNORDERED: case ORDERED:
4577 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4585 /* ??? If we knew whether invalid-operand exceptions were masked,
4586 we could rely on fcom to raise an exception and take care of
4587 NaNs. But we don't. We could know this from c99 math pragmas. */
4591 return unordered ? CCFPUmode : CCFPmode;
4594 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4597 ix86_use_fcomi_compare (code)
4600 return (TARGET_CMOVE
4601 && (code == ORDERED || code == UNORDERED
4602 /* All other unordered compares require checking
4603 multiple sets of bits. */
4604 || ix86_fp_compare_mode (code) == CCFPmode));
4607 /* Swap, force into registers, or otherwise massage the two operands
4608 to an fp comparison. The operands are updated in place; the new
4609 comparison code is returned. */
4611 static enum rtx_code
4612 ix86_prepare_fp_compare_args (code, pop0, pop1)
4616 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4617 rtx op0 = *pop0, op1 = *pop1;
4618 enum machine_mode op_mode = GET_MODE (op0);
4620 /* All of the unordered compare instructions only work on registers.
4621 The same is true of the XFmode compare instructions. The same is
4622 true of the fcomi compare instructions. */
4624 if (fpcmp_mode == CCFPUmode
4625 || op_mode == XFmode
4626 || ix86_use_fcomi_compare (code))
4628 op0 = force_reg (op_mode, op0);
4629 op1 = force_reg (op_mode, op1);
4633 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4634 things around if they appear profitable, otherwise force op0 into a register. */
4637 if (standard_80387_constant_p (op0) == 0
4638 || (GET_CODE (op0) == MEM
4639 && ! (standard_80387_constant_p (op1) == 0
4640 || GET_CODE (op1) == MEM)))
4643 tmp = op0, op0 = op1, op1 = tmp;
4644 code = swap_condition (code);
4647 if (GET_CODE (op0) != REG)
4648 op0 = force_reg (op_mode, op0);
4650 if (CONSTANT_P (op1))
4652 if (standard_80387_constant_p (op1))
4653 op1 = force_reg (op_mode, op1);
4655 op1 = validize_mem (force_const_mem (op_mode, op1));
4664 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4667 ix86_expand_fp_compare (code, op0, op1, scratch)
4669 rtx op0, op1, scratch;
4671 enum machine_mode fpcmp_mode, intcmp_mode;
4674 fpcmp_mode = ix86_fp_compare_mode (code);
4675 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4677 /* %%% fcomi is probably always faster, even when dealing with memory,
4678 since compare-and-branch would be three insns instead of four. */
4679 if (ix86_use_fcomi_compare (code))
4681 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4682 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4685 /* The FP codes work out to act like unsigned. */
4686 code = unsigned_comparison (code);
4687 intcmp_mode = CCmode;
4691 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4694 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4695 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4696 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4698 if (fpcmp_mode == CCFPmode
4700 || code == UNORDERED)
4702 /* We have two options here -- use sahf, or test bits of ah
4703 directly. On PPRO, they are equivalent, sahf being one byte
4704 smaller. On Pentium, sahf is non-pairable while test is UV pairable. */
4707 if (TARGET_USE_SAHF || optimize_size)
4710 emit_insn (gen_x86_sahf_1 (scratch));
4712 /* The FP codes work out to act like unsigned. */
4713 code = unsigned_comparison (code);
4714 intcmp_mode = CCmode;
4719 * The numbers below correspond to the bits of the FPSW in AH.
4720 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
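* As a worked example: `fcom' reports "equal" as C3 C2 C0 = 1 0 0 and
* "unordered" as 1 1 1, so the `andb $0x45, %ah' followed by
* `cmpb $0x40, %ah' pairing used below tests for exact equality while
* also screening out NaNs, since 0x45 == 0x40 | 0x04 | 0x01.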
4742 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4743 faster in all cases to just fall back on sahf. */
4770 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4771 intcmp_mode = CCNOmode;
4776 /* In the unordered case, we have to check C2 for NaNs, which
4777 doesn't happen to work out to anything nice combination-wise.
4778 So do some bit twiddling on the value we've got in AH to come
4779 up with an appropriate set of condition codes. */
4781 intcmp_mode = CCNOmode;
4785 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4790 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4791 intcmp_mode = CCmode;
4795 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4799 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4800 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4801 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4802 intcmp_mode = CCmode;
4806 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4807 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4808 intcmp_mode = CCmode;
4812 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4813 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4826 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4830 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4831 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4835 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4836 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4837 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4841 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4845 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
4849 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4859 /* Return the test that should be put into the flags user, i.e.
4860 the bcc, scc, or cmov instruction. */
4861 return gen_rtx_fmt_ee (code, VOIDmode,
4862 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4867 ix86_expand_compare (code)
4871 op0 = ix86_compare_op0;
4872 op1 = ix86_compare_op1;
4874 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4875 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
4877 ret = ix86_expand_int_compare (code, op0, op1);
4883 ix86_expand_branch (code, label)
4889 switch (GET_MODE (ix86_compare_op0))
4894 tmp = ix86_expand_compare (code);
4895 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4896 gen_rtx_LABEL_REF (VOIDmode, label),
4898 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4904 /* Don't expand the comparison early, so that we get better code
4905 when jump or whoever decides to reverse the comparison. */
4910 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
4913 tmp = gen_rtx_fmt_ee (code, VOIDmode,
4914 ix86_compare_op0, ix86_compare_op1);
4915 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4916 gen_rtx_LABEL_REF (VOIDmode, label),
4918 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
4920 use_fcomi = ix86_use_fcomi_compare (code);
4921 vec = rtvec_alloc (3 + !use_fcomi);
4922 RTVEC_ELT (vec, 0) = tmp;
4924 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
4926 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
4929 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
4931 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
4936 /* Expand DImode branch into multiple compare+branch. */
4938 rtx lo[2], hi[2], label2;
4939 enum rtx_code code1, code2, code3;
4941 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4943 tmp = ix86_compare_op0;
4944 ix86_compare_op0 = ix86_compare_op1;
4945 ix86_compare_op1 = tmp;
4946 code = swap_condition (code);
4948 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4949 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
4951 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
4952 avoid two branches. This costs one extra insn, so disable when
4953 optimizing for size. */
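/* Sketch of the identity used here:
     (hi0 == hi1 && lo0 == lo1)  <==>  ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0,
   so a single 32-bit test against zero decides 64-bit equality. */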
4955 if ((code == EQ || code == NE)
4957 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4962 if (hi[1] != const0_rtx)
4963 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4964 NULL_RTX, 0, OPTAB_WIDEN);
4967 if (lo[1] != const0_rtx)
4968 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4969 NULL_RTX, 0, OPTAB_WIDEN);
4971 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4972 NULL_RTX, 0, OPTAB_WIDEN);
4974 ix86_compare_op0 = tmp;
4975 ix86_compare_op1 = const0_rtx;
4976 ix86_expand_branch (code, label);
4980 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
4981 op1 is a constant, and the low word is zero, then we can just
4982 examine the high word. */
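/* A quick check of why this is sound: when the high words are equal,
   a < b would need the unsigned low word of a to be below zero, which
   is impossible; e.g. for b = 0x0000000500000000, a < b iff hi(a) < 5. */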
4984 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
4987 case LT: case LTU: case GE: case GEU:
4988 ix86_compare_op0 = hi[0];
4989 ix86_compare_op1 = hi[1];
4990 ix86_expand_branch (code, label);
4996 /* Otherwise, we need two or three jumps. */
4998 label2 = gen_label_rtx ();
5001 code2 = swap_condition (code);
5002 code3 = unsigned_condition (code);
5006 case LT: case GT: case LTU: case GTU:
5009 case LE: code1 = LT; code2 = GT; break;
5010 case GE: code1 = GT; code2 = LT; break;
5011 case LEU: code1 = LTU; code2 = GTU; break;
5012 case GEU: code1 = GTU; code2 = LTU; break;
5014 case EQ: code1 = NIL; code2 = NE; break;
5015 case NE: code2 = NIL; break;
5023 * if (hi(a) < hi(b)) goto true;
5024 * if (hi(a) > hi(b)) goto false;
5025 * if (lo(a) < lo(b)) goto true;
5029 ix86_compare_op0 = hi[0];
5030 ix86_compare_op1 = hi[1];
5033 ix86_expand_branch (code1, label);
5035 ix86_expand_branch (code2, label2);
5037 ix86_compare_op0 = lo[0];
5038 ix86_compare_op1 = lo[1];
5039 ix86_expand_branch (code3, label);
5042 emit_label (label2);
5052 ix86_expand_setcc (code, dest)
5059 if (GET_MODE (ix86_compare_op0) == DImode)
5060 return 0; /* FAIL */
5062 /* Three modes of generation:
5063 0 -- destination does not overlap compare sources:
5064 clear dest first, emit strict_low_part setcc.
5065 1 -- destination does overlap compare sources:
5066 emit subreg setcc, zero extend.
5067 2 -- destination is in QImode:
5073 if (GET_MODE (dest) == QImode)
5075 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5076 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5080 emit_move_insn (dest, const0_rtx);
5082 ret = ix86_expand_compare (code);
5083 PUT_MODE (ret, QImode);
5088 tmp = gen_lowpart (QImode, dest);
5089 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5093 if (!cse_not_expected)
5094 tmp = gen_reg_rtx (QImode);
5096 tmp = gen_lowpart (QImode, dest);
5099 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5105 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5106 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5107 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5108 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5112 return 1; /* DONE */
5116 ix86_expand_int_movcc (operands)
5119 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5120 rtx compare_seq, compare_op;
5122 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
5123 If the comparison is done with an immediate, we can convert it to LTU or
5124 GEU by altering the integer. */
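/* For example, (x LEU 5) becomes (x LTU 6) and (x GTU 5) becomes
   (x GEU 6); the 0xffffffff test below exists because incrementing
   the immediate would wrap in that one case. */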
5126 if ((code == LEU || code == GTU)
5127 && GET_CODE (ix86_compare_op1) == CONST_INT
5128 && GET_MODE (operands[0]) != HImode
5129 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5130 && GET_CODE (operands[2]) == CONST_INT
5131 && GET_CODE (operands[3]) == CONST_INT)
5137 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5141 compare_op = ix86_expand_compare (code);
5142 compare_seq = gen_sequence ();
5145 compare_code = GET_CODE (compare_op);
5147 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5148 HImode insns, we'd be swallowed in word prefix ops. */
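/* In 32-bit mode every HImode arithmetic insn carries a 0x66
   operand-size prefix, so a five or six insn expansion would pay
   that decode penalty on each one. */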
5150 if (GET_MODE (operands[0]) != HImode
5151 && GET_CODE (operands[2]) == CONST_INT
5152 && GET_CODE (operands[3]) == CONST_INT)
5154 rtx out = operands[0];
5155 HOST_WIDE_INT ct = INTVAL (operands[2]);
5156 HOST_WIDE_INT cf = INTVAL (operands[3]);
5159 if (compare_code == LTU || compare_code == GEU)
5162 /* Detect overlap between destination and compare sources. */
5165 /* To simplify rest of code, restrict to the GEU case. */
5166 if (compare_code == LTU)
5171 compare_code = reverse_condition (compare_code);
5172 code = reverse_condition (code);
5176 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5177 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5178 tmp = gen_reg_rtx (SImode);
5180 emit_insn (compare_seq);
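/* The insn emitted next is the classic sbb idiom: sbbl %reg,%reg
   computes reg - reg - CF, i.e. -1 when the carry is set and 0 when
   it is clear, so the LTU/GEU condition arranged above becomes an
   all-ones or all-zeros mask without a branch. */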
5181 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5193 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5204 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5206 else if (diff == -1 && ct)
5216 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5218 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5225 * andl cf - ct, dest
5230 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5232 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5236 emit_move_insn (out, tmp);
5238 return 1; /* DONE */
5245 tmp = ct, ct = cf, cf = tmp;
5247 compare_code = reverse_condition (compare_code);
5248 code = reverse_condition (code);
5250 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5251 || diff == 3 || diff == 5 || diff == 9)
5257 * lea cf(dest*(ct-cf)),dest
5261 * This also catches the degenerate setcc-only case.
5267 out = emit_store_flag (out, code, ix86_compare_op0,
5268 ix86_compare_op1, VOIDmode, 0, 1);
5275 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5279 tmp = gen_rtx_PLUS (SImode, tmp, out);
5285 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5291 emit_move_insn (out, tmp);
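/* Worked example (hypothetical constants): for ct = 7, cf = 3 we get
   diff = 4, and the result is formed as out * 4 + 3, a single lea
   yielding 3 when the condition is false and 7 when it is true. */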
5296 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5297 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5299 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5300 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5304 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5306 if (out != operands[0])
5307 emit_move_insn (operands[0], out);
5309 return 1; /* DONE */
5313 * General case: Jumpful:
5314 * xorl dest,dest cmpl op1, op2
5315 * cmpl op1, op2 movl ct, dest
5317 * decl dest movl cf, dest
5318 * andl (cf-ct),dest 1:
5323 * This is reasonably steep, but branch mispredict costs are
5324 * high on modern cpus, so consider failing only if optimizing
5327 * %%% Parameterize branch_cost on the tuning architecture, then
5328 * use that. The 80386 couldn't care less about mispredicts.
5331 if (!optimize_size && !TARGET_CMOVE)
5337 compare_code = reverse_condition (compare_code);
5338 code = reverse_condition (code);
5341 out = emit_store_flag (out, code, ix86_compare_op0,
5342 ix86_compare_op1, VOIDmode, 0, 1);
5344 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5345 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5347 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
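/* A worked check of the branchless sequence just emitted: when the
   condition holds, setcc yields 1, the decrement gives 0, the and
   gives 0, and the final add leaves ct; when it fails, setcc yields
   0, the decrement gives -1, the and leaves cf - ct, and the add
   produces cf. */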
5348 if (out != operands[0])
5349 emit_move_insn (operands[0], out);
5351 return 1; /* DONE */
5357 /* Try a few more things with specific constants and a variable. */
5360 rtx var, orig_out, out, tmp;
5363 return 0; /* FAIL */
5365 /* If one of the two operands is an interesting constant, load a
5366 constant with the above and mask it in with a logical operation. */
5368 if (GET_CODE (operands[2]) == CONST_INT)
5371 if (INTVAL (operands[2]) == 0)
5372 operands[3] = constm1_rtx, op = and_optab;
5373 else if (INTVAL (operands[2]) == -1)
5374 operands[3] = const0_rtx, op = ior_optab;
5376 return 0; /* FAIL */
5378 else if (GET_CODE (operands[3]) == CONST_INT)
5381 if (INTVAL (operands[3]) == 0)
5382 operands[2] = constm1_rtx, op = and_optab;
5383 else if (INTVAL (operands[3]) == -1)
5384 operands[2] = const0_rtx, op = ior_optab;
5386 return 0; /* FAIL */
5389 return 0; /* FAIL */
5391 orig_out = operands[0];
5392 tmp = gen_reg_rtx (GET_MODE (orig_out));
5395 /* Recurse to get the constant loaded. */
5396 if (ix86_expand_int_movcc (operands) == 0)
5397 return 0; /* FAIL */
5399 /* Mask in the interesting variable. */
5400 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5402 if (out != orig_out)
5403 emit_move_insn (orig_out, out);
5405 return 1; /* DONE */
5409 * For comparison with above,
5419 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5420 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5421 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5422 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5424 emit_insn (compare_seq);
5425 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5426 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5427 compare_op, operands[2],
5430 return 1; /* DONE */
5434 ix86_expand_fp_movcc (operands)
5438 enum machine_mode mode;
5441 /* The floating point conditional move instructions don't directly
5442 support conditions resulting from a signed integer comparison. */
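/* A hedged summary of the workaround used below: materialize the
   condition into a QImode register with setcc, then let the fcmov
   test that register against zero, for which the unsigned forms of
   the condition suffice. */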
5444 code = GET_CODE (operands[1]);
5451 tmp = gen_reg_rtx (QImode);
5452 ix86_expand_setcc (code, tmp);
5454 ix86_compare_op0 = tmp;
5455 ix86_compare_op1 = const0_rtx;
5462 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5463 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5464 gen_rtx_COMPARE (mode,
5466 ix86_compare_op1)));
5467 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5468 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5469 gen_rtx_fmt_ee (code, VOIDmode,
5470 gen_rtx_REG (mode, FLAGS_REG),
5478 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5479 works for floating point values and non-offsettable memories.
5480 For pushes, it returns just stack offsets; the values will be saved
5481 in the right order. At most three parts are generated. */
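/* For example, a DImode or DFmode operand splits into two SImode parts,
   and an XFmode operand (12 bytes on IA-32) into three. */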
5484 ix86_split_to_parts (operand, parts, mode)
5487 enum machine_mode mode;
5489 int size = GET_MODE_SIZE (mode) / 4;
5491 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5493 if (size < 2 || size > 3)
5496 /* Optimize constant pool references to immediates. This is used by fp moves
5497 that force all constants to memory to allow combining. */
5499 if (GET_CODE (operand) == MEM
5500 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5501 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5502 operand = get_pool_constant (XEXP (operand, 0));
5504 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5506 /* The only non-offsettable memories we handle are pushes. */
5507 if (! push_operand (operand, VOIDmode))
5510 PUT_MODE (operand, SImode);
5511 parts[0] = parts[1] = parts[2] = operand;
5516 split_di (&operand, 1, &parts[0], &parts[1]);
5519 if (REG_P (operand))
5521 if (!reload_completed)
5523 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5524 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5526 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5528 else if (offsettable_memref_p (operand))
5530 PUT_MODE (operand, SImode);
5532 parts[1] = adj_offsettable_operand (operand, 4);
5534 parts[2] = adj_offsettable_operand (operand, 8);
5536 else if (GET_CODE (operand) == CONST_DOUBLE)
5541 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5545 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5546 parts[2] = GEN_INT (l[2]);
5549 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5554 parts[1] = GEN_INT (l[1]);
5555 parts[0] = GEN_INT (l[0]);
5565 /* Emit insns to perform a move or push of DI, DF, and XF values.
5566 Return false when normal moves are needed; true when all required
5567 insns have been emitted. Operands 2-4 contain the input values
5568 in the correct order; operands 5-7 contain the output values. */
5571 ix86_split_long_move (operands1)
5576 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5580 /* Make our own copy to avoid clobbering the operands. */
5581 operands[0] = copy_rtx (operands1[0]);
5582 operands[1] = copy_rtx (operands1[1]);
5584 if (size < 2 || size > 3)
5587 /* The only non-offsettable memory we handle is a push. */
5588 if (push_operand (operands[0], VOIDmode))
5590 else if (GET_CODE (operands[0]) == MEM
5591 && ! offsettable_memref_p (operands[0]))
5594 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5595 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5597 /* When emitting a push, be careful about source operands on the stack. */
5598 if (push && GET_CODE (operands[1]) == MEM
5599 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5602 part[1][1] = part[1][2];
5603 part[1][0] = part[1][1];
5606 /* We need to do the copy in the right order in case an address register
5607 of the source overlaps the destination. */
5608 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5610 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5612 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5615 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5618 /* A collision in the middle part can be handled by reordering. */
5619 if (collisions == 1 && size == 3
5620 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5623 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5624 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5627 /* If there are more collisions, we can't handle them by reordering.
5628 Do an lea to the last part and use only one colliding move. */
5629 else if (collisions > 1)
5632 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5633 XEXP (part[1][0], 0)));
5634 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5635 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5637 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5644 emit_insn (gen_push (part[1][2]));
5645 emit_insn (gen_push (part[1][1]));
5646 emit_insn (gen_push (part[1][0]));
5650 /* Choose the correct order so as not to overwrite the source before it is copied. */
5651 if ((REG_P (part[0][0])
5652 && REG_P (part[1][1])
5653 && (REGNO (part[0][0]) == REGNO (part[1][1])
5655 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5657 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5661 operands1[2] = part[0][2];
5662 operands1[3] = part[0][1];
5663 operands1[4] = part[0][0];
5664 operands1[5] = part[1][2];
5665 operands1[6] = part[1][1];
5666 operands1[7] = part[1][0];
5670 operands1[2] = part[0][1];
5671 operands1[3] = part[0][0];
5672 operands1[5] = part[1][1];
5673 operands1[6] = part[1][0];
5680 operands1[2] = part[0][0];
5681 operands1[3] = part[0][1];
5682 operands1[4] = part[0][2];
5683 operands1[5] = part[1][0];
5684 operands1[6] = part[1][1];
5685 operands1[7] = part[1][2];
5689 operands1[2] = part[0][0];
5690 operands1[3] = part[0][1];
5691 operands1[5] = part[1][0];
5692 operands1[6] = part[1][1];
5700 ix86_split_ashldi (operands, scratch)
5701 rtx *operands, scratch;
5703 rtx low[2], high[2];
5706 if (GET_CODE (operands[2]) == CONST_INT)
5708 split_di (operands, 2, low, high);
5709 count = INTVAL (operands[2]) & 63;
5713 emit_move_insn (high[0], low[1]);
5714 emit_move_insn (low[0], const0_rtx);
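/* E.g. for a constant shift by 40: the low word moves into the high
   word, the low word becomes zero, and the high word is then shifted
   left by the remaining 40 - 32 = 8 bits. */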
5717 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5721 if (!rtx_equal_p (operands[0], operands[1]))
5722 emit_move_insn (operands[0], operands[1]);
5723 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5724 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5729 if (!rtx_equal_p (operands[0], operands[1]))
5730 emit_move_insn (operands[0], operands[1]);
5732 split_di (operands, 1, low, high);
5734 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5735 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5737 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5739 if (! no_new_pseudos)
5740 scratch = force_reg (SImode, const0_rtx);
5742 emit_move_insn (scratch, const0_rtx);
5744 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5748 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5753 ix86_split_ashrdi (operands, scratch)
5754 rtx *operands, scratch;
5756 rtx low[2], high[2];
5759 if (GET_CODE (operands[2]) == CONST_INT)
5761 split_di (operands, 2, low, high);
5762 count = INTVAL (operands[2]) & 63;
5766 emit_move_insn (low[0], high[1]);
5768 if (! reload_completed)
5769 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5772 emit_move_insn (high[0], low[0]);
5773 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5777 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5781 if (!rtx_equal_p (operands[0], operands[1]))
5782 emit_move_insn (operands[0], operands[1]);
5783 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5784 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5789 if (!rtx_equal_p (operands[0], operands[1]))
5790 emit_move_insn (operands[0], operands[1]);
5792 split_di (operands, 1, low, high);
5794 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5795 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5797 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5799 if (! no_new_pseudos)
5800 scratch = gen_reg_rtx (SImode);
5801 emit_move_insn (scratch, high[0]);
5802 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5803 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5807 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
5812 ix86_split_lshrdi (operands, scratch)
5813 rtx *operands, scratch;
5815 rtx low[2], high[2];
5818 if (GET_CODE (operands[2]) == CONST_INT)
5820 split_di (operands, 2, low, high);
5821 count = INTVAL (operands[2]) & 63;
5825 emit_move_insn (low[0], high[1]);
5826 emit_move_insn (high[0], const0_rtx);
5829 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5833 if (!rtx_equal_p (operands[0], operands[1]))
5834 emit_move_insn (operands[0], operands[1]);
5835 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5836 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5841 if (!rtx_equal_p (operands[0], operands[1]))
5842 emit_move_insn (operands[0], operands[1]);
5844 split_di (operands, 1, low, high);
5846 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5847 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5849 /* Heh. By reversing the arguments, we can reuse this pattern. */
5850 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5852 if (! no_new_pseudos)
5853 scratch = force_reg (SImode, const0_rtx);
5855 emit_move_insn (scratch, const0_rtx);
5857 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5861 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5865 /* Expand the appropriate insns for doing strlen if not just doing
5868 out = result, initialized with the start address
5869 align_rtx = alignment of the address.
5870 scratch = scratch register, initialized with the start address when
5871 not aligned, otherwise undefined
5873 This is just the body. It needs the initializations mentioned above and
5874 some address computing at the end. These things are done in i386.md. */
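/* Rough shape of the emitted code (an illustrative sketch, not the
   exact RTL):

       if (addr & 3)
         compare bytes one at a time until addr is 4-byte aligned;
     loop:
       word = *(unsigned int *) addr;  addr += 4;
       if (no byte of word is zero)
         goto loop;
       back up and locate the zero byte within word.  */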
5877 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5878 rtx out, align_rtx, scratch;
5882 rtx align_2_label = NULL_RTX;
5883 rtx align_3_label = NULL_RTX;
5884 rtx align_4_label = gen_label_rtx ();
5885 rtx end_0_label = gen_label_rtx ();
5887 rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5888 rtx z_flags = gen_rtx_REG (CCZmode, FLAGS_REG);
5889 rtx tmpreg = gen_reg_rtx (SImode);
5892 if (GET_CODE (align_rtx) == CONST_INT)
5893 align = INTVAL (align_rtx);
5895 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
5897 /* Is there a known alignment and is it less than 4? */
5900 /* Is there a known alignment and is it not 2? */
5903 align_3_label = gen_label_rtx (); /* Label when aligned to 3 bytes */
5904 align_2_label = gen_label_rtx (); /* Label when aligned to 2 bytes */
5906 /* Leave just the 3 lower bits. */
5907 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5908 NULL_RTX, 0, OPTAB_WIDEN);
5910 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
5912 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5913 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5914 gen_rtx_LABEL_REF (VOIDmode,
5917 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5919 emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));
5921 tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
5922 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5923 gen_rtx_LABEL_REF (VOIDmode,
5926 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5928 tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
5929 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5930 gen_rtx_LABEL_REF (VOIDmode,
5933 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5937 /* Since the alignment is 2, we have to check 2 or 0 bytes;
5938 check whether the address is aligned to a 4-byte boundary. */
5940 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5941 NULL_RTX, 0, OPTAB_WIDEN);
5943 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
5945 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5946 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5947 gen_rtx_LABEL_REF (VOIDmode,
5950 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5953 mem = gen_rtx_MEM (QImode, out);
5955 /* Now compare the bytes. */
5957 /* Compare the first n unaligned bytes one byte at a time. */
5958 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5960 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5961 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5962 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5964 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5966 /* Increment the address. */
5967 emit_insn (gen_addsi3 (out, out, const1_rtx));
5969 /* Not needed with an alignment of 2. */
5972 emit_label (align_2_label);
5974 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5976 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5977 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5978 gen_rtx_LABEL_REF (VOIDmode,
5981 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5983 emit_insn (gen_addsi3 (out, out, const1_rtx));
5985 emit_label (align_3_label);
5988 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5990 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5991 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5992 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5994 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5996 emit_insn (gen_addsi3 (out, out, const1_rtx));
5999 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
6000 align this loop; that only enlarges the code and does not help to
6002 emit_label (align_4_label);
6004 mem = gen_rtx_MEM (SImode, out);
6005 emit_move_insn (scratch, mem);
6006 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6008 /* This formula yields a nonzero result iff one of the bytes is zero.
6009 This saves three branches inside the loop and many cycles. */
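/* A quick check with a hypothetical value: for x = 0x41004242,
   (x - 0x01010101) & ~x & 0x80808080 yields 0x00800000, correctly
   flagging the zero byte. */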
6011 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6012 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6013 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6014 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6015 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
6019 rtx reg = gen_reg_rtx (SImode);
6020 emit_move_insn (reg, tmpreg);
6021 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6023 /* If zero is not in the first two bytes, move two bytes forward. */
6024 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6025 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6026 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6027 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6028 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6031 /* Emit lea manually to avoid clobbering the flags. */
6032 emit_insn (gen_rtx_SET (SImode, reg,
6033 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6035 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6036 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6037 emit_insn (gen_rtx_SET (VOIDmode, out,
6038 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6045 rtx end_2_label = gen_label_rtx ();
6046 /* Is zero in the first two bytes? */
6048 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6049 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6050 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6051 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6052 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6054 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6055 JUMP_LABEL (tmp) = end_2_label;
6057 /* Not in the first two. Move two bytes forward. */
6058 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6059 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6061 emit_label (end_2_label);
6065 /* Avoid a branch in fixing up the byte position. */
6066 tmpreg = gen_lowpart (QImode, tmpreg);
6067 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6068 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
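/* The add doubles the low byte, shifting its bit 7 (set iff the
   earlier of the two candidate bytes was the zero) into the carry
   flag; the borrowing subtract then adjusts OUT back by 3 or 4
   accordingly, again without a branch. */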
6070 emit_label (end_0_label);
6073 /* Clear stack slot assignments remembered from previous functions.
6074 This is called from INIT_EXPANDERS once before RTL is emitted for each
6078 ix86_init_machine_status (p)
6081 enum machine_mode mode;
6084 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6086 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6087 mode = (enum machine_mode) ((int) mode + 1))
6088 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6089 ix86_stack_locals[(int) mode][n] = NULL_RTX;
6092 /* Mark machine specific bits of P for GC. */
6094 ix86_mark_machine_status (p)
6097 enum machine_mode mode;
6100 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6101 mode = (enum machine_mode) ((int) mode + 1))
6102 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6103 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6106 /* Return a MEM corresponding to a stack slot with mode MODE.
6107 Allocate a new slot if necessary.
6109 The RTL for a function can have several slots available: N is
6110 which slot to use. */
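/* A typical use elsewhere in this port (hedged example) is requesting a
   scratch HImode slot to hold an FPU control word; repeated requests with
   the same MODE and N return the same slot. */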
6113 assign_386_stack_local (mode, n)
6114 enum machine_mode mode;
6117 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6120 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6121 ix86_stack_locals[(int) mode][n]
6122 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6124 return ix86_stack_locals[(int) mode][n];
6127 /* Calculate the length of the memory address in the instruction
6128 encoding. Does not include the one-byte modrm, opcode, or prefix. */
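/* Illustrative costs: plain (%eax) encodes entirely in the modrm byte,
   so this returns 0; a displacement fitting in a signed byte adds 1 and
   a full displacement adds 4; an index register forces the SIB byte,
   adding 1; and (%esp) or (%ebp) likewise need the longer forms. */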
6131 memory_address_length (addr)
6134 struct ix86_address parts;
6135 rtx base, index, disp;
6138 if (GET_CODE (addr) == PRE_DEC
6139 || GET_CODE (addr) == POST_INC)
6142 if (! ix86_decompose_address (addr, &parts))
6146 index = parts.index;
6150 /* Register Indirect. */
6151 if (base && !index && !disp)
6153 /* Special cases: ebp and esp need the two-byte modrm form. */
6154 if (addr == stack_pointer_rtx
6155 || addr == arg_pointer_rtx
6156 || addr == frame_pointer_rtx
6157 || addr == hard_frame_pointer_rtx)
6161 /* Direct Addressing. */
6162 else if (disp && !base && !index)
6167 /* Find the length of the displacement constant. */
6170 if (GET_CODE (disp) == CONST_INT
6171 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6177 /* An index requires the two-byte modrm form. */
6185 /* Compute default value for "length_immediate" attribute. When SHORTFORM
6186 is set, expect that the insn has an 8-bit immediate alternative. */
6188 ix86_attr_length_immediate_default (insn, shortform)
6194 extract_insn (insn);
6195 for (i = recog_data.n_operands - 1; i >= 0; --i)
6196 if (CONSTANT_P (recog_data.operand[i]))
6201 && GET_CODE (recog_data.operand[i]) == CONST_INT
6202 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6206 switch (get_attr_mode (insn))
6218 fatal_insn ("Unknown insn mode", insn);
6224 /* Compute default value for "length_address" attribute. */
6226 ix86_attr_length_address_default (insn)
6230 extract_insn (insn);
6231 for (i = recog_data.n_operands - 1; i >= 0; --i)
6232 if (GET_CODE (recog_data.operand[i]) == MEM)
6234 return memory_address_length (XEXP (recog_data.operand[i], 0));
6240 /* Return the maximum number of instructions a cpu can issue. */
6247 case PROCESSOR_PENTIUM:
6251 case PROCESSOR_PENTIUMPRO:
6259 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6260 by DEP_INSN and nothing else set by DEP_INSN. */
6263 ix86_flags_dependant (insn, dep_insn, insn_type)
6265 enum attr_type insn_type;
6269 /* Simplify the test for uninteresting insns. */
6270 if (insn_type != TYPE_SETCC
6271 && insn_type != TYPE_ICMOV
6272 && insn_type != TYPE_FCMOV
6273 && insn_type != TYPE_IBR)
6276 if ((set = single_set (dep_insn)) != 0)
6278 set = SET_DEST (set);
6281 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6282 && XVECLEN (PATTERN (dep_insn), 0) == 2
6283 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6284 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6286 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6287 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
6292 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6295 /* This test is true if the dependent insn reads the flags but
6296 not any other potentially set register. */
6297 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6300 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6306 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6307 address with operands set by DEP_INSN. */
6310 ix86_agi_dependant (insn, dep_insn, insn_type)
6312 enum attr_type insn_type;
6316 if (insn_type == TYPE_LEA)
6318 addr = PATTERN (insn);
6319 if (GET_CODE (addr) == SET)
6321 else if (GET_CODE (addr) == PARALLEL
6322 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6323 addr = XVECEXP (addr, 0, 0);
6326 addr = SET_SRC (addr);
6331 extract_insn (insn);
6332 for (i = recog_data.n_operands - 1; i >= 0; --i)
6333 if (GET_CODE (recog_data.operand[i]) == MEM)
6335 addr = XEXP (recog_data.operand[i], 0);
6342 return modified_in_p (addr, dep_insn);
6346 ix86_adjust_cost (insn, link, dep_insn, cost)
6347 rtx insn, link, dep_insn;
6350 enum attr_type insn_type, dep_insn_type;
6351 enum attr_memory memory;
6353 int dep_insn_code_number;
6355 /* Anti and output dependencies have zero cost on all CPUs. */
6356 if (REG_NOTE_KIND (link) != 0)
6359 dep_insn_code_number = recog_memoized (dep_insn);
6361 /* If we can't recognize the insns, we can't really do anything. */
6362 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6365 insn_type = get_attr_type (insn);
6366 dep_insn_type = get_attr_type (dep_insn);
6368 /* Prologue and epilogue allocators can have a false dependency on ebp.
6369 This results in an extra one-cycle stall in Pentium prologue scheduling,
6370 so handle this important case manually. */
6371 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6372 && dep_insn_type == TYPE_ALU
6373 && !reg_mentioned_p (stack_pointer_rtx, insn))
6378 case PROCESSOR_PENTIUM:
6379 /* Address Generation Interlock adds a cycle of latency. */
6380 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6383 /* ??? Compares pair with jump/setcc. */
6384 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6387 /* Floating point stores require the value to be ready one cycle earlier. */
6388 if (insn_type == TYPE_FMOV
6389 && get_attr_memory (insn) == MEMORY_STORE
6390 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6394 case PROCESSOR_PENTIUMPRO:
6395 /* Since we can't represent delayed latencies of load+operation,
6396 increase the cost here for non-imov insns. */
6397 if (dep_insn_type != TYPE_IMOV
6398 && dep_insn_type != TYPE_FMOV
6399 && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6400 || memory == MEMORY_BOTH))
6403 /* INT->FP conversion is expensive. */
6404 if (get_attr_fp_int_src (dep_insn))
6407 /* There is one cycle extra latency between an FP op and a store. */
6408 if (insn_type == TYPE_FMOV
6409 && (set = single_set (dep_insn)) != NULL_RTX
6410 && (set2 = single_set (insn)) != NULL_RTX
6411 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6412 && GET_CODE (SET_DEST (set2)) == MEM)
6417 /* The esp dependency is resolved before the instruction is really
6419 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6420 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6423 /* Since we can't represent delayed latencies of load+operation,
6424 increase the cost here for non-imov insns. */
6425 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6426 || memory == MEMORY_BOTH)
6427 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6429 /* INT->FP conversion is expensive. */
6430 if (get_attr_fp_int_src (dep_insn))
6434 case PROCESSOR_ATHLON:
6435 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6436 || memory == MEMORY_BOTH)
6438 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6453 struct ppro_sched_data
6456 int issued_this_cycle;
6461 ix86_safe_length (insn)
6464 if (recog_memoized (insn) >= 0)
6465 return get_attr_length(insn);
6471 ix86_safe_length_prefix (insn)
6474 if (recog_memoized (insn) >= 0)
6475 return get_attr_length(insn);
6480 static enum attr_memory
6481 ix86_safe_memory (insn)
6484 if (recog_memoized (insn) >= 0)
6485 return get_attr_memory(insn);
6487 return MEMORY_UNKNOWN;
6490 static enum attr_pent_pair
6491 ix86_safe_pent_pair (insn)
6494 if (recog_memoized (insn) >= 0)
6495 return get_attr_pent_pair(insn);
6497 return PENT_PAIR_NP;
6500 static enum attr_ppro_uops
6501 ix86_safe_ppro_uops (insn)
6504 if (recog_memoized (insn) >= 0)
6505 return get_attr_ppro_uops (insn);
6507 return PPRO_UOPS_MANY;
6511 ix86_dump_ppro_packet (dump)
6514 if (ix86_sched_data.ppro.decode[0])
6516 fprintf (dump, "PPRO packet: %d",
6517 INSN_UID (ix86_sched_data.ppro.decode[0]));
6518 if (ix86_sched_data.ppro.decode[1])
6519 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6520 if (ix86_sched_data.ppro.decode[2])
6521 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6526 /* We're beginning a new block. Initialize data structures as necessary. */
6529 ix86_sched_init (dump, sched_verbose)
6530 FILE *dump ATTRIBUTE_UNUSED;
6531 int sched_verbose ATTRIBUTE_UNUSED;
6533 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6536 /* Shift INSN to SLOT, and shift everything else down. */
6539 ix86_reorder_insn (insnp, slot)
6546 insnp[0] = insnp[1];
6547 while (++insnp != slot);
6552 /* Find an instruction with the given pairability that minimizes the number
6553 of cycles lost because the CPU waits for both pipelines to finish before
6554 reading the next instructions. Also take care that the two instructions
6555 together do not exceed 7 bytes. */
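/* Pentium pairing background (a hedged summary): "PU" insns can issue
   only in the U pipe, "PV" only in the V pipe, "UV" in either, and
   "NP" insns do not pair; two paired insns issue in a single cycle. */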
6558 ix86_pent_find_pair (e_ready, ready, type, first)
6561 enum attr_pent_pair type;
6564 int mincycles, cycles;
6565 enum attr_pent_pair tmp;
6566 enum attr_memory memory;
6567 rtx *insnp, *bestinsnp = NULL;
6569 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6572 memory = ix86_safe_memory (first);
6573 cycles = result_ready_cost (first);
6574 mincycles = INT_MAX;
6576 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6577 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6578 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6580 enum attr_memory second_memory;
6581 int secondcycles, currentcycles;
6583 second_memory = ix86_safe_memory (*insnp);
6584 secondcycles = result_ready_cost (*insnp);
6585 currentcycles = abs (cycles - secondcycles);
6587 if (secondcycles >= 1 && cycles >= 1)
6589 /* Two read/modify/write instructions together take two
6591 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6594 /* A read/modify/write instruction followed by a read/modify
6595 takes one cycle longer. */
6596 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6597 && tmp != PENT_PAIR_UV
6598 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6601 if (currentcycles < mincycles)
6602 bestinsnp = insnp, mincycles = currentcycles;
6608 /* Subroutines of ix86_sched_reorder. */
6611 ix86_sched_reorder_pentium (ready, e_ready)
6615 enum attr_pent_pair pair1, pair2;
6618 /* This wouldn't be necessary if Haifa knew that static insn ordering
6619 mattered for which pipe an insn is issued to. So we have to make
6620 some minor rearrangements. */
6622 pair1 = ix86_safe_pent_pair (*e_ready);
6624 /* If the first insn is non-pairable, let it be. */
6625 if (pair1 == PENT_PAIR_NP)
6628 pair2 = PENT_PAIR_NP;
6631 /* If the first insn is UV or PV pairable, search for a PU
6633 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6635 insnp = ix86_pent_find_pair (e_ready-1, ready,
6636 PENT_PAIR_PU, *e_ready);
6638 pair2 = PENT_PAIR_PU;
6641 /* If the first insn is PU or UV pairable, search for a PV
6643 if (pair2 == PENT_PAIR_NP
6644 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6646 insnp = ix86_pent_find_pair (e_ready-1, ready,
6647 PENT_PAIR_PV, *e_ready);
6649 pair2 = PENT_PAIR_PV;
6652 /* If the first insn is pairable, search for a UV
6654 if (pair2 == PENT_PAIR_NP)
6656 insnp = ix86_pent_find_pair (e_ready-1, ready,
6657 PENT_PAIR_UV, *e_ready);
6659 pair2 = PENT_PAIR_UV;
6662 if (pair2 == PENT_PAIR_NP)
6665 /* Found something! Decide if we need to swap the order. */
6666 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6667 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6668 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6669 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6670 ix86_reorder_insn (insnp, e_ready);
6672 ix86_reorder_insn (insnp, e_ready - 1);
6676 ix86_sched_reorder_ppro (ready, e_ready)
6681 enum attr_ppro_uops cur_uops;
6682 int issued_this_cycle;
6686 /* At this point .ppro.decode contains the state of the three
6687 decoders from last "cycle". That is, those insns that were
6688 actually independent. But here we're scheduling for the
6689 decoder, and we may find things that are decodable in the
6692 memcpy (decode, ix86_sched_data.ppro.decode, sizeof(decode));
6693 issued_this_cycle = 0;
6696 cur_uops = ix86_safe_ppro_uops (*insnp);
6698 /* If the decoders are empty, and we have a complex insn at the
6699 head of the priority queue, let it issue without complaint. */
6700 if (decode[0] == NULL)
6702 if (cur_uops == PPRO_UOPS_MANY)
6708 /* Otherwise, search for a 2-4 uop insn to issue. */
6709 while (cur_uops != PPRO_UOPS_FEW)
6713 cur_uops = ix86_safe_ppro_uops (*--insnp);
6716 /* If so, move it to the head of the line. */
6717 if (cur_uops == PPRO_UOPS_FEW)
6718 ix86_reorder_insn (insnp, e_ready);
6720 /* Issue the head of the queue. */
6721 issued_this_cycle = 1;
6722 decode[0] = *e_ready--;
6725 /* Look for simple insns to fill in the other two slots. */
6726 for (i = 1; i < 3; ++i)
6727 if (decode[i] == NULL)
6729 if (ready >= e_ready)
6733 cur_uops = ix86_safe_ppro_uops (*insnp);
6734 while (cur_uops != PPRO_UOPS_ONE)
6738 cur_uops = ix86_safe_ppro_uops (*--insnp);
6741 /* Found one. Move it to the head of the queue and issue it. */
6742 if (cur_uops == PPRO_UOPS_ONE)
6744 ix86_reorder_insn (insnp, e_ready);
6745 decode[i] = *e_ready--;
6746 issued_this_cycle++;
6750 /* ??? Didn't find one. Ideally, here we would do a lazy split
6751 of 2-uop insns, issue one and queue the other. */
6755 if (issued_this_cycle == 0)
6756 issued_this_cycle = 1;
6757 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6761 /* We are about to begin issuing insns for this clock cycle.
6762 Override the default sort algorithm to better slot instructions. */
6764 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6765 FILE *dump ATTRIBUTE_UNUSED;
6766 int sched_verbose ATTRIBUTE_UNUSED;
6769 int clock_var ATTRIBUTE_UNUSED;
6771 rtx *e_ready = ready + n_ready - 1;
6781 case PROCESSOR_PENTIUM:
6782 ix86_sched_reorder_pentium (ready, e_ready);
6785 case PROCESSOR_PENTIUMPRO:
6786 ix86_sched_reorder_ppro (ready, e_ready);
6791 return ix86_issue_rate ();
6794 /* We are about to issue INSN. Return the number of insns left on the
6795 ready queue that can be issued this cycle. */
6798 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6808 return can_issue_more - 1;
6810 case PROCESSOR_PENTIUMPRO:
6812 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6814 if (uops == PPRO_UOPS_MANY)
6817 ix86_dump_ppro_packet (dump);
6818 ix86_sched_data.ppro.decode[0] = insn;
6819 ix86_sched_data.ppro.decode[1] = NULL;
6820 ix86_sched_data.ppro.decode[2] = NULL;
6822 ix86_dump_ppro_packet (dump);
6823 ix86_sched_data.ppro.decode[0] = NULL;
6825 else if (uops == PPRO_UOPS_FEW)
6828 ix86_dump_ppro_packet (dump);
6829 ix86_sched_data.ppro.decode[0] = insn;
6830 ix86_sched_data.ppro.decode[1] = NULL;
6831 ix86_sched_data.ppro.decode[2] = NULL;
6835 for (i = 0; i < 3; ++i)
6836 if (ix86_sched_data.ppro.decode[i] == NULL)
6838 ix86_sched_data.ppro.decode[i] = insn;
6846 ix86_dump_ppro_packet (dump);
6847 ix86_sched_data.ppro.decode[0] = NULL;
6848 ix86_sched_data.ppro.decode[1] = NULL;
6849 ix86_sched_data.ppro.decode[2] = NULL;
6853 return --ix86_sched_data.ppro.issued_this_cycle;
6857 /* Compute the alignment given to a constant that is being placed in memory.
6858 EXP is the constant and ALIGN is the alignment that the object would
6860 The value of this function is used instead of that alignment to align
6864 ix86_constant_alignment (exp, align)
6868 if (TREE_CODE (exp) == REAL_CST)
6870 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6872 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
6875 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
6882 /* Compute the alignment for a static variable.
6883 TYPE is the data type, and ALIGN is the alignment that
6884 the object would ordinarily have. The value of this function is used
6885 instead of that alignment to align the object. */
6888 ix86_data_alignment (type, align)
6892 if (AGGREGATE_TYPE_P (type)
6894 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6895 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
6896 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
6899 if (TREE_CODE (type) == ARRAY_TYPE)
6901 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6903 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6906 else if (TREE_CODE (type) == COMPLEX_TYPE)
6909 if (TYPE_MODE (type) == DCmode && align < 64)
6911 if (TYPE_MODE (type) == XCmode && align < 128)
6914 else if ((TREE_CODE (type) == RECORD_TYPE
6915 || TREE_CODE (type) == UNION_TYPE
6916 || TREE_CODE (type) == QUAL_UNION_TYPE)
6917 && TYPE_FIELDS (type))
6919 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
6921 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
6924 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
6925 || TREE_CODE (type) == INTEGER_TYPE)
6927 if (TYPE_MODE (type) == DFmode && align < 64)
6929 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
6936 /* Compute the alignment for a local variable.
6937 TYPE is the data type, and ALIGN is the alignment that
6938 the object would ordinarily have. The value of this macro is used
6939 instead of that alignment to align the object. */
6942 ix86_local_alignment (type, align)
6946 if (TREE_CODE (type) == ARRAY_TYPE)
6948 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6950 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6953 else if (TREE_CODE (type) == COMPLEX_TYPE)
6955 if (TYPE_MODE (type) == DCmode && align < 64)
6957 if (TYPE_MODE (type) == XCmode && align < 128)
6960 else if ((TREE_CODE (type) == RECORD_TYPE
6961 || TREE_CODE (type) == UNION_TYPE
6962 || TREE_CODE (type) == QUAL_UNION_TYPE)
6963 && TYPE_FIELDS (type))
6965 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
6967 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
6970 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
6971 || TREE_CODE (type) == INTEGER_TYPE)
6974 if (TYPE_MODE (type) == DFmode && align < 64)
6976 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)